From 23edf638b73c226d80a91ccd7afb9b8fd8a5ed06 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Sun, 14 Jun 2026 21:57:12 -0700 Subject: [PATCH] Fix off-by-one CHARCLASS validation and collapse full-domain to re.full_char Two bugs were biting any CHARCLASS that started at character 0: 1. The validation loop in mk_func_decl seeded prev_hi at -1, so the non-adjacency check 'lo <= prev_hi + 1' rejected the very first range whenever its low bound was 0. Now the check is guarded by have_prev so it never fires on the initial range. 2. mk_charclass would produce a CHARCLASS node spanning the full char domain instead of collapsing to re.full_char, leaving subsequent intersections and complements with a non-canonical full-domain shape. The validation error now also dumps the offending ranges so mis-construction is much easier to localise in the future. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/seq_decl_plugin.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index b50616439..82939dd1e 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -505,7 +505,8 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p if ((num_parameters % 2) != 0) m.raise_exception("re.charclass expects an even number of int parameters (lo, hi pairs)"); unsigned mx = get_char_plugin().max_char(); - int prev_hi = -1; + bool have_prev = false; + int prev_hi = 0; for (unsigned i = 0; i + 1 < num_parameters; i += 2) { if (!parameters[i].is_int() || !parameters[i + 1].is_int()) m.raise_exception("re.charclass parameters must be integers"); @@ -513,9 +514,16 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p int hi = parameters[i + 1].get_int(); if (lo < 0 || hi < 0 || lo > hi || static_cast(hi) > mx) m.raise_exception("re.charclass range out of character domain"); - if (lo <= prev_hi + 1) - m.raise_exception("re.charclass ranges must be sorted, disjoint, and non-adjacent"); + if (have_prev && lo <= prev_hi + 1) { + std::stringstream ss; + ss << "re.charclass ranges must be sorted, disjoint, and non-adjacent (got"; + for (unsigned k = 0; k + 1 < num_parameters; k += 2) + ss << " [" << parameters[k].get_int() << "," << parameters[k + 1].get_int() << "]"; + ss << ")"; + m.raise_exception(ss.str().c_str()); + } prev_hi = hi; + have_prev = true; } if (!range) range = mk_reglan(); return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, @@ -1169,6 +1177,9 @@ app* seq_util::rex::mk_charclass(seq::range_predicate const& p, sort* seq_sort) if (p.is_empty()) return mk_empty(re_sort); unsigned n = p.num_ranges(); + unsigned mx = u.max_char(); + if (n == 1 && p[0].first == 0 && p[0].second == mx) + return mk_full_char(re_sort); vector params; for (unsigned i = 0; i < n; ++i) { auto r = p[i];