mirror of
https://github.com/Z3Prover/z3
synced 2026-06-29 11:58:51 +00:00
Smart constructors for regex ranges: canonical form at construction time (#9814)
Regex range expressions (`re.range`) and Boolean operations over them were left in unsimplified form, defeating downstream optimisations (bisimulation classical fast-path, derivative engine) and producing semantically-empty terms not syntactically equal to `re.none`. ## Changes ### `seq_decl_plugin.h` / `seq_decl_plugin.cpp` - **`seq_util::rex::mk_range(sort*, unsigned lo, unsigned hi)`** — new smart constructor that normalises at call time: - `lo > hi` → `re.empty` - `lo == hi` → `str.to_re` (singleton string) - `lo < hi` → `re.range` - **`mk_info_rec` `OP_RE_RANGE`** — concrete non-empty ranges (both bounds are single-char literals with `lo ≤ hi`) now return `classical = true`, enabling the XOR-bisimulation `classical_distinguishing` fast-path on character-predicate leaves. Symbolic/unknown ranges retain `classical = false`. ### `seq_rewriter.cpp` - **`mk_re_range`** — singleton collapse: `(re.range "a" "a")` → `(str.to_re "a")` - **`mk_regex_inter_normalize`** — range × range intersection: `[a,b] ∩ [c,d]` → `[max(a,c), min(b,d)]`, or `re.none` (disjoint), or `str.to_re` (boundary singleton); now delegates to `re().mk_range(sort*, lo, hi)` - **`mk_regex_union_normalize`** — range × range union for overlapping/adjacent ranges: `[a,b] ∪ [c,d]` → `[min(a,c), max(b,d)]`; disjoint ranges fall through to existing `merge_regex_sets`; now delegates to `re().mk_range(sort*, lo, hi)` - **`mk_re_complement`** — range complement expands to one or two concrete ranges instead of an opaque `re.comp` node; now delegates to `re().mk_range(sort*, lo, hi)`: - `comp([0, b])` → `[b+1, max]` - `comp([a, max])` → `[0, a-1]` - `comp([a, b])` → `[0, a-1] ∪ [b+1, max]` ``` (simplify (re.range "z" "a")) ; → re.none (simplify (re.range "a" "a")) ; → (str.to_re "a") (simplify (re.inter (re.range "a" "z") (re.range "f" "k"))); → (re.range "f" "k") (simplify (re.union (re.range "a" "f") (re.range "g" "k"))); → (re.range "a" "k") (simplify (re.comp (re.range "b" "y"))) ; → (re.union [0,a] [z,max]) ``` ### Tests New `src/test/seq_rewriter.cpp` with 14 cases covering all the above reductions plus downstream propagation (star/concat/union/inter absorbing empty ranges). --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
897c4475af
commit
8c2a425e4b
6 changed files with 272 additions and 4 deletions
|
|
@ -1208,6 +1208,15 @@ app* seq_util::rex::mk_of_pred(expr* p) {
|
|||
return m.mk_app(m_fid, OP_RE_OF_PRED, 0, nullptr, 1, &p);
|
||||
}
|
||||
|
||||
app* seq_util::rex::mk_range(sort* re_sort, unsigned lo, unsigned hi) {
|
||||
if (lo > hi)
|
||||
return mk_empty(re_sort);
|
||||
app* lo_str = u.str.mk_string(zstring(lo));
|
||||
if (lo == hi)
|
||||
return mk_to_re(lo_str);
|
||||
return mk_range(lo_str, u.str.mk_string(zstring(hi)));
|
||||
}
|
||||
|
||||
bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const {
|
||||
if (is_loop(n)) {
|
||||
app const* a = to_app(n);
|
||||
|
|
@ -1671,11 +1680,19 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
|
|||
case OP_RE_OPTION:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
return i1.opt();
|
||||
case OP_RE_RANGE:
|
||||
case OP_RE_RANGE: {
|
||||
// A concrete range [lo, hi] with lo <= hi is non-empty and classical.
|
||||
zstring slo, shi;
|
||||
if (u.str.is_string(e->get_arg(0), slo) && slo.length() == 1 &&
|
||||
u.str.is_string(e->get_arg(1), shi) && shi.length() == 1 &&
|
||||
slo[0] <= shi[0])
|
||||
return info(true, l_false, 1, true);
|
||||
// Symbolic or unknown: not classical
|
||||
return info(true, l_false, 1, false);
|
||||
}
|
||||
case OP_RE_FULL_CHAR_SET:
|
||||
case OP_RE_OF_PRED:
|
||||
//TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
|
||||
//TBD: check if the range is unsat
|
||||
return info(true, l_false, 1, false);
|
||||
case OP_RE_CONCAT:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue