mirror of
https://github.com/Z3Prover/z3
synced 2026-07-04 14:26:10 +00:00
Add OP_RE_XOR and union-find bisimulation for ground regex equivalence (#9804)
Implements the algorithm of `Eq(p,q) = Empty(p XOR q)` using a union-find
driven bisimulation closure (per the CAV'26 ERE paper).
### What's added
* **New primitive OP_RE_XOR (re.xor)** wired through seq_decl_plugin:
parser signature, info propagation (nullable, min_length), and
pretty-printer.
* **seq_rewriter**: structural XOR rewrites (r XOR r = empty, r XOR empty =
r, full XOR r = comp(r), comp/comp absorption, complement push, AC
normalisation), nullability (Null(p XOR q) = Null(p) != Null(q)),
derivative (D_a(p XOR q) = D_a(p) XOR D_a(q)), reverse, antimirov
derivative, and `check_deriv_normal_form` coverage.
* **New class seq::regex_bisim** in
`src/ast/rewriter/seq_regex_bisim.{h,cpp}` to keep the bisim logic out
of the already-large `seq_rewriter.cpp`. Uses `basic_union_find` from
`util/union_find.h`, an `obj_map` for the node assignment, and a
50000-step bound (returns `l_undef` on overrun).
* **Integration** in `seq_rewriter::reduce_re_eq` (with a re-entry
guard) and in `seq_regex::propagate_eq` / `propagate_ne` for ground
regexes; on `l_undef` we fall back to the existing axiomatisation.
* **`sls_seq_plugin`**: extend `OP_RE_DIFF` switch arms to also cover
`OP_RE_XOR`.
### Validation
* Full release build with MSVC + Ninja.
* `./test-z3 /a` -- 89/89 tests passing.
* `./test-z3 /seq smt2print_parse` -- PASS.
* Smoke tests with `(a|b)*` vs `(a*b*)*` (equal) and `a*` vs `(a|b)*`
(not equal) return the expected `sat`/`unsat` quickly.
---------
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
589bd9e6f5
commit
513b81253b
9 changed files with 664 additions and 20 deletions
|
|
@ -135,7 +135,8 @@ class seq_rewriter {
|
|||
// re2automaton m_re2aut;
|
||||
op_cache m_op_cache;
|
||||
expr_ref_vector m_es, m_lhs, m_rhs;
|
||||
bool m_coalesce_chars;
|
||||
bool m_coalesce_chars;
|
||||
bool m_in_bisim { false };
|
||||
|
||||
enum length_comparison {
|
||||
shorter_c,
|
||||
|
|
@ -180,6 +181,7 @@ class seq_rewriter {
|
|||
expr_ref mk_der_concat(expr* a, expr* b);
|
||||
expr_ref mk_der_union(expr* a, expr* b);
|
||||
expr_ref mk_der_inter(expr* a, expr* b);
|
||||
expr_ref mk_der_xor(expr* a, expr* b);
|
||||
expr_ref mk_der_compl(expr* a);
|
||||
expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
|
||||
expr_ref mk_der_antimirov_union(expr* r1, expr* r2);
|
||||
|
|
@ -262,6 +264,8 @@ class seq_rewriter {
|
|||
br_status mk_re_complement(expr* a, expr_ref& result);
|
||||
br_status mk_re_star(expr* a, expr_ref& result);
|
||||
br_status mk_re_diff(expr* a, expr* b, expr_ref& result);
|
||||
br_status mk_re_xor(expr* a, expr* b, expr_ref& result);
|
||||
br_status mk_re_xor0(expr* a, expr* b, expr_ref& result);
|
||||
br_status mk_re_plus(expr* a, expr_ref& result);
|
||||
br_status mk_re_opt(expr* a, expr_ref& result);
|
||||
br_status mk_re_power(func_decl* f, expr* a, expr_ref& result);
|
||||
|
|
@ -381,6 +385,18 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Construct r1 XOR r2 applying the structural rewrites in
|
||||
* mk_re_xor (r XOR r = empty, comp/empty/full normalisation, AC
|
||||
* ordering). Used by the bisimulation procedure.
|
||||
*/
|
||||
expr_ref mk_re_xor_simplified(expr* r1, expr* r2) {
|
||||
expr_ref result(m());
|
||||
// Try the structural XOR rewrites first; mk_re_xor receives `result` as its output argument.
if (mk_re_xor(r1, r2, result) == BR_FAILED)
|
||||
// On BR_FAILED, fall back to building the plain XOR term so a term is always returned.
result = re().mk_xor(r1, r2);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences
|
||||
*/
|
||||
|
|
@ -410,6 +426,17 @@ public:
|
|||
*/
|
||||
expr_ref mk_derivative(expr* r);
|
||||
|
||||
/*
|
||||
Classical (non-antimirov) Brzozowski derivative wrt the canonical
|
||||
variable v0 = (:var 0). Unlike `mk_derivative` this entry point keeps
|
||||
the symbolic derivative as a single transition regex (TRegex): boolean
|
||||
operators are pushed into the ITE leaves rather than lifted to the top
|
||||
via _OP_RE_ANTIMIROV_UNION. Used by the regex_bisim equivalence
|
||||
procedure which relies on each leaf of D(p XOR q) being a coherent
|
||||
XOR pair (D_v p) XOR (D_v q).
|
||||
*/
|
||||
expr_ref mk_brz_derivative(expr* r);
|
||||
|
||||
// heuristic elimination of element from condition that comes from a derivative.
|
||||
// special case optimization for conjunctions of equalities, disequalities and ranges.
|
||||
void elim_condition(expr* elem, expr_ref& cond);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue