3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-29 11:58:51 +00:00

Add OP_RE_XOR and union-find bisimulation for ground regex equivalence (#9804)

Implements the algorithm of `Eq(p,q) = Empty(p XOR q)` using a union-find
driven bisimulation closure (per the CAV'26 ERE paper).

### What's added

* **New primitive OP_RE_XOR (re.xor)** wired through seq_decl_plugin:
parser signature, info propagation (nullable, min_length), and
pretty-printer.
* **seq_rewriter**: structural XOR rewrites (r XOR r = empty, r XOR empty =
r, full XOR r = comp(r), comp/comp absorption, complement push, AC
normalisation), nullability (Null(p XOR q) = Null(p) != Null(q)),
derivative (D_a(p XOR q) = D_a(p) XOR D_a(q)), reverse, antimirov
derivative, and `check_deriv_normal_form` coverage.
* **New class seq::regex_bisim** in
`src/ast/rewriter/seq_regex_bisim.{h,cpp}` to keep the bisim logic out
of the already-large `seq_rewriter.cpp`. Uses `basic_union_find` from
`util/union_find.h`, an `obj_map` for the node assignment, and a
50000-step bound (returns `l_undef` on overrun).
* **Integration** in `seq_rewriter::reduce_re_eq` (with a re-entry
guard) and in `seq_regex::propagate_eq` / `propagate_ne` for ground
regexes; on `l_undef` we fall back to the existing axiomatisation.
* **`sls_seq_plugin`**: extend `OP_RE_DIFF` switch arms to also cover
`OP_RE_XOR`.

### Validation

* Full release build with MSVC + Ninja.
* `./test-z3 /a` -- 89/89 tests passing.
* `./test-z3 /seq smt2print_parse` -- PASS.
* Smoke tests with `(a|b)*` vs `(a*b*)*` (equal) and `a*` vs `(a|b)*`
(not equal) return the expected `sat`/`unsat` quickly.

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-10 14:58:20 -07:00 committed by GitHub
parent 589bd9e6f5
commit 513b81253b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 664 additions and 20 deletions

View file

@ -230,6 +230,7 @@ void seq_decl_plugin::init() {
m_sigs[OP_RE_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
m_sigs[OP_RE_INTERSECT] = alloc(psig, m, "re.inter", 1, 2, reAreA, reA);
m_sigs[OP_RE_DIFF] = alloc(psig, m, "re.diff", 1, 2, reAreA, reA);
m_sigs[OP_RE_XOR] = alloc(psig, m, "re.xor", 1, 2, reAreA, reA);
m_sigs[OP_RE_LOOP] = alloc(psig, m, "re.loop", 1, 1, &reA, reA);
m_sigs[OP_RE_POWER] = alloc(psig, m, "re.^", 1, 1, &reA, reA);
m_sigs[OP_RE_COMPLEMENT] = alloc(psig, m, "re.comp", 1, 1, &reA, reA);
@ -507,6 +508,7 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p
case OP_RE_CONCAT:
case OP_RE_INTERSECT:
case OP_RE_DIFF:
case OP_RE_XOR:
m_has_re = true;
return mk_left_assoc_fun(k, arity, domain, range, k, k);
@ -1513,6 +1515,13 @@ std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
print(out, r2);
out << ")";
}
else if (re.is_xor(e, r1, r2)) {
out << "(";
print(out, r1);
out << ")XOR(";
print(out, r2);
out << ")";
}
else if (re.m.is_ite(e, s, r1, r2)) {
out << (html_encode ? "(&#x1D422;&#x1D41F; " : "(if ");
print(out, s);
@ -1704,6 +1713,10 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return i1.diff(i2);
case OP_RE_XOR:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return i1.xor_(i2);
}
return unknown_info;
}
@ -1829,6 +1842,25 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) co
return *this;
}
// Derive the info record for (p XOR q), where `*this` describes p and `rhs` describes q.
// When either operand's info is not known, that operand's info is returned unchanged
// (the left operand is checked first).
seq_util::rex::info seq_util::rex::info::xor_(seq_util::rex::info const& rhs) const {
    if (!is_known())
        return *this;
    if (!rhs.is_known())
        return rhs;

    // Null(p XOR q) = Null(p) XOR Null(q): true exactly when the two operands
    // disagree on nullability; stays undetermined if either side is undetermined.
    lbool result_nullable;
    if (nullable == l_undef || rhs.nullable == l_undef)
        result_nullable = l_undef;
    else if (nullable == rhs.nullable)
        result_nullable = l_false;
    else
        result_nullable = l_true;

    // NOTE(review): the third argument (0) is presumably the min_length lower bound;
    // the meaning of the trailing `false` is not visible from here -- confirm
    // against the info constructor.
    return info(interpreted & rhs.interpreted, result_nullable, 0, false);
}
seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
if (is_known()) {
if (i.is_known()) {