Add OP_RE_XOR and union-find bisimulation for ground regex equivalence (#9804)

Implements the algorithm of Eq(p,q) = Empty(p XOR q)' using a union-find driven bisimulation closure (per the CAV'26 ERE paper). ### What's added * **New primitive OP_RE_XOR (re.xor)** wired through seq_decl_plugin: parser signature, info propagation (nullable, min_length), and pretty-printer. * **seq_rewriter**: structural XOR rewrites ( XOR r = empty, XOR empty = r, ull XOR r = comp(r), comp/comp absorption, complement push, AC normalisation), nullability (Null(p XOR q) = Null(p) != Null(q)), derivative (D_a(p XOR q) = D_a(p) XOR D_a(q)), reverse, antimirov derivative, and `check_deriv_normal_form` coverage. * **New class seq::regex_bisim** in `src/ast/rewriter/seq_regex_bisim.{h,cpp}` to keep the bisim logic out of the already-large `seq_rewriter.cpp`. Uses `basic_union_find` from `util/union_find.h`, an `obj_map` for the node assignment, and a 50000-step bound (returns `l_undef` on overrun). * **Integration** in `seq_rewriter::reduce_re_eq` (with a re-entry guard) and in `seq_regex::propagate_eq` / `propagate_ne` for ground regexes; on `l_undef` we fall back to the existing axiomatisation. * **`sls_seq_plugin`**: extend `OP_RE_DIFF` switch arms to also cover `OP_RE_XOR`. ### Validation * Full release build with MSVC + Ninja. * `./test-z3 /a` -- 89/89 tests passing. * `./test-z3 /seq smt2print_parse` -- PASS. * Smoke tests with `(a|b)*` vs `(a*b*)*` (equal) and `a*` vs `(a|b)*` (not equal) return the expected `sat`/`unsat` quickly. --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-07-23 23:42:33 +00:00 · 2026-06-10 14:58:20 -07:00 · 2026-06-10 14:58:20 -07:00 · 513b81253b
commit 513b81253b
parent 589bd9e6f5
9 changed files with 664 additions and 20 deletions
--- a/src/ast/rewriter/CMakeLists.txt
+++ b/src/ast/rewriter/CMakeLists.txt
@ -41,6 +41,7 @@ z3_add_component(rewriter
    seq_eq_solver.cpp
    seq_subset.cpp
    seq_rewriter.cpp
+    seq_regex_bisim.cpp
    seq_skolem.cpp
    th_rewriter.cpp
    value_sweep.cpp
--- a/src/ast/rewriter/seq_regex_bisim.cpp
+++ b/src/ast/rewriter/seq_regex_bisim.cpp
@ -0,0 +1,267 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_regex_bisim.cpp
+
+Abstract:
+
+    See seq_regex_bisim.h.
+
+Author:
+
+    Nikolaj Bjorner (nbjorner)
+    Margus Veanes   (veanes)
+
+--*/
+
+#include "ast/rewriter/seq_regex_bisim.h"
+#include "ast/rewriter/seq_rewriter.h"
+#include "ast/ast_pp.h"
+#include "ast/ast_util.h"
+#include "ast/for_each_expr.h"
+
+namespace seq {
+
+    regex_bisim::regex_bisim(seq_rewriter& rw):
+        m(rw.m()),
+        m_rw(rw),
+        m_util(rw.u()),
+        m_pinned(m),
+        m_worklist(m) {
+    }
+
+    void regex_bisim::reset() {
+        m_uf.reset();
+        m_node_of.reset();
+        m_pinned.reset();
+        m_worklist.reset();
+        m_steps = 0;
+    }
+
+    /*
+       Map an expression to a union-find node, allocating a fresh node on
+       first encounter.
+    */
+    unsigned regex_bisim::node_of(expr* r) {
+        unsigned id = 0;
+        if (m_node_of.find(r, id))
+            return id;
+        id = m_uf.mk_var();
+        m_node_of.insert(r, id);
+        m_pinned.push_back(r);
+        return id;
+    }
+
+    /*
+       Compute a definite nullability answer for r.
+       If the seq_rewriter is unable to produce a literal true/false (for
+       example because r contains an uninterpreted symbol), return l_undef.
+    */
+    lbool regex_bisim::nullability(expr* r) {
+        expr_ref n = m_rw.is_nullable(r);
+        if (m.is_true(n))
+            return l_true;
+        if (m.is_false(n))
+            return l_false;
+        return l_undef;
+    }
+
+    /*
+       Test whether a regex expression is a kind that the bisimulation
+       procedure can reason about. We require it to be a syntactic ground
+       term (no free variables) and that its info reports min_length info
+       (which implies that it parses cleanly as a regex constructor).
+    */
+    bool regex_bisim::is_supported(expr* r) {
+        if (!m_util.is_re(r))
+            return false;
+        if (!m_util.re.get_info(r).is_known())
+            return false;
+        // Reject regexes mentioning free variables; the symbolic
+        // derivative engine introduces (:var 0) only after we call it
+        // ourselves, so any pre-existing variable would be a free var.
+        if (!is_ground(r))
+            return false;
+        return true;
+    }
+
+    /*
+       Collect the leaves of a t-regex der (an ITE / antimirov union /
+       union-tree with regex leaves) into the output vector. Empty
+       (re.empty) leaves are dropped.
+
+       Returns false if we encountered an unexpected node (e.g. a free
+       variable creeping in) — in that case the caller should bail out.
+    */
+    bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) {
+        ptr_vector<expr> work;
+        obj_hashtable<expr> seen;
+        work.push_back(der);
+        seen.insert(der);
+        while (!work.empty()) {
+            expr* e = work.back();
+            work.pop_back();
+            expr* c = nullptr, * t = nullptr, * f = nullptr;
+            if (m.is_ite(e, c, t, f) ||
+                m_util.re.is_union(e, t, f) ||
+                m_util.re.is_antimirov_union(e, t, f)) {
+                if (seen.insert_if_not_there(t))
+                    work.push_back(t);
+                if (seen.insert_if_not_there(f))
+                    work.push_back(f);
+                continue;
+            }
+            if (m_util.re.is_empty(e))
+                continue;
+            if (!m_util.is_re(e))
+                return false;
+            leaves.push_back(e);
+        }
+        return true;
+    }
+
+    /*
+       Fast inequivalence check based on the get_info().classical flag.
+
+       Invariant: if r is well-formed and get_info(r).classical is true,
+       then L(r) is non-empty. The flag is set for regexes built only
+       from str.to_re, re.all, union, concat, star, plus, opt, loop;
+       it excludes complement, intersection, diff, xor, and the empty
+       regex.
+
+       A bare regex leaf l (i.e. not a XOR pair) represents the implicit
+       pair (empty XOR l). If l is classical, L(l) is non-empty so the
+       pair is non-empty: the original two regexes have a distinguishing
+       prefix and are inequivalent.
+
+       For an XOR leaf xor(a, b): if both a and b are classical and have
+       different min_length, then the shortest word of one is not in the
+       other, so the pair is non-empty and we can short-circuit. (The
+       case a == b syntactically is already handled by mk_re_xor0.)
+
+       Returns true if the leaf proves inequivalence; false if no
+       conclusion can be drawn.
+    */
+    bool regex_bisim::classical_distinguishing(expr* l) {
+        expr* a = nullptr, * b = nullptr;
+        if (m_util.re.is_xor(l, a, b)) {
+            auto ia = m_util.re.get_info(a);
+            auto ib = m_util.re.get_info(b);
+            if (ia.is_known() && ib.is_known() &&
+                ia.classical && ib.classical &&
+                ia.min_length != ib.min_length)
+                return true;
+            return false;
+        }
+        if (m_util.re.is_empty(l))
+            return false;
+        auto info = m_util.re.get_info(l);
+        return info.is_known() && info.classical;
+    }
+
+    /*
+       Merge the two sides of the XOR pair, returning true if a fresh
+       merge happened (i.e. the pair must still be processed) and false
+       if the two sides were already in the same union-find class.
+
+       For non-XOR leaves we treat the leaf l as the pair (empty XOR l).
+    */
+    bool regex_bisim::merge_leaf(expr* leaf) {
+        expr* a = nullptr, * b = nullptr;
+        if (!m_util.re.is_xor(leaf, a, b)) {
+            a = m_util.re.mk_empty(leaf->get_sort());
+            b = leaf;
+            m_pinned.push_back(a);
+        }
+        unsigned ia = node_of(a);
+        unsigned ib = node_of(b);
+        if (m_uf.find(ia) == m_uf.find(ib))
+            return false;
+        m_uf.merge(ia, ib);
+        return true;
+    }
+
+    /*
+       Decide equivalence by bisimulation on D(p XOR q).
+    */
+    lbool regex_bisim::are_equivalent(expr* p, expr* q) {
+        return are_equivalent_core(p, q);
+    }
+
+    lbool regex_bisim::are_equivalent_core(expr* p, expr* q) {
+        if (!is_supported(p) || !is_supported(q))
+            return l_undef;
+        if (p == q)
+            return l_true;
+
+        reset();
+
+        // Build the initial pair r0 = p XOR q applying the structural
+        // XOR rewrites (r XOR r = empty, AC normalisation, etc.).
+        expr_ref r0 = m_rw.mk_re_xor_simplified(p, q);
+
+        // If r0 simplified to empty, the two regexes are equivalent.
+        if (m_util.re.is_empty(r0))
+            return l_true;
+
+        lbool n0 = nullability(r0);
+        if (n0 == l_true)
+            return l_false; // distinguishing empty word
+        if (n0 == l_undef)
+            return l_undef;
+
+        // Classical-leaf shortcut applied to r0 (covers the case where
+        // mk_re_xor_simplified collapsed p XOR q to a bare classical
+        // residual, e.g. when one side reduced to empty).
+        if (classical_distinguishing(r0))
+            return l_false;
+
+        if (!merge_leaf(r0))
+            return l_true; // already merged: trivially equivalent
+        m_worklist.push_back(r0);
+
+        while (!m_worklist.empty()) {
+            if (++m_steps > m_step_bound)
+                return l_undef;
+
+            expr_ref r(m_worklist.back(), m);
+            m_worklist.pop_back();
+
+            // Compute the symbolic derivative wrt the canonical variable
+            // (:var 0). The result is a transition regex (ITE tree) whose
+            // leaves are regex expressions. We use the classical Brzozowski
+            // entry point so the derivative stays as a single TRegex and
+            // does not lift unions to the top via antimirov nodes — this
+            // preserves the XOR-pair invariant the bisimulation relies on.
+            expr_ref d(m_rw.mk_brz_derivative(r), m);
+            expr_ref_vector leaves(m);
+            if (!collect_leaves(d, leaves))
+                return l_undef;
+
+            // First pass: check for any nullable leaf (definitive
+            // distinguishing empty-continuation word) or any classically
+            // non-empty leaf (definitive distinguishing non-empty prefix).
+            for (expr* l : leaves) {
+                lbool nl = nullability(l);
+                if (nl == l_true)
+                    return l_false;
+                if (nl == l_undef)
+                    return l_undef;
+                if (classical_distinguishing(l))
+                    return l_false;
+            }
+
+            // Second pass: merge each leaf into the union-find; new
+            // merges go onto the worklist.
+            for (expr* l : leaves) {
+                if (merge_leaf(l)) {
+                    m_pinned.push_back(l);
+                    m_worklist.push_back(l);
+                }
+            }
+        }
+        return l_true;
+    }
+}
--- a/src/ast/rewriter/seq_regex_bisim.h
+++ b/src/ast/rewriter/seq_regex_bisim.h
@ -0,0 +1,99 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_regex_bisim.h
+
+Abstract:
+
+    Union-find based bisimulation algorithm for symbolic regular expression
+    equivalence, based on the construction described in:
+
+      "Symbolic Extended Regular Expression Equivalence"
+      Veanes, Bjorner et al., CAV'26 (see \git\ere\cav26\paper.tex)
+
+    The algorithm decides equivalence of two regexes p, q by performing
+    a bisimulation search on the symbolic derivative of p XOR q. A
+    union-find structure tracks pairs of regex states that should be
+    bisimilar, and the worklist is initialised with the singleton
+    {p XOR q}.
+
+    The algorithm returns:
+        l_true   p and q are equivalent (bisimilar)
+        l_false  p and q are inequivalent (a distinguishing prefix exists)
+        l_undef  the algorithm could not decide within the configured bound
+                 or encountered a non-ground regex it cannot reason about
+
+Author:
+
+    Nikolaj Bjorner (nbjorner)
+    Margus Veanes   (veanes)
+
+--*/
+#pragma once
+
+#include "ast/seq_decl_plugin.h"
+#include "util/lbool.h"
+#include "util/obj_hashtable.h"
+#include "util/union_find.h"
+
+class seq_rewriter;
+
+namespace seq {
+
+    /*
+       Union-find based bisimulation algorithm for deciding equivalence
+       of two ground (closed) regular expressions in ERE.
+
+       Usage:
+
+           seq::regex_bisim bisim(rewriter);
+           lbool r = bisim.are_equivalent(p, q);
+           if (r == l_true)  // p ≡ q
+           if (r == l_false) // p ≢ q
+           if (r == l_undef) // cannot decide
+
+       The implementation only attempts the equivalence check on ground
+       regexes (no free variables) that use the standard symbolic regex
+       constructors. For non-ground or unsupported inputs the procedure
+       conservatively returns l_undef.
+    */
+    class regex_bisim {
+    private:
+        ast_manager&             m;
+        seq_rewriter&            m_rw;
+        seq_util                 m_util;
+        basic_union_find         m_uf;
+        obj_map<expr, unsigned>  m_node_of;
+        expr_ref_vector          m_pinned;
+        expr_ref_vector          m_worklist;
+        unsigned                 m_step_bound { 50000 };
+        unsigned                 m_steps      { 0 };
+
+        unsigned node_of(expr* r);
+        bool merge_leaf(expr* xor_pair);
+        bool collect_leaves(expr* der, expr_ref_vector& leaves);
+        lbool nullability(expr* r);
+        bool is_supported(expr* r);
+        // Returns true if the leaf l proves that the original pair is
+        // inequivalent and the bisim can short-circuit to l_false.
+        // Uses the get_info().classical invariant: a classical regex
+        // has non-empty language, so a bare classical leaf (implicitly
+        // empty XOR r) is a distinguishing prefix. Similarly an XOR of
+        // two classical regexes with different min_length is
+        // distinguishing.
+        bool classical_distinguishing(expr* l);
+        void reset();
+
+        lbool are_equivalent_core(expr* p, expr* q);
+
+    public:
+        regex_bisim(seq_rewriter& rw);
+
+        void set_step_bound(unsigned n) { m_step_bound = n; }
+
+        lbool are_equivalent(expr* p, expr* q);
+    };
+
+}
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@ -20,6 +20,7 @@ Authors:

 #include "util/uint_set.h"
 #include "ast/rewriter/seq_rewriter.h"
+#include "ast/rewriter/seq_regex_bisim.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/array_decl_plugin.h"
 #include "ast/ast_pp.h"
@ -267,6 +268,14 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
            st = BR_DONE;
        }          
        break;
+    case OP_RE_XOR:
+        if (num_args == 2)
+            st = mk_re_xor(args[0], args[1], result);
+        else if (num_args == 1) {
+            result = args[0];
+            st = BR_DONE;
+        }
+        break;
    case OP_RE_INTERSECT:
        if (num_args == 1) {
            result = args[0];
@ -2622,6 +2631,23 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
        m_br.mk_not(is_nullable(r2), result);
        m_br.mk_and(result, is_nullable(r1), result);
    }
+    else if (re().is_xor(r, r1, r2)) {
+        // Null(r1 XOR r2) = Null(r1) XOR Null(r2)
+        expr_ref n1(is_nullable(r1), m());
+        expr_ref n2(is_nullable(r2), m());
+        // Simplify when either operand is a boolean literal so the
+        // bisimulation procedure can use the answer directly.
+        if (m().is_true(n1))
+            result = mk_not(m(), n2);
+        else if (m().is_false(n1))
+            result = n2;
+        else if (m().is_true(n2))
+            result = mk_not(m(), n1);
+        else if (m().is_false(n2))
+            result = n1;
+        else
+            result = m().mk_xor(n1, n2);
+    }
    else if (re().is_star(r) || 
        re().is_opt(r) ||
        re().is_full_seq(r) ||
@ -2742,6 +2768,12 @@ br_status seq_rewriter::mk_re_reverse(expr* r, expr_ref& result) {
        result = re().mk_diff(a, b);
        return BR_REWRITE2;
    }
+    else if (re().is_xor(r, r1, r2)) {
+        auto a = re().mk_reverse(r1);
+        auto b = re().mk_reverse(r2);
+        result = re().mk_xor(a, b);
+        return BR_REWRITE2;
+    }
    else if (m().is_ite(r, p, r1, r2)) {
        result = m().mk_ite(p, re().mk_reverse(r1), re().mk_reverse(r2));
        return BR_REWRITE2;
@ -2882,6 +2914,7 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) {
        re().is_concat(r, r1, r2) ||
        re().is_union(r, r1, r2) ||
        re().is_intersection(r, r1, r2) ||
+        re().is_xor(r, r1, r2) ||
        m().is_ite(r, p, r1, r2)) {
        check_deriv_normal_form(r1, new_level);
        check_deriv_normal_form(r2, new_level);
@ -2921,6 +2954,14 @@ expr_ref seq_rewriter::mk_derivative(expr* r) {
    return mk_antimirov_deriv(v, r, m().mk_true());
 }

+expr_ref seq_rewriter::mk_brz_derivative(expr* r) {
+    sort* seq_sort = nullptr, * ele_sort = nullptr;
+    VERIFY(m_util.is_re(r, seq_sort));
+    VERIFY(m_util.is_seq(seq_sort, ele_sort));
+    expr_ref v(m().mk_var(0, ele_sort), m());
+    return mk_derivative_rec(v, r);
+}
+
 expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
    return mk_antimirov_deriv(ele, r, m().mk_true());
 }
@ -3120,6 +3161,10 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
        result = mk_antimirov_deriv_intersection(e, 
            mk_antimirov_deriv(e, r1, path),
            mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r2, path)), m().mk_true());
+    else if (re().is_xor(r, r1, r2))
+        // D(e, r1 XOR r2) = D(e, r1) XOR D(e, r2)
+        result = mk_der_xor(mk_antimirov_deriv(e, r1, path),
+                            mk_antimirov_deriv(e, r2, path));
    else if (re().is_of_pred(r, r1)) {
        array_util array(m());
        expr* args[2] = { r1, e };
@ -3451,6 +3496,11 @@ expr_ref seq_rewriter::mk_regex_reverse(expr* r) {
        auto b1 = mk_regex_reverse(r2);
        result = re().mk_diff(a1, b1);
    }
+    else if (re().is_xor(r, r1, r2)) {
+        auto a1 = mk_regex_reverse(r1);
+        auto b1 = mk_regex_reverse(r2);
+        result = re().mk_xor(a1, b1);
+    }
    else if (re().is_star(r, r1))
        result = re().mk_star(mk_regex_reverse(r1));
    else if (re().is_plus(r, r1))
@ -3546,7 +3596,6 @@ expr_ref  seq_rewriter::simplify_path(expr* elem, expr* path) {


 expr_ref seq_rewriter::mk_der_antimirov_union(expr* r1, expr* r2) {
-    verbose_stream() << "union " << r1->get_id() << " " << r2->get_id() << "\n";
    return mk_der_op(_OP_RE_ANTIMIROV_UNION, r1, r2);
 }

@ -3558,6 +3607,10 @@ expr_ref seq_rewriter::mk_der_inter(expr* r1, expr* r2) {
    return mk_der_op(OP_RE_INTERSECT, r1, r2);
 }

+expr_ref seq_rewriter::mk_der_xor(expr* r1, expr* r2) {
+    return mk_der_op(OP_RE_XOR, r1, r2);
+}
+
 expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
    return mk_der_op(OP_RE_CONCAT, r1, r2);
 }
@ -3729,7 +3782,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
                return result;
            }
            // Order with higher IDs on the outside
-            bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT;
+            bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT || k == OP_RE_XOR;
            if (is_symmetric && get_id(ca) < get_id(cb)) {
                std::swap(a, b);
                std::swap(ca, cb);
@ -3776,6 +3829,10 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
        if (BR_FAILED == mk_re_concat(a, b, result))
            result = re().mk_concat(a, b);
        break;
+    case OP_RE_XOR:
+        if (BR_FAILED == mk_re_xor(a, b, result))
+            result = re().mk_xor(a, b);
+        break;
    default:
        UNREACHABLE();
        break;
@ -3806,6 +3863,10 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
        if (BR_FAILED != mk_re_concat(a, b, result))
            return result;
        break;
+    case OP_RE_XOR:
+        if (BR_FAILED != mk_re_xor0(a, b, result))
+            return result;
+        break;
    default:
        break;
    }
@ -3909,6 +3970,13 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
    return result;
 }

+/*
+   Classical Brzozowski derivative used by the regex_bisim equivalence
+   procedure. Unlike `mk_antimirov_deriv`, this variant never creates
+   _OP_RE_ANTIMIROV_UNION nodes — it stays in a classical (single regex
+   tree) form. The bisimulation algorithm relies on this so that each
+   leaf of D(p XOR q) is a coherent XOR pair (D_v p) XOR (D_v q).
+*/
 expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
    expr_ref result(m());
    sort* seq_sort = nullptr, *ele_sort = nullptr;
@ -3920,57 +3988,59 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
    unsigned lo = 0, hi = 0;
    if (re().is_concat(r, r1, r2)) {
        expr_ref is_n = is_nullable(r1);
-        expr_ref dr1 = mk_derivative(ele, r1);
+        expr_ref dr1 = mk_derivative_rec(ele, r1);
        result = mk_der_concat(dr1, r2);
        if (m().is_false(is_n)) {
            return result;
        }
-        expr_ref dr2 = mk_derivative(ele, r2);
+        expr_ref dr2 = mk_derivative_rec(ele, r2);
        is_n = re_predicate(is_n, seq_sort);
        if (re().is_empty(dr2)) {
            //do not concatenate [], it is a deade-end 
            return result;
        }
        else {
-            // Instead of mk_der_union here, we use mk_der_antimirov_union to
-            // force the two cases to be considered separately and lifted to
-            // the top level. This avoids blowup in cases where determinization
-            // is expensive.
-            return mk_der_antimirov_union(result, mk_der_concat(is_n, dr2));
+            // Classical Brzozowski union: keep the derivative tree free of
+            // antimirov-union nodes so the bisimulation procedure sees a
+            // single regex tree whose leaves are XOR pairs.
+            return mk_der_union(result, mk_der_concat(is_n, dr2));
        }
    }
    else if (re().is_star(r, r1)) {
-        return mk_der_concat(mk_derivative(ele, r1), r);
+        return mk_der_concat(mk_derivative_rec(ele, r1), r);
    }
    else if (re().is_plus(r, r1)) {
        expr_ref star(re().mk_star(r1), m());
-        return mk_derivative(ele, star);
+        return mk_derivative_rec(ele, star);
    }
    else if (re().is_union(r, r1, r2)) {
-        return mk_der_union(mk_derivative(ele, r1), mk_derivative(ele, r2));
+        return mk_der_union(mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
    }
    else if (re().is_intersection(r, r1, r2)) {
-        return mk_der_inter(mk_derivative(ele, r1), mk_derivative(ele, r2));
+        return mk_der_inter(mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
    }
    else if (re().is_diff(r, r1, r2)) {
-        return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2)));
+        return mk_der_inter(mk_derivative_rec(ele, r1), mk_der_compl(mk_derivative_rec(ele, r2)));
+    }
+    else if (re().is_xor(r, r1, r2)) {
+        return mk_der_xor(mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
    }
    else if (m().is_ite(r, p, r1, r2)) {
        // there is no BDD normalization here
-        result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2));
+        result = m().mk_ite(p, mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
        return result;
    }
    else if (re().is_opt(r, r1)) {
-        return mk_derivative(ele, r1);
+        return mk_derivative_rec(ele, r1);
    }
    else if (re().is_complement(r, r1)) {
-        return mk_der_compl(mk_derivative(ele, r1));
+        return mk_der_compl(mk_derivative_rec(ele, r1));
    }
    else if (re().is_loop(r, r1, lo)) {
        if (lo > 0) {
            lo--;
        }
-        result = mk_derivative(ele, r1);
+        result = mk_derivative_rec(ele, r1);
        //do not concatenate with [] (emptyset)
        if (re().is_empty(result)) {
            return result;
@ -3988,7 +4058,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
        if (lo > 0) {
            lo--;
        }
-        result = mk_derivative(ele, r1);
+        result = mk_derivative_rec(ele, r1);
        //do not concatenate with [] (emptyset) or handle the rest of the loop if no more iterations remain
        if (re().is_empty(result) || hi == 0) {
            return result;
@ -4756,6 +4826,91 @@ br_status seq_rewriter::mk_re_diff(expr* a, expr* b, expr_ref& result) {
    return BR_REWRITE2;
 }

+/*
+   Symmetric difference / XOR of regexes.
+   LANG(a XOR b) = (LANG(a) \ LANG(b)) U (LANG(b) \ LANG(a))
+
+   Equivalence preserving rewrites applied here (paper Section 5):
+     r XOR r           = []
+     r XOR []          = r
+     [] XOR r          = r
+     comp(r) XOR comp(s)  = r XOR s
+     r XOR comp(s)     = comp(r XOR s)
+     comp(r) XOR s     = comp(r XOR s)
+     full_seq XOR r    = comp(r)
+     r XOR full_seq    = comp(r)
+   We also normalize the argument order using expression ids so that the
+   structure is canonical for AC.
+*/
+br_status seq_rewriter::mk_re_xor0(expr* a, expr* b, expr_ref& result) {
+    // Reduction-only variant of mk_re_xor for use inside mk_der_op.
+    // Avoids any transformation that would create a top-level re.xor
+    // node (e.g. AC normalisation or complement absorption), because
+    // mk_der_op needs to keep distributing the operation through ITE
+    // BDDs. Only structural simplifications that produce a non-XOR
+    // result are applied here.
+    if (a == b) {
+        result = re().mk_empty(a->get_sort());
+        return BR_DONE;
+    }
+    if (re().is_empty(a)) {
+        result = b;
+        return BR_DONE;
+    }
+    if (re().is_empty(b)) {
+        result = a;
+        return BR_DONE;
+    }
+    return BR_FAILED;
+}
+
+br_status seq_rewriter::mk_re_xor(expr* a, expr* b, expr_ref& result) {
+    if (a == b) {
+        result = re().mk_empty(a->get_sort());
+        return BR_DONE;
+    }
+    if (re().is_empty(a)) {
+        result = b;
+        return BR_DONE;
+    }
+    if (re().is_empty(b)) {
+        result = a;
+        return BR_DONE;
+    }
+    if (re().is_full_seq(a)) {
+        result = re().mk_complement(b);
+        return BR_REWRITE1;
+    }
+    if (re().is_full_seq(b)) {
+        result = re().mk_complement(a);
+        return BR_REWRITE1;
+    }
+    expr* ra = nullptr, * rb = nullptr;
+    bool ca = re().is_complement(a, ra);
+    bool cb = re().is_complement(b, rb);
+    if (ca && cb) {
+        // comp(ra) XOR comp(rb) = ra XOR rb
+        result = re().mk_xor(ra, rb);
+        return BR_REWRITE1;
+    }
+    if (ca) {
+        // comp(ra) XOR b = comp(ra XOR b)
+        result = re().mk_complement(re().mk_xor(ra, b));
+        return BR_REWRITE2;
+    }
+    if (cb) {
+        // a XOR comp(rb) = comp(a XOR rb)
+        result = re().mk_complement(re().mk_xor(a, rb));
+        return BR_REWRITE2;
+    }
+    // Normalize order using expression ids (AC normalization).
+    if (a->get_id() > b->get_id()) {
+        result = re().mk_xor(b, a);
+        return BR_DONE;
+    }
+    return BR_FAILED;
+}
+

 br_status seq_rewriter::mk_re_loop(func_decl* f, unsigned num_args, expr* const* args, expr_ref& result) {
    rational n1, n2;
@ -5177,6 +5332,30 @@ br_status seq_rewriter::reduce_re_eq(expr* l, expr* r, expr_ref& result) {
    if (re().is_empty(r)) {
        return reduce_re_is_empty(l, result);
    }
+    if (l == r) {
+        result = m().mk_true();
+        return BR_DONE;
+    }
+    /*
+     * Try the union-find bisimulation procedure for ground regex equality.
+     * Guarded against re-entry because the bisim may construct equalities
+     * indirectly.  On l_undef the rewriter falls through to the existing
+     * axiomatisation path.
+     */
+    if (!m_in_bisim && is_ground(l) && is_ground(r)) {
+        flet<bool> _block(m_in_bisim, true);
+        seq::regex_bisim bisim(*this);
+        switch (bisim.are_equivalent(l, r)) {
+        case l_true:
+            result = m().mk_true();
+            return BR_DONE;
+        case l_false:
+            result = m().mk_false();
+            return BR_DONE;
+        case l_undef:
+            break;
+        }
+    }
    return BR_FAILED;
 }

--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@ -135,7 +135,8 @@ class seq_rewriter {
    // re2automaton   m_re2aut;
    op_cache       m_op_cache;
    expr_ref_vector m_es, m_lhs, m_rhs;
-    bool           m_coalesce_chars;    
+    bool           m_coalesce_chars;
+    bool           m_in_bisim { false };

    enum length_comparison {
        shorter_c, 
@ -180,6 +181,7 @@ class seq_rewriter {
    expr_ref mk_der_concat(expr* a, expr* b);
    expr_ref mk_der_union(expr* a, expr* b);
    expr_ref mk_der_inter(expr* a, expr* b);
+    expr_ref mk_der_xor(expr* a, expr* b);
    expr_ref mk_der_compl(expr* a);
    expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
    expr_ref mk_der_antimirov_union(expr* r1, expr* r2);
@ -262,6 +264,8 @@ class seq_rewriter {
    br_status mk_re_complement(expr* a, expr_ref& result);
    br_status mk_re_star(expr* a, expr_ref& result);
    br_status mk_re_diff(expr* a, expr* b, expr_ref& result);
+    br_status mk_re_xor(expr* a, expr* b, expr_ref& result);
+    br_status mk_re_xor0(expr* a, expr* b, expr_ref& result);
    br_status mk_re_plus(expr* a, expr_ref& result);
    br_status mk_re_opt(expr* a, expr_ref& result);
    br_status mk_re_power(func_decl* f, expr* a, expr_ref& result);
@ -381,6 +385,18 @@ public:
        return result;
    }

+    /*
+     * Construct r1 XOR r2 applying the structural rewrites in
+     * mk_re_xor (r XOR r = empty, comp/empty/full normalisation, AC
+     * ordering). Used by the bisimulation procedure.
+     */
+    expr_ref mk_re_xor_simplified(expr* r1, expr* r2) {
+        expr_ref result(m());
+        if (mk_re_xor(r1, r2, result) == BR_FAILED)
+            result = re().mk_xor(r1, r2);
+        return result;
+    }
+
    /**
     * check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences
     */
@ -410,6 +426,17 @@ public:
    */
    expr_ref mk_derivative(expr* r);

+    /*
+    Classical (non-antimirov) Brzozowski derivative wrt the canonical
+    variable v0 = (:var 0). Unlike `mk_derivative` this entry point keeps
+    the symbolic derivative as a single transition regex (TRegex): boolean
+    operators are pushed into the ITE leaves rather than lifted to the top
+    via _OP_RE_ANTIMIROV_UNION. Used by the regex_bisim equivalence
+    procedure which relies on each leaf of D(p XOR q) being a coherent
+    XOR pair (D_v p) XOR (D_v q).
+    */
+    expr_ref mk_brz_derivative(expr* r);
+
    // heuristic elimination of element from condition that comes form a derivative.
    // special case optimization for conjunctions of equalities, disequalities and ranges.
    void elim_condition(expr* elem, expr_ref& cond);
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@ -230,6 +230,7 @@ void seq_decl_plugin::init() {
    m_sigs[OP_RE_UNION]      = alloc(psig, m, "re.union",     1, 2, reAreA, reA);
    m_sigs[OP_RE_INTERSECT]  = alloc(psig, m, "re.inter",     1, 2, reAreA, reA);
    m_sigs[OP_RE_DIFF]       = alloc(psig, m, "re.diff",      1, 2, reAreA, reA);
+    m_sigs[OP_RE_XOR]        = alloc(psig, m, "re.xor",       1, 2, reAreA, reA);
    m_sigs[OP_RE_LOOP]           = alloc(psig, m, "re.loop",    1, 1, &reA, reA);
    m_sigs[OP_RE_POWER]          = alloc(psig, m, "re.^", 1, 1, &reA, reA);
    m_sigs[OP_RE_COMPLEMENT]     = alloc(psig, m, "re.comp", 1, 1, &reA, reA);
@ -507,6 +508,7 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p
    case OP_RE_CONCAT:
    case OP_RE_INTERSECT:
    case OP_RE_DIFF:
+    case OP_RE_XOR:
        m_has_re = true;
        return mk_left_assoc_fun(k, arity, domain, range, k, k);

@ -1513,6 +1515,13 @@ std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
        print(out, r2);
        out << ")";
    }
+    else if (re.is_xor(e, r1, r2)) {
+        out << "(";
+        print(out, r1);
+        out << ")XOR(";
+        print(out, r2);
+        out << ")";
+    }
    else if (re.m.is_ite(e, s, r1, r2)) {
        out << (html_encode ? "(&#x1D422;&#x1D41F; " : "(if ");
        print(out, s);
@ -1704,6 +1713,10 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
            i1 = get_info_rec(e->get_arg(0));
            i2 = get_info_rec(e->get_arg(1));
            return i1.diff(i2);
+        case OP_RE_XOR:
+            i1 = get_info_rec(e->get_arg(0));
+            i2 = get_info_rec(e->get_arg(1));
+            return i1.xor_(i2);
        }
        return unknown_info;
    }
@ -1829,6 +1842,25 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) co
        return *this;
 }

+seq_util::rex::info seq_util::rex::info::xor_(seq_util::rex::info const& rhs) const {
+    if (is_known()) {
+        if (rhs.is_known()) {
+            // Null(p XOR q) = Null(p) XOR Null(q)
+            lbool xor_nullable = l_undef;
+            if (nullable != l_undef && rhs.nullable != l_undef)
+                xor_nullable = (nullable == rhs.nullable) ? l_false : l_true;
+            return info(interpreted & rhs.interpreted,
+                xor_nullable,
+                0,
+                false);
+        }
+        else
+            return rhs;
+    }
+    else
+        return *this;
+}
+
 seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
    if (is_known()) {
        if (i.is_known()) {
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@ -68,6 +68,7 @@ enum seq_op_kind {
    OP_RE_UNION,
    OP_RE_DIFF,
    OP_RE_INTERSECT,
+    OP_RE_XOR,
    OP_RE_LOOP,
    OP_RE_POWER,
    OP_RE_COMPLEMENT,
@ -491,6 +492,7 @@ public:
            info disj(info const& rhs) const;
            info conj(info const& rhs) const; 
            info diff(info const& rhs) const;
+            info xor_(info const& rhs) const;
            info orelse(info const& rhs) const;
            info loop(unsigned lower, unsigned upper) const;

@ -523,6 +525,7 @@ public:
        app* mk_union(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_UNION, r1, r2); }
        app* mk_inter(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_INTERSECT, r1, r2); }
        app* mk_diff(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_DIFF, r1, r2); }
+        app* mk_xor(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_XOR, r1, r2); }
        app* mk_complement(expr* r) { return m.mk_app(m_fid, OP_RE_COMPLEMENT, r); }
        app* mk_star(expr* r) { return m.mk_app(m_fid, OP_RE_STAR, r); }
        app* mk_plus(expr* r) { return m.mk_app(m_fid, OP_RE_PLUS, r); }
@ -546,6 +549,7 @@ public:
        bool is_union(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_UNION); }
        bool is_intersection(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_INTERSECT); }
        bool is_diff(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_DIFF); }
+        bool is_xor(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_XOR); }
        bool is_complement(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_COMPLEMENT); }
        bool is_star(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_STAR); }
        bool is_plus(expr const* n)    const { return is_app_of(n, m_fid, OP_RE_PLUS); }
@ -578,6 +582,7 @@ public:
        MATCH_BINARY(is_union);
        MATCH_BINARY(is_intersection);
        MATCH_BINARY(is_diff);
+        MATCH_BINARY(is_xor);
        MATCH_BINARY(is_range);
        MATCH_UNARY(is_complement);
        MATCH_UNARY(is_star);
--- a/src/ast/sls/sls_seq_plugin.cpp
+++ b/src/ast/sls/sls_seq_plugin.cpp
@ -511,6 +511,7 @@ namespace sls {
            case OP_RE_CONCAT:
            case OP_RE_UNION:
            case OP_RE_DIFF:
+            case OP_RE_XOR:
            case OP_RE_INTERSECT:
            case OP_RE_LOOP:
            case OP_RE_POWER:
@ -1294,6 +1295,7 @@ namespace sls {
        case OP_RE_CONCAT:
        case OP_RE_UNION:
        case OP_RE_DIFF:
+        case OP_RE_XOR:
        case OP_RE_INTERSECT:
        case OP_RE_LOOP:
        case OP_RE_POWER:
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@ -21,6 +21,7 @@ Author:
 #include "ast/expr_abstract.h"
 #include "ast/ast_util.h"
 #include "ast/for_each_expr.h"
+#include "ast/rewriter/seq_regex_bisim.h"
 #include <ast/rewriter/expr_safe_replace.h>

 namespace smt {
@ -460,6 +461,23 @@ namespace smt {
        if (re().is_empty(r))
            //trivially true
            return;
+        // Try the bisimulation procedure on ground regexes first.  If it
+        // returns a definite answer, dispatch the corresponding axiom and
+        // bypass the symbolic emptiness/derivative closure.
+        if (is_ground(r1) && is_ground(r2)) {
+            seq::regex_bisim bisim(seq_rw());
+            switch (bisim.are_equivalent(r1, r2)) {
+            case l_true:
+                STRACE(seq_regex_brief, tout << "bisim:eq ";);
+                return; // trivially true
+            case l_false:
+                STRACE(seq_regex_brief, tout << "bisim:neq ";);
+                th.add_axiom(~th.mk_eq(r1, r2, false), false_literal);
+                return;
+            case l_undef:
+                break;
+            }
+        }
        expr_ref emp(re().mk_empty(r->get_sort()), m);
        expr_ref f(m.mk_fresh_const("re.char", seq_sort), m); 
        expr_ref is_empty = sk().mk_is_empty(r, r, f);
@ -478,6 +496,20 @@ namespace smt {
        sort* seq_sort = nullptr;
        VERIFY(u().is_re(r1, seq_sort));
        expr_ref r = symmetric_diff(r1, r2);
+        if (is_ground(r1) && is_ground(r2)) {
+            seq::regex_bisim bisim(seq_rw());
+            switch (bisim.are_equivalent(r1, r2)) {
+            case l_true:
+                STRACE(seq_regex_brief, tout << "bisim:eq ";);
+                th.add_axiom(th.mk_eq(r1, r2, false), false_literal);
+                return;
+            case l_false:
+                STRACE(seq_regex_brief, tout << "bisim:neq ";);
+                return; // trivially satisfied
+            case l_undef:
+                break;
+            }
+        }
        expr_ref emp(re().mk_empty(r->get_sort()), m);
        expr_ref n(m.mk_fresh_const("re.char", seq_sort), m);
        expr_ref is_non_empty = sk().mk_is_non_empty(r, r, n);