3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-13 12:25:37 +00:00

Add OP_RE_XOR and union-find bisimulation for ground regex equivalence (#9804)

Implements the algorithm of Eq(p,q) = Empty(p XOR q)' using a union-find
driven bisimulation closure (per the CAV'26 ERE paper).

### What's added

* **New primitive OP_RE_XOR (re.xor)** wired through seq_decl_plugin:
parser signature, info propagation (nullable, min_length), and
pretty-printer.
* **seq_rewriter**: structural XOR rewrites ( XOR r = empty, XOR empty =
r, ull XOR r = comp(r), comp/comp absorption, complement push, AC
normalisation), nullability (Null(p XOR q) = Null(p) != Null(q)),
derivative (D_a(p XOR q) = D_a(p) XOR D_a(q)), reverse, antimirov
derivative, and `check_deriv_normal_form` coverage.
* **New class seq::regex_bisim** in
`src/ast/rewriter/seq_regex_bisim.{h,cpp}` to keep the bisim logic out
of the already-large `seq_rewriter.cpp`. Uses `basic_union_find` from
`util/union_find.h`, an `obj_map` for the node assignment, and a
50000-step bound (returns `l_undef` on overrun).
* **Integration** in `seq_rewriter::reduce_re_eq` (with a re-entry
guard) and in `seq_regex::propagate_eq` / `propagate_ne` for ground
regexes; on `l_undef` we fall back to the existing axiomatisation.
* **`sls_seq_plugin`**: extend `OP_RE_DIFF` switch arms to also cover
`OP_RE_XOR`.

### Validation

* Full release build with MSVC + Ninja.
* `./test-z3 /a` -- 89/89 tests passing.
* `./test-z3 /seq smt2print_parse` -- PASS.
* Smoke tests with `(a|b)*` vs `(a*b*)*` (equal) and `a*` vs `(a|b)*`
(not equal) return the expected `sat`/`unsat` quickly.

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-10 14:58:20 -07:00 committed by GitHub
parent 589bd9e6f5
commit 513b81253b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 664 additions and 20 deletions

View file

@ -41,6 +41,7 @@ z3_add_component(rewriter
seq_eq_solver.cpp
seq_subset.cpp
seq_rewriter.cpp
seq_regex_bisim.cpp
seq_skolem.cpp
th_rewriter.cpp
value_sweep.cpp

View file

@ -0,0 +1,267 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_regex_bisim.cpp
Abstract:
See seq_regex_bisim.h.
Author:
Nikolaj Bjorner (nbjorner)
Margus Veanes (veanes)
--*/
#include "ast/rewriter/seq_regex_bisim.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/ast_pp.h"
#include "ast/ast_util.h"
#include "ast/for_each_expr.h"
namespace seq {
regex_bisim::regex_bisim(seq_rewriter& rw):
m(rw.m()),
m_rw(rw),
m_util(rw.u()),
m_pinned(m),
m_worklist(m) {
}
void regex_bisim::reset() {
m_uf.reset();
m_node_of.reset();
m_pinned.reset();
m_worklist.reset();
m_steps = 0;
}
/*
Map an expression to a union-find node, allocating a fresh node on
first encounter.
*/
unsigned regex_bisim::node_of(expr* r) {
unsigned id = 0;
if (m_node_of.find(r, id))
return id;
id = m_uf.mk_var();
m_node_of.insert(r, id);
m_pinned.push_back(r);
return id;
}
/*
Compute a definite nullability answer for r.
If the seq_rewriter is unable to produce a literal true/false (for
example because r contains an uninterpreted symbol), return l_undef.
*/
lbool regex_bisim::nullability(expr* r) {
expr_ref n = m_rw.is_nullable(r);
if (m.is_true(n))
return l_true;
if (m.is_false(n))
return l_false;
return l_undef;
}
/*
Test whether a regex expression is a kind that the bisimulation
procedure can reason about. We require it to be a syntactic ground
term (no free variables) and that its info reports min_length info
(which implies that it parses cleanly as a regex constructor).
*/
bool regex_bisim::is_supported(expr* r) {
if (!m_util.is_re(r))
return false;
if (!m_util.re.get_info(r).is_known())
return false;
// Reject regexes mentioning free variables; the symbolic
// derivative engine introduces (:var 0) only after we call it
// ourselves, so any pre-existing variable would be a free var.
if (!is_ground(r))
return false;
return true;
}
/*
Collect the leaves of a t-regex der (an ITE / antimirov union /
union-tree with regex leaves) into the output vector. Empty
(re.empty) leaves are dropped.
Returns false if we encountered an unexpected node (e.g. a free
variable creeping in) in that case the caller should bail out.
*/
bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) {
ptr_vector<expr> work;
obj_hashtable<expr> seen;
work.push_back(der);
seen.insert(der);
while (!work.empty()) {
expr* e = work.back();
work.pop_back();
expr* c = nullptr, * t = nullptr, * f = nullptr;
if (m.is_ite(e, c, t, f) ||
m_util.re.is_union(e, t, f) ||
m_util.re.is_antimirov_union(e, t, f)) {
if (seen.insert_if_not_there(t))
work.push_back(t);
if (seen.insert_if_not_there(f))
work.push_back(f);
continue;
}
if (m_util.re.is_empty(e))
continue;
if (!m_util.is_re(e))
return false;
leaves.push_back(e);
}
return true;
}
/*
Fast inequivalence check based on the get_info().classical flag.
Invariant: if r is well-formed and get_info(r).classical is true,
then L(r) is non-empty. The flag is set for regexes built only
from str.to_re, re.all, union, concat, star, plus, opt, loop;
it excludes complement, intersection, diff, xor, and the empty
regex.
A bare regex leaf l (i.e. not a XOR pair) represents the implicit
pair (empty XOR l). If l is classical, L(l) is non-empty so the
pair is non-empty: the original two regexes have a distinguishing
prefix and are inequivalent.
For an XOR leaf xor(a, b): if both a and b are classical and have
different min_length, then the shortest word of one is not in the
other, so the pair is non-empty and we can short-circuit. (The
case a == b syntactically is already handled by mk_re_xor0.)
Returns true if the leaf proves inequivalence; false if no
conclusion can be drawn.
*/
bool regex_bisim::classical_distinguishing(expr* l) {
expr* a = nullptr, * b = nullptr;
if (m_util.re.is_xor(l, a, b)) {
auto ia = m_util.re.get_info(a);
auto ib = m_util.re.get_info(b);
if (ia.is_known() && ib.is_known() &&
ia.classical && ib.classical &&
ia.min_length != ib.min_length)
return true;
return false;
}
if (m_util.re.is_empty(l))
return false;
auto info = m_util.re.get_info(l);
return info.is_known() && info.classical;
}
/*
Merge the two sides of the XOR pair, returning true if a fresh
merge happened (i.e. the pair must still be processed) and false
if the two sides were already in the same union-find class.
For non-XOR leaves we treat the leaf l as the pair (empty XOR l).
*/
bool regex_bisim::merge_leaf(expr* leaf) {
expr* a = nullptr, * b = nullptr;
if (!m_util.re.is_xor(leaf, a, b)) {
a = m_util.re.mk_empty(leaf->get_sort());
b = leaf;
m_pinned.push_back(a);
}
unsigned ia = node_of(a);
unsigned ib = node_of(b);
if (m_uf.find(ia) == m_uf.find(ib))
return false;
m_uf.merge(ia, ib);
return true;
}
/*
Decide equivalence by bisimulation on D(p XOR q).
*/
lbool regex_bisim::are_equivalent(expr* p, expr* q) {
return are_equivalent_core(p, q);
}
lbool regex_bisim::are_equivalent_core(expr* p, expr* q) {
if (!is_supported(p) || !is_supported(q))
return l_undef;
if (p == q)
return l_true;
reset();
// Build the initial pair r0 = p XOR q applying the structural
// XOR rewrites (r XOR r = empty, AC normalisation, etc.).
expr_ref r0 = m_rw.mk_re_xor_simplified(p, q);
// If r0 simplified to empty, the two regexes are equivalent.
if (m_util.re.is_empty(r0))
return l_true;
lbool n0 = nullability(r0);
if (n0 == l_true)
return l_false; // distinguishing empty word
if (n0 == l_undef)
return l_undef;
// Classical-leaf shortcut applied to r0 (covers the case where
// mk_re_xor_simplified collapsed p XOR q to a bare classical
// residual, e.g. when one side reduced to empty).
if (classical_distinguishing(r0))
return l_false;
if (!merge_leaf(r0))
return l_true; // already merged: trivially equivalent
m_worklist.push_back(r0);
while (!m_worklist.empty()) {
if (++m_steps > m_step_bound)
return l_undef;
expr_ref r(m_worklist.back(), m);
m_worklist.pop_back();
// Compute the symbolic derivative wrt the canonical variable
// (:var 0). The result is a transition regex (ITE tree) whose
// leaves are regex expressions. We use the classical Brzozowski
// entry point so the derivative stays as a single TRegex and
// does not lift unions to the top via antimirov nodes — this
// preserves the XOR-pair invariant the bisimulation relies on.
expr_ref d(m_rw.mk_brz_derivative(r), m);
expr_ref_vector leaves(m);
if (!collect_leaves(d, leaves))
return l_undef;
// First pass: check for any nullable leaf (definitive
// distinguishing empty-continuation word) or any classically
// non-empty leaf (definitive distinguishing non-empty prefix).
for (expr* l : leaves) {
lbool nl = nullability(l);
if (nl == l_true)
return l_false;
if (nl == l_undef)
return l_undef;
if (classical_distinguishing(l))
return l_false;
}
// Second pass: merge each leaf into the union-find; new
// merges go onto the worklist.
for (expr* l : leaves) {
if (merge_leaf(l)) {
m_pinned.push_back(l);
m_worklist.push_back(l);
}
}
}
return l_true;
}
}

View file

@ -0,0 +1,99 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_regex_bisim.h
Abstract:
Union-find based bisimulation algorithm for symbolic regular expression
equivalence, based on the construction described in:
"Symbolic Extended Regular Expression Equivalence"
Veanes, Bjorner et al., CAV'26 (see \git\ere\cav26\paper.tex)
The algorithm decides equivalence of two regexes p, q by performing
a bisimulation search on the symbolic derivative of p XOR q. A
union-find structure tracks pairs of regex states that should be
bisimilar, and the worklist is initialised with the singleton
{p XOR q}.
The algorithm returns:
l_true p and q are equivalent (bisimilar)
l_false p and q are inequivalent (a distinguishing prefix exists)
l_undef the algorithm could not decide within the configured bound
or encountered a non-ground regex it cannot reason about
Author:
Nikolaj Bjorner (nbjorner)
Margus Veanes (veanes)
--*/
#pragma once
#include "ast/seq_decl_plugin.h"
#include "util/lbool.h"
#include "util/obj_hashtable.h"
#include "util/union_find.h"
class seq_rewriter;
namespace seq {
/*
Union-find based bisimulation algorithm for deciding equivalence
of two ground (closed) regular expressions in ERE.
Usage:
seq::regex_bisim bisim(rewriter);
lbool r = bisim.are_equivalent(p, q);
if (r == l_true) // p ≡ q
if (r == l_false) // p ≢ q
if (r == l_undef) // cannot decide
The implementation only attempts the equivalence check on ground
regexes (no free variables) that use the standard symbolic regex
constructors. For non-ground or unsupported inputs the procedure
conservatively returns l_undef.
*/
class regex_bisim {
private:
ast_manager& m;
seq_rewriter& m_rw;
seq_util m_util;
basic_union_find m_uf;
obj_map<expr, unsigned> m_node_of;
expr_ref_vector m_pinned;
expr_ref_vector m_worklist;
unsigned m_step_bound { 50000 };
unsigned m_steps { 0 };
unsigned node_of(expr* r);
bool merge_leaf(expr* xor_pair);
bool collect_leaves(expr* der, expr_ref_vector& leaves);
lbool nullability(expr* r);
bool is_supported(expr* r);
// Returns true if the leaf l proves that the original pair is
// inequivalent and the bisim can short-circuit to l_false.
// Uses the get_info().classical invariant: a classical regex
// has non-empty language, so a bare classical leaf (implicitly
// empty XOR r) is a distinguishing prefix. Similarly an XOR of
// two classical regexes with different min_length is
// distinguishing.
bool classical_distinguishing(expr* l);
void reset();
lbool are_equivalent_core(expr* p, expr* q);
public:
regex_bisim(seq_rewriter& rw);
void set_step_bound(unsigned n) { m_step_bound = n; }
lbool are_equivalent(expr* p, expr* q);
};
}

View file

@ -20,6 +20,7 @@ Authors:
#include "util/uint_set.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/rewriter/seq_regex_bisim.h"
#include "ast/arith_decl_plugin.h"
#include "ast/array_decl_plugin.h"
#include "ast/ast_pp.h"
@ -267,6 +268,14 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
st = BR_DONE;
}
break;
case OP_RE_XOR:
if (num_args == 2)
st = mk_re_xor(args[0], args[1], result);
else if (num_args == 1) {
result = args[0];
st = BR_DONE;
}
break;
case OP_RE_INTERSECT:
if (num_args == 1) {
result = args[0];
@ -2622,6 +2631,23 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
m_br.mk_not(is_nullable(r2), result);
m_br.mk_and(result, is_nullable(r1), result);
}
else if (re().is_xor(r, r1, r2)) {
// Null(r1 XOR r2) = Null(r1) XOR Null(r2)
expr_ref n1(is_nullable(r1), m());
expr_ref n2(is_nullable(r2), m());
// Simplify when either operand is a boolean literal so the
// bisimulation procedure can use the answer directly.
if (m().is_true(n1))
result = mk_not(m(), n2);
else if (m().is_false(n1))
result = n2;
else if (m().is_true(n2))
result = mk_not(m(), n1);
else if (m().is_false(n2))
result = n1;
else
result = m().mk_xor(n1, n2);
}
else if (re().is_star(r) ||
re().is_opt(r) ||
re().is_full_seq(r) ||
@ -2742,6 +2768,12 @@ br_status seq_rewriter::mk_re_reverse(expr* r, expr_ref& result) {
result = re().mk_diff(a, b);
return BR_REWRITE2;
}
else if (re().is_xor(r, r1, r2)) {
auto a = re().mk_reverse(r1);
auto b = re().mk_reverse(r2);
result = re().mk_xor(a, b);
return BR_REWRITE2;
}
else if (m().is_ite(r, p, r1, r2)) {
result = m().mk_ite(p, re().mk_reverse(r1), re().mk_reverse(r2));
return BR_REWRITE2;
@ -2882,6 +2914,7 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) {
re().is_concat(r, r1, r2) ||
re().is_union(r, r1, r2) ||
re().is_intersection(r, r1, r2) ||
re().is_xor(r, r1, r2) ||
m().is_ite(r, p, r1, r2)) {
check_deriv_normal_form(r1, new_level);
check_deriv_normal_form(r2, new_level);
@ -2921,6 +2954,14 @@ expr_ref seq_rewriter::mk_derivative(expr* r) {
return mk_antimirov_deriv(v, r, m().mk_true());
}
expr_ref seq_rewriter::mk_brz_derivative(expr* r) {
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(r, seq_sort));
VERIFY(m_util.is_seq(seq_sort, ele_sort));
expr_ref v(m().mk_var(0, ele_sort), m());
return mk_derivative_rec(v, r);
}
expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
return mk_antimirov_deriv(ele, r, m().mk_true());
}
@ -3120,6 +3161,10 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
result = mk_antimirov_deriv_intersection(e,
mk_antimirov_deriv(e, r1, path),
mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r2, path)), m().mk_true());
else if (re().is_xor(r, r1, r2))
// D(e, r1 XOR r2) = D(e, r1) XOR D(e, r2)
result = mk_der_xor(mk_antimirov_deriv(e, r1, path),
mk_antimirov_deriv(e, r2, path));
else if (re().is_of_pred(r, r1)) {
array_util array(m());
expr* args[2] = { r1, e };
@ -3451,6 +3496,11 @@ expr_ref seq_rewriter::mk_regex_reverse(expr* r) {
auto b1 = mk_regex_reverse(r2);
result = re().mk_diff(a1, b1);
}
else if (re().is_xor(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_xor(a1, b1);
}
else if (re().is_star(r, r1))
result = re().mk_star(mk_regex_reverse(r1));
else if (re().is_plus(r, r1))
@ -3546,7 +3596,6 @@ expr_ref seq_rewriter::simplify_path(expr* elem, expr* path) {
expr_ref seq_rewriter::mk_der_antimirov_union(expr* r1, expr* r2) {
verbose_stream() << "union " << r1->get_id() << " " << r2->get_id() << "\n";
return mk_der_op(_OP_RE_ANTIMIROV_UNION, r1, r2);
}
@ -3558,6 +3607,10 @@ expr_ref seq_rewriter::mk_der_inter(expr* r1, expr* r2) {
return mk_der_op(OP_RE_INTERSECT, r1, r2);
}
expr_ref seq_rewriter::mk_der_xor(expr* r1, expr* r2) {
return mk_der_op(OP_RE_XOR, r1, r2);
}
expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
return mk_der_op(OP_RE_CONCAT, r1, r2);
}
@ -3729,7 +3782,7 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
return result;
}
// Order with higher IDs on the outside
bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT;
bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT || k == OP_RE_XOR;
if (is_symmetric && get_id(ca) < get_id(cb)) {
std::swap(a, b);
std::swap(ca, cb);
@ -3776,6 +3829,10 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
if (BR_FAILED == mk_re_concat(a, b, result))
result = re().mk_concat(a, b);
break;
case OP_RE_XOR:
if (BR_FAILED == mk_re_xor(a, b, result))
result = re().mk_xor(a, b);
break;
default:
UNREACHABLE();
break;
@ -3806,6 +3863,10 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
if (BR_FAILED != mk_re_concat(a, b, result))
return result;
break;
case OP_RE_XOR:
if (BR_FAILED != mk_re_xor0(a, b, result))
return result;
break;
default:
break;
}
@ -3909,6 +3970,13 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
return result;
}
/*
Classical Brzozowski derivative used by the regex_bisim equivalence
procedure. Unlike `mk_antimirov_deriv`, this variant never creates
_OP_RE_ANTIMIROV_UNION nodes it stays in a classical (single regex
tree) form. The bisimulation algorithm relies on this so that each
leaf of D(p XOR q) is a coherent XOR pair (D_v p) XOR (D_v q).
*/
expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
expr_ref result(m());
sort* seq_sort = nullptr, *ele_sort = nullptr;
@ -3920,57 +3988,59 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
unsigned lo = 0, hi = 0;
if (re().is_concat(r, r1, r2)) {
expr_ref is_n = is_nullable(r1);
expr_ref dr1 = mk_derivative(ele, r1);
expr_ref dr1 = mk_derivative_rec(ele, r1);
result = mk_der_concat(dr1, r2);
if (m().is_false(is_n)) {
return result;
}
expr_ref dr2 = mk_derivative(ele, r2);
expr_ref dr2 = mk_derivative_rec(ele, r2);
is_n = re_predicate(is_n, seq_sort);
if (re().is_empty(dr2)) {
//do not concatenate [], it is a deade-end
return result;
}
else {
// Instead of mk_der_union here, we use mk_der_antimirov_union to
// force the two cases to be considered separately and lifted to
// the top level. This avoids blowup in cases where determinization
// is expensive.
return mk_der_antimirov_union(result, mk_der_concat(is_n, dr2));
// Classical Brzozowski union: keep the derivative tree free of
// antimirov-union nodes so the bisimulation procedure sees a
// single regex tree whose leaves are XOR pairs.
return mk_der_union(result, mk_der_concat(is_n, dr2));
}
}
else if (re().is_star(r, r1)) {
return mk_der_concat(mk_derivative(ele, r1), r);
return mk_der_concat(mk_derivative_rec(ele, r1), r);
}
else if (re().is_plus(r, r1)) {
expr_ref star(re().mk_star(r1), m());
return mk_derivative(ele, star);
return mk_derivative_rec(ele, star);
}
else if (re().is_union(r, r1, r2)) {
return mk_der_union(mk_derivative(ele, r1), mk_derivative(ele, r2));
return mk_der_union(mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
}
else if (re().is_intersection(r, r1, r2)) {
return mk_der_inter(mk_derivative(ele, r1), mk_derivative(ele, r2));
return mk_der_inter(mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
}
else if (re().is_diff(r, r1, r2)) {
return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2)));
return mk_der_inter(mk_derivative_rec(ele, r1), mk_der_compl(mk_derivative_rec(ele, r2)));
}
else if (re().is_xor(r, r1, r2)) {
return mk_der_xor(mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
}
else if (m().is_ite(r, p, r1, r2)) {
// there is no BDD normalization here
result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2));
result = m().mk_ite(p, mk_derivative_rec(ele, r1), mk_derivative_rec(ele, r2));
return result;
}
else if (re().is_opt(r, r1)) {
return mk_derivative(ele, r1);
return mk_derivative_rec(ele, r1);
}
else if (re().is_complement(r, r1)) {
return mk_der_compl(mk_derivative(ele, r1));
return mk_der_compl(mk_derivative_rec(ele, r1));
}
else if (re().is_loop(r, r1, lo)) {
if (lo > 0) {
lo--;
}
result = mk_derivative(ele, r1);
result = mk_derivative_rec(ele, r1);
//do not concatenate with [] (emptyset)
if (re().is_empty(result)) {
return result;
@ -3988,7 +4058,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
if (lo > 0) {
lo--;
}
result = mk_derivative(ele, r1);
result = mk_derivative_rec(ele, r1);
//do not concatenate with [] (emptyset) or handle the rest of the loop if no more iterations remain
if (re().is_empty(result) || hi == 0) {
return result;
@ -4756,6 +4826,91 @@ br_status seq_rewriter::mk_re_diff(expr* a, expr* b, expr_ref& result) {
return BR_REWRITE2;
}
/*
Symmetric difference / XOR of regexes.
LANG(a XOR b) = (LANG(a) \ LANG(b)) U (LANG(b) \ LANG(a))
Equivalence preserving rewrites applied here (paper Section 5):
r XOR r = []
r XOR [] = r
[] XOR r = r
comp(r) XOR comp(s) = r XOR s
r XOR comp(s) = comp(r XOR s)
comp(r) XOR s = comp(r XOR s)
full_seq XOR r = comp(r)
r XOR full_seq = comp(r)
We also normalize the argument order using expression ids so that the
structure is canonical for AC.
*/
br_status seq_rewriter::mk_re_xor0(expr* a, expr* b, expr_ref& result) {
// Reduction-only variant of mk_re_xor for use inside mk_der_op.
// Avoids any transformation that would create a top-level re.xor
// node (e.g. AC normalisation or complement absorption), because
// mk_der_op needs to keep distributing the operation through ITE
// BDDs. Only structural simplifications that produce a non-XOR
// result are applied here.
if (a == b) {
result = re().mk_empty(a->get_sort());
return BR_DONE;
}
if (re().is_empty(a)) {
result = b;
return BR_DONE;
}
if (re().is_empty(b)) {
result = a;
return BR_DONE;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_re_xor(expr* a, expr* b, expr_ref& result) {
if (a == b) {
result = re().mk_empty(a->get_sort());
return BR_DONE;
}
if (re().is_empty(a)) {
result = b;
return BR_DONE;
}
if (re().is_empty(b)) {
result = a;
return BR_DONE;
}
if (re().is_full_seq(a)) {
result = re().mk_complement(b);
return BR_REWRITE1;
}
if (re().is_full_seq(b)) {
result = re().mk_complement(a);
return BR_REWRITE1;
}
expr* ra = nullptr, * rb = nullptr;
bool ca = re().is_complement(a, ra);
bool cb = re().is_complement(b, rb);
if (ca && cb) {
// comp(ra) XOR comp(rb) = ra XOR rb
result = re().mk_xor(ra, rb);
return BR_REWRITE1;
}
if (ca) {
// comp(ra) XOR b = comp(ra XOR b)
result = re().mk_complement(re().mk_xor(ra, b));
return BR_REWRITE2;
}
if (cb) {
// a XOR comp(rb) = comp(a XOR rb)
result = re().mk_complement(re().mk_xor(a, rb));
return BR_REWRITE2;
}
// Normalize order using expression ids (AC normalization).
if (a->get_id() > b->get_id()) {
result = re().mk_xor(b, a);
return BR_DONE;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_re_loop(func_decl* f, unsigned num_args, expr* const* args, expr_ref& result) {
rational n1, n2;
@ -5177,6 +5332,30 @@ br_status seq_rewriter::reduce_re_eq(expr* l, expr* r, expr_ref& result) {
if (re().is_empty(r)) {
return reduce_re_is_empty(l, result);
}
if (l == r) {
result = m().mk_true();
return BR_DONE;
}
/*
* Try the union-find bisimulation procedure for ground regex equality.
* Guarded against re-entry because the bisim may construct equalities
* indirectly. On l_undef the rewriter falls through to the existing
* axiomatisation path.
*/
if (!m_in_bisim && is_ground(l) && is_ground(r)) {
flet<bool> _block(m_in_bisim, true);
seq::regex_bisim bisim(*this);
switch (bisim.are_equivalent(l, r)) {
case l_true:
result = m().mk_true();
return BR_DONE;
case l_false:
result = m().mk_false();
return BR_DONE;
case l_undef:
break;
}
}
return BR_FAILED;
}

View file

@ -135,7 +135,8 @@ class seq_rewriter {
// re2automaton m_re2aut;
op_cache m_op_cache;
expr_ref_vector m_es, m_lhs, m_rhs;
bool m_coalesce_chars;
bool m_coalesce_chars;
bool m_in_bisim { false };
enum length_comparison {
shorter_c,
@ -180,6 +181,7 @@ class seq_rewriter {
expr_ref mk_der_concat(expr* a, expr* b);
expr_ref mk_der_union(expr* a, expr* b);
expr_ref mk_der_inter(expr* a, expr* b);
expr_ref mk_der_xor(expr* a, expr* b);
expr_ref mk_der_compl(expr* a);
expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
expr_ref mk_der_antimirov_union(expr* r1, expr* r2);
@ -262,6 +264,8 @@ class seq_rewriter {
br_status mk_re_complement(expr* a, expr_ref& result);
br_status mk_re_star(expr* a, expr_ref& result);
br_status mk_re_diff(expr* a, expr* b, expr_ref& result);
br_status mk_re_xor(expr* a, expr* b, expr_ref& result);
br_status mk_re_xor0(expr* a, expr* b, expr_ref& result);
br_status mk_re_plus(expr* a, expr_ref& result);
br_status mk_re_opt(expr* a, expr_ref& result);
br_status mk_re_power(func_decl* f, expr* a, expr_ref& result);
@ -381,6 +385,18 @@ public:
return result;
}
/*
* Construct r1 XOR r2 applying the structural rewrites in
* mk_re_xor (r XOR r = empty, comp/empty/full normalisation, AC
* ordering). Used by the bisimulation procedure.
*/
expr_ref mk_re_xor_simplified(expr* r1, expr* r2) {
expr_ref result(m());
if (mk_re_xor(r1, r2, result) == BR_FAILED)
result = re().mk_xor(r1, r2);
return result;
}
/**
* check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences
*/
@ -410,6 +426,17 @@ public:
*/
expr_ref mk_derivative(expr* r);
/*
Classical (non-antimirov) Brzozowski derivative wrt the canonical
variable v0 = (:var 0). Unlike `mk_derivative` this entry point keeps
the symbolic derivative as a single transition regex (TRegex): boolean
operators are pushed into the ITE leaves rather than lifted to the top
via _OP_RE_ANTIMIROV_UNION. Used by the regex_bisim equivalence
procedure which relies on each leaf of D(p XOR q) being a coherent
XOR pair (D_v p) XOR (D_v q).
*/
expr_ref mk_brz_derivative(expr* r);
// heuristic elimination of element from condition that comes form a derivative.
// special case optimization for conjunctions of equalities, disequalities and ranges.
void elim_condition(expr* elem, expr_ref& cond);

View file

@ -230,6 +230,7 @@ void seq_decl_plugin::init() {
m_sigs[OP_RE_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
m_sigs[OP_RE_INTERSECT] = alloc(psig, m, "re.inter", 1, 2, reAreA, reA);
m_sigs[OP_RE_DIFF] = alloc(psig, m, "re.diff", 1, 2, reAreA, reA);
m_sigs[OP_RE_XOR] = alloc(psig, m, "re.xor", 1, 2, reAreA, reA);
m_sigs[OP_RE_LOOP] = alloc(psig, m, "re.loop", 1, 1, &reA, reA);
m_sigs[OP_RE_POWER] = alloc(psig, m, "re.^", 1, 1, &reA, reA);
m_sigs[OP_RE_COMPLEMENT] = alloc(psig, m, "re.comp", 1, 1, &reA, reA);
@ -507,6 +508,7 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p
case OP_RE_CONCAT:
case OP_RE_INTERSECT:
case OP_RE_DIFF:
case OP_RE_XOR:
m_has_re = true;
return mk_left_assoc_fun(k, arity, domain, range, k, k);
@ -1513,6 +1515,13 @@ std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
print(out, r2);
out << ")";
}
else if (re.is_xor(e, r1, r2)) {
out << "(";
print(out, r1);
out << ")XOR(";
print(out, r2);
out << ")";
}
else if (re.m.is_ite(e, s, r1, r2)) {
out << (html_encode ? "(&#x1D422;&#x1D41F; " : "(if ");
print(out, s);
@ -1704,6 +1713,10 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return i1.diff(i2);
case OP_RE_XOR:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return i1.xor_(i2);
}
return unknown_info;
}
@ -1829,6 +1842,25 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) co
return *this;
}
seq_util::rex::info seq_util::rex::info::xor_(seq_util::rex::info const& rhs) const {
if (is_known()) {
if (rhs.is_known()) {
// Null(p XOR q) = Null(p) XOR Null(q)
lbool xor_nullable = l_undef;
if (nullable != l_undef && rhs.nullable != l_undef)
xor_nullable = (nullable == rhs.nullable) ? l_false : l_true;
return info(interpreted & rhs.interpreted,
xor_nullable,
0,
false);
}
else
return rhs;
}
else
return *this;
}
seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
if (is_known()) {
if (i.is_known()) {

View file

@ -68,6 +68,7 @@ enum seq_op_kind {
OP_RE_UNION,
OP_RE_DIFF,
OP_RE_INTERSECT,
OP_RE_XOR,
OP_RE_LOOP,
OP_RE_POWER,
OP_RE_COMPLEMENT,
@ -491,6 +492,7 @@ public:
info disj(info const& rhs) const;
info conj(info const& rhs) const;
info diff(info const& rhs) const;
info xor_(info const& rhs) const;
info orelse(info const& rhs) const;
info loop(unsigned lower, unsigned upper) const;
@ -523,6 +525,7 @@ public:
app* mk_union(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_UNION, r1, r2); }
app* mk_inter(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_INTERSECT, r1, r2); }
app* mk_diff(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_DIFF, r1, r2); }
app* mk_xor(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_XOR, r1, r2); }
app* mk_complement(expr* r) { return m.mk_app(m_fid, OP_RE_COMPLEMENT, r); }
app* mk_star(expr* r) { return m.mk_app(m_fid, OP_RE_STAR, r); }
app* mk_plus(expr* r) { return m.mk_app(m_fid, OP_RE_PLUS, r); }
@ -546,6 +549,7 @@ public:
bool is_union(expr const* n) const { return is_app_of(n, m_fid, OP_RE_UNION); }
bool is_intersection(expr const* n) const { return is_app_of(n, m_fid, OP_RE_INTERSECT); }
bool is_diff(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DIFF); }
bool is_xor(expr const* n) const { return is_app_of(n, m_fid, OP_RE_XOR); }
bool is_complement(expr const* n) const { return is_app_of(n, m_fid, OP_RE_COMPLEMENT); }
bool is_star(expr const* n) const { return is_app_of(n, m_fid, OP_RE_STAR); }
bool is_plus(expr const* n) const { return is_app_of(n, m_fid, OP_RE_PLUS); }
@ -578,6 +582,7 @@ public:
MATCH_BINARY(is_union);
MATCH_BINARY(is_intersection);
MATCH_BINARY(is_diff);
MATCH_BINARY(is_xor);
MATCH_BINARY(is_range);
MATCH_UNARY(is_complement);
MATCH_UNARY(is_star);

View file

@ -511,6 +511,7 @@ namespace sls {
case OP_RE_CONCAT:
case OP_RE_UNION:
case OP_RE_DIFF:
case OP_RE_XOR:
case OP_RE_INTERSECT:
case OP_RE_LOOP:
case OP_RE_POWER:
@ -1294,6 +1295,7 @@ namespace sls {
case OP_RE_CONCAT:
case OP_RE_UNION:
case OP_RE_DIFF:
case OP_RE_XOR:
case OP_RE_INTERSECT:
case OP_RE_LOOP:
case OP_RE_POWER:

View file

@ -21,6 +21,7 @@ Author:
#include "ast/expr_abstract.h"
#include "ast/ast_util.h"
#include "ast/for_each_expr.h"
#include "ast/rewriter/seq_regex_bisim.h"
#include <ast/rewriter/expr_safe_replace.h>
namespace smt {
@ -460,6 +461,23 @@ namespace smt {
if (re().is_empty(r))
//trivially true
return;
// Try the bisimulation procedure on ground regexes first. If it
// returns a definite answer, dispatch the corresponding axiom and
// bypass the symbolic emptiness/derivative closure.
if (is_ground(r1) && is_ground(r2)) {
seq::regex_bisim bisim(seq_rw());
switch (bisim.are_equivalent(r1, r2)) {
case l_true:
STRACE(seq_regex_brief, tout << "bisim:eq ";);
return; // trivially true
case l_false:
STRACE(seq_regex_brief, tout << "bisim:neq ";);
th.add_axiom(~th.mk_eq(r1, r2, false), false_literal);
return;
case l_undef:
break;
}
}
expr_ref emp(re().mk_empty(r->get_sort()), m);
expr_ref f(m.mk_fresh_const("re.char", seq_sort), m);
expr_ref is_empty = sk().mk_is_empty(r, r, f);
@ -478,6 +496,20 @@ namespace smt {
sort* seq_sort = nullptr;
VERIFY(u().is_re(r1, seq_sort));
expr_ref r = symmetric_diff(r1, r2);
if (is_ground(r1) && is_ground(r2)) {
seq::regex_bisim bisim(seq_rw());
switch (bisim.are_equivalent(r1, r2)) {
case l_true:
STRACE(seq_regex_brief, tout << "bisim:eq ";);
th.add_axiom(th.mk_eq(r1, r2, false), false_literal);
return;
case l_false:
STRACE(seq_regex_brief, tout << "bisim:neq ";);
return; // trivially satisfied
case l_undef:
break;
}
}
expr_ref emp(re().mk_empty(r->get_sort()), m);
expr_ref n(m.mk_fresh_const("re.char", seq_sort), m);
expr_ref is_non_empty = sk().mk_is_non_empty(r, r, n);