3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-26 02:20:58 +00:00

feat(seq::bisim): hoist all ITEs to top in bisim derivative leaves

In the bisimulation equivalence path, derivative leaves are now
extracted via the new seq_rewriter::brz_derivative_cofactors
(derive::derivative_cofactors), which computes the symbolic
derivative and enumerates its reachable leaves in fully ITE-hoisted
normal form: every if-then-else over the input character (including
ones previously buried under a concat or union) is hoisted to the top
via decompose_ite, infeasible minterms are pruned, and unions are kept
intact as single states. Each leaf is therefore a ground regex free of
(:var 0), so its nullability is always decidable.

This replaces collect_leaves (which only split top-level ITEs and left
buried (:var 0) ITEs inside leaves), the root cause of bisim returning
l_undef and falling through to the slow theory solver.

Validation on the regex-equivalence corpus (1523 files, -T:5, 8 workers):
- vs master: total solved 1394 vs 1378 (+16), soft_timeouts 129 vs 145,
  0 soundness disagreements (was 18 -> 5 -> 0).
- vs derive: +242 solved (1394 vs 1152), 25.4% faster on commonly-solved
  files, fixes 18 soundness disagreements, only 6 regressions.
- corpus wall time halved (172s vs 332s/349s).
- All 91 unit tests pass, including seq_regex_bisim.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-25 08:40:32 +03:00
parent a2b73b0ee6
commit ee20c9963b
5 changed files with 52 additions and 42 deletions

View file

@ -1349,5 +1349,15 @@ namespace seq {
get_cofactors_rec(r, result);
}
void derive::derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
// Compute the symbolic derivative wrt the canonical variable
// (:var 0); operator() sets m_ele to that variable.
expr_ref d = (*this)(r);
// Enumerate the reachable, fully ITE-hoisted leaves of the
// transition regex. get_cofactors uses the SAME m_ele set above,
// so the (:var 0) conditions in d are matched and pruned.
get_cofactors(m_ele, d, result);
}
}

View file

@ -204,6 +204,25 @@ namespace seq {
*/
void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result);
/**
* Compute the symbolic derivative of r and enumerate its reachable
* leaves in fully ITE-hoisted normal form.
*
* Concretely this returns, for every feasible minterm (character
* class) of δ(r), a pair (path_condition, target_regex). Every
* if-then-else over the input character (including ones that would
* otherwise be buried under a concat/union) is hoisted to the top
* via the same path/interval pruning used by the derivative engine,
* so each target_regex is free of (:var 0) and its nullability is
* always decidable. Unions are kept intact as single leaves (a
* union leaf denotes a single bisimulation state). Infeasible
* minterms are pruned, so all returned leaves are reachable.
*
* This is the entry point the regex_bisim equivalence procedure
* uses: it consumes the target_regex of each pair and ignores the
* (redundant) path condition.
*/
void derivative_cofactors(expr* r, expr_ref_pair_vector& result);
void set_antimirov(bool flag) {
m_antimirov_derivative = flag;

View file

@ -85,39 +85,6 @@ namespace seq {
return is_ground(r);
}
/*
Collect the leaves of a t-regex der (an ITE / antimirov union /
union-tree with regex leaves) into the output vector. Empty
(re.empty) leaves are dropped.
Returns false if we encountered an unexpected node (e.g. a free
variable creeping in) in that case the caller should bail out.
*/
bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) {
ptr_vector<expr> work;
obj_hashtable<expr> seen;
work.push_back(der);
seen.insert(der);
while (!work.empty()) {
expr* e = work.back();
work.pop_back();
expr* c = nullptr, * t = nullptr, * f = nullptr;
if (m.is_ite(e, c, t, f)) {
if (seen.insert_if_not_there(t))
work.push_back(t);
if (seen.insert_if_not_there(f))
work.push_back(f);
continue;
}
if (m_util.re.is_empty(e))
continue;
if (!m_util.is_re(e))
return false;
leaves.push_back(e);
}
return true;
}
/*
Fast inequivalence check based on the get_info().classical flag.
@ -226,15 +193,19 @@ namespace seq {
m_worklist.pop_back();
// Compute the symbolic derivative wrt the canonical variable
// (:var 0). The result is a transition regex (ITE tree) whose
// leaves are regex expressions. We use the classical Brzozowski
// entry point so the derivative stays as a single TRegex and
// does not lift unions to the top via antimirov nodes — this
// preserves the XOR-pair invariant the bisimulation relies on.
expr_ref d(m_rw.mk_brz_derivative(r), m);
// (:var 0) and enumerate its reachable leaves in fully
// ITE-hoisted normal form. Every if-then-else over the input
// character — even one that would otherwise be buried under a
// concat or union — is hoisted to the top and infeasible
// minterms are pruned, so each leaf is a ground regex free of
// (:var 0) whose nullability is always decidable. Unions are
// kept intact as single leaves (a union leaf denotes a single
// bisimulation state, never a split into separate states).
expr_ref_pair_vector cofs(m);
m_rw.brz_derivative_cofactors(r, cofs);
expr_ref_vector leaves(m);
if (!collect_leaves(d, leaves))
return l_undef;
for (auto const& p : cofs)
leaves.push_back(p.second);
// First pass: check for any nullable leaf (definitive
// distinguishing empty-continuation word) or any classically

View file

@ -74,7 +74,6 @@ namespace seq {
unsigned node_of(expr* r);
bool merge_leaf(expr* xor_pair);
bool collect_leaves(expr* der, expr_ref_vector& leaves);
lbool nullability(expr* r);
bool is_supported(expr* r);
// Returns true if the leaf l proves that the original pair is

View file

@ -474,6 +474,17 @@ public:
m_derive.get_cofactors(ele, r, result);
}
/*
Compute the symbolic derivative of r and enumerate its reachable leaves
in fully ITE-hoisted normal form: a list of (path_condition, target)
pairs where every target is free of (:var 0) (so nullability is always
decidable) and unions are kept intact as single states. Used by
regex_bisim, which consumes the targets and ignores the path conditions.
*/
void brz_derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
m_derive.derivative_cofactors(r, result);
}
// heuristic elimination of element from condition that comes form a derivative.
// special case optimization for conjunctions of equalities, disequalities and ranges.
void elim_condition(expr* elem, expr_ref& cond);