mirror of
https://github.com/Z3Prover/z3
synced 2026-06-26 02:20:58 +00:00
feat(seq::bisim): hoist all ITEs to top in bisim derivative leaves
In the bisimulation equivalence path, derivative leaves are now extracted via the new seq_rewriter::brz_derivative_cofactors (derive::derivative_cofactors), which computes the symbolic derivative and enumerates its reachable leaves in fully ITE-hoisted normal form: every if-then-else over the input character (including ones previously buried under a concat or union) is hoisted to the top via decompose_ite, infeasible minterms are pruned, and unions are kept intact as single states. Each leaf is therefore a ground regex free of (:var 0), so its nullability is always decidable. This replaces collect_leaves (which only split top-level ITEs and left buried (:var 0) ITEs inside leaves), the root cause of bisim returning l_undef and falling through to the slow theory solver. Validation on the regex-equivalence corpus (1523 files, -T:5, 8 workers): - vs master: total solved 1394 vs 1378 (+16), soft_timeouts 129 vs 145, 0 soundness disagreements (was 18 -> 5 -> 0). - vs derive: +242 solved (1394 vs 1152), 25.4% faster on commonly-solved files, fixes 18 soundness disagreements, only 6 regressions. - corpus wall time halved (172s vs 332s/349s). - All 91 unit tests pass, including seq_regex_bisim. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
a2b73b0ee6
commit
ee20c9963b
5 changed files with 52 additions and 42 deletions
|
|
@ -1349,5 +1349,15 @@ namespace seq {
|
|||
get_cofactors_rec(r, result);
|
||||
}
|
||||
|
||||
void derive::derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
|
||||
// Compute the symbolic derivative wrt the canonical variable
|
||||
// (:var 0); operator() sets m_ele to that variable.
|
||||
expr_ref d = (*this)(r);
|
||||
// Enumerate the reachable, fully ITE-hoisted leaves of the
|
||||
// transition regex. get_cofactors uses the SAME m_ele set above,
|
||||
// so the (:var 0) conditions in d are matched and pruned.
|
||||
get_cofactors(m_ele, d, result);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -204,6 +204,25 @@ namespace seq {
|
|||
*/
|
||||
void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result);
|
||||
|
||||
/**
|
||||
* Compute the symbolic derivative of r and enumerate its reachable
|
||||
* leaves in fully ITE-hoisted normal form.
|
||||
*
|
||||
* Concretely this returns, for every feasible minterm (character
|
||||
* class) of δ(r), a pair (path_condition, target_regex). Every
|
||||
* if-then-else over the input character (including ones that would
|
||||
* otherwise be buried under a concat/union) is hoisted to the top
|
||||
* via the same path/interval pruning used by the derivative engine,
|
||||
* so each target_regex is free of (:var 0) and its nullability is
|
||||
* always decidable. Unions are kept intact as single leaves (a
|
||||
* union leaf denotes a single bisimulation state). Infeasible
|
||||
* minterms are pruned, so all returned leaves are reachable.
|
||||
*
|
||||
* This is the entry point the regex_bisim equivalence procedure
|
||||
* uses: it consumes the target_regex of each pair and ignores the
|
||||
* (redundant) path condition.
|
||||
*/
|
||||
void derivative_cofactors(expr* r, expr_ref_pair_vector& result);
|
||||
|
||||
void set_antimirov(bool flag) {
|
||||
m_antimirov_derivative = flag;
|
||||
|
|
|
|||
|
|
@ -85,39 +85,6 @@ namespace seq {
|
|||
return is_ground(r);
|
||||
}
|
||||
|
||||
/*
|
||||
Collect the leaves of a t-regex der (an ITE / antimirov union /
|
||||
union-tree with regex leaves) into the output vector. Empty
|
||||
(re.empty) leaves are dropped.
|
||||
|
||||
Returns false if we encountered an unexpected node (e.g. a free
|
||||
variable creeping in) — in that case the caller should bail out.
|
||||
*/
|
||||
bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) {
|
||||
ptr_vector<expr> work;
|
||||
obj_hashtable<expr> seen;
|
||||
work.push_back(der);
|
||||
seen.insert(der);
|
||||
while (!work.empty()) {
|
||||
expr* e = work.back();
|
||||
work.pop_back();
|
||||
expr* c = nullptr, * t = nullptr, * f = nullptr;
|
||||
if (m.is_ite(e, c, t, f)) {
|
||||
if (seen.insert_if_not_there(t))
|
||||
work.push_back(t);
|
||||
if (seen.insert_if_not_there(f))
|
||||
work.push_back(f);
|
||||
continue;
|
||||
}
|
||||
if (m_util.re.is_empty(e))
|
||||
continue;
|
||||
if (!m_util.is_re(e))
|
||||
return false;
|
||||
leaves.push_back(e);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
Fast inequivalence check based on the get_info().classical flag.
|
||||
|
||||
|
|
@ -226,15 +193,19 @@ namespace seq {
|
|||
m_worklist.pop_back();
|
||||
|
||||
// Compute the symbolic derivative wrt the canonical variable
|
||||
// (:var 0). The result is a transition regex (ITE tree) whose
|
||||
// leaves are regex expressions. We use the classical Brzozowski
|
||||
// entry point so the derivative stays as a single TRegex and
|
||||
// does not lift unions to the top via antimirov nodes — this
|
||||
// preserves the XOR-pair invariant the bisimulation relies on.
|
||||
expr_ref d(m_rw.mk_brz_derivative(r), m);
|
||||
// (:var 0) and enumerate its reachable leaves in fully
|
||||
// ITE-hoisted normal form. Every if-then-else over the input
|
||||
// character — even one that would otherwise be buried under a
|
||||
// concat or union — is hoisted to the top and infeasible
|
||||
// minterms are pruned, so each leaf is a ground regex free of
|
||||
// (:var 0) whose nullability is always decidable. Unions are
|
||||
// kept intact as single leaves (a union leaf denotes a single
|
||||
// bisimulation state, never a split into separate states).
|
||||
expr_ref_pair_vector cofs(m);
|
||||
m_rw.brz_derivative_cofactors(r, cofs);
|
||||
expr_ref_vector leaves(m);
|
||||
if (!collect_leaves(d, leaves))
|
||||
return l_undef;
|
||||
for (auto const& p : cofs)
|
||||
leaves.push_back(p.second);
|
||||
|
||||
// First pass: check for any nullable leaf (definitive
|
||||
// distinguishing empty-continuation word) or any classically
|
||||
|
|
|
|||
|
|
@ -74,7 +74,6 @@ namespace seq {
|
|||
|
||||
unsigned node_of(expr* r);
|
||||
bool merge_leaf(expr* xor_pair);
|
||||
bool collect_leaves(expr* der, expr_ref_vector& leaves);
|
||||
lbool nullability(expr* r);
|
||||
bool is_supported(expr* r);
|
||||
// Returns true if the leaf l proves that the original pair is
|
||||
|
|
|
|||
|
|
@ -474,6 +474,17 @@ public:
|
|||
m_derive.get_cofactors(ele, r, result);
|
||||
}
|
||||
|
||||
/*
|
||||
Compute the symbolic derivative of r and enumerate its reachable leaves
|
||||
in fully ITE-hoisted normal form: a list of (path_condition, target)
|
||||
pairs where every target is free of (:var 0) (so nullability is always
|
||||
decidable) and unions are kept intact as single states. Used by
|
||||
regex_bisim, which consumes the targets and ignores the path conditions.
|
||||
*/
|
||||
void brz_derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
|
||||
m_derive.derivative_cofactors(r, result);
|
||||
}
|
||||
|
||||
// heuristic elimination of element from condition that comes form a derivative.
|
||||
// special case optimization for conjunctions of equalities, disequalities and ranges.
|
||||
void elim_condition(expr* elem, expr_ref& cond);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue