mirror of
https://github.com/Z3Prover/z3
synced 2026-07-02 13:26:10 +00:00
feat(seq::bisim): hoist all ITEs to top in bisim derivative leaves
In the bisimulation equivalence path, derivative leaves are now extracted via the new seq_rewriter::brz_derivative_cofactors (derive::derivative_cofactors), which computes the symbolic derivative and enumerates its reachable leaves in fully ITE-hoisted normal form: every if-then-else over the input character (including ones previously buried under a concat or union) is hoisted to the top via decompose_ite, infeasible minterms are pruned, and unions are kept intact as single states. Each leaf is therefore a ground regex free of (:var 0), so its nullability is always decidable. This replaces collect_leaves (which only split top-level ITEs and left buried (:var 0) ITEs inside leaves), the root cause of bisim returning l_undef and falling through to the slow theory solver. Validation on the regex-equivalence corpus (1523 files, -T:5, 8 workers): - vs master: total solved 1394 vs 1378 (+16), soft_timeouts 129 vs 145, 0 soundness disagreements (was 18 -> 5 -> 0). - vs derive: +242 solved (1394 vs 1152), 25.4% faster on commonly-solved files, fixes 18 soundness disagreements, only 6 regressions. - corpus wall time halved (172s vs 332s/349s). - All 91 unit tests pass, including seq_regex_bisim. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
a2b73b0ee6
commit
ee20c9963b
5 changed files with 52 additions and 42 deletions
|
|
@ -1349,5 +1349,15 @@ namespace seq {
|
||||||
get_cofactors_rec(r, result);
|
get_cofactors_rec(r, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void derive::derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
|
||||||
|
// Compute the symbolic derivative wrt the canonical variable
|
||||||
|
// (:var 0); operator() sets m_ele to that variable.
|
||||||
|
expr_ref d = (*this)(r);
|
||||||
|
// Enumerate the reachable, fully ITE-hoisted leaves of the
|
||||||
|
// transition regex. get_cofactors uses the SAME m_ele set above,
|
||||||
|
// so the (:var 0) conditions in d are matched and pruned.
|
||||||
|
get_cofactors(m_ele, d, result);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -204,6 +204,25 @@ namespace seq {
|
||||||
*/
|
*/
|
||||||
void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result);
|
void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the symbolic derivative of r and enumerate its reachable
|
||||||
|
* leaves in fully ITE-hoisted normal form.
|
||||||
|
*
|
||||||
|
* Concretely this returns, for every feasible minterm (character
|
||||||
|
* class) of δ(r), a pair (path_condition, target_regex). Every
|
||||||
|
* if-then-else over the input character (including ones that would
|
||||||
|
* otherwise be buried under a concat/union) is hoisted to the top
|
||||||
|
* via the same path/interval pruning used by the derivative engine,
|
||||||
|
* so each target_regex is free of (:var 0) and its nullability is
|
||||||
|
* always decidable. Unions are kept intact as single leaves (a
|
||||||
|
* union leaf denotes a single bisimulation state). Infeasible
|
||||||
|
* minterms are pruned, so all returned leaves are reachable.
|
||||||
|
*
|
||||||
|
* This is the entry point the regex_bisim equivalence procedure
|
||||||
|
* uses: it consumes the target_regex of each pair and ignores the
|
||||||
|
* (redundant) path condition.
|
||||||
|
*/
|
||||||
|
void derivative_cofactors(expr* r, expr_ref_pair_vector& result);
|
||||||
|
|
||||||
void set_antimirov(bool flag) {
|
void set_antimirov(bool flag) {
|
||||||
m_antimirov_derivative = flag;
|
m_antimirov_derivative = flag;
|
||||||
|
|
|
||||||
|
|
@ -85,39 +85,6 @@ namespace seq {
|
||||||
return is_ground(r);
|
return is_ground(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
Collect the leaves of a t-regex der (an ITE / antimirov union /
|
|
||||||
union-tree with regex leaves) into the output vector. Empty
|
|
||||||
(re.empty) leaves are dropped.
|
|
||||||
|
|
||||||
Returns false if we encountered an unexpected node (e.g. a free
|
|
||||||
variable creeping in) — in that case the caller should bail out.
|
|
||||||
*/
|
|
||||||
bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) {
|
|
||||||
ptr_vector<expr> work;
|
|
||||||
obj_hashtable<expr> seen;
|
|
||||||
work.push_back(der);
|
|
||||||
seen.insert(der);
|
|
||||||
while (!work.empty()) {
|
|
||||||
expr* e = work.back();
|
|
||||||
work.pop_back();
|
|
||||||
expr* c = nullptr, * t = nullptr, * f = nullptr;
|
|
||||||
if (m.is_ite(e, c, t, f)) {
|
|
||||||
if (seen.insert_if_not_there(t))
|
|
||||||
work.push_back(t);
|
|
||||||
if (seen.insert_if_not_there(f))
|
|
||||||
work.push_back(f);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (m_util.re.is_empty(e))
|
|
||||||
continue;
|
|
||||||
if (!m_util.is_re(e))
|
|
||||||
return false;
|
|
||||||
leaves.push_back(e);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Fast inequivalence check based on the get_info().classical flag.
|
Fast inequivalence check based on the get_info().classical flag.
|
||||||
|
|
||||||
|
|
@ -226,15 +193,19 @@ namespace seq {
|
||||||
m_worklist.pop_back();
|
m_worklist.pop_back();
|
||||||
|
|
||||||
// Compute the symbolic derivative wrt the canonical variable
|
// Compute the symbolic derivative wrt the canonical variable
|
||||||
// (:var 0). The result is a transition regex (ITE tree) whose
|
// (:var 0) and enumerate its reachable leaves in fully
|
||||||
// leaves are regex expressions. We use the classical Brzozowski
|
// ITE-hoisted normal form. Every if-then-else over the input
|
||||||
// entry point so the derivative stays as a single TRegex and
|
// character — even one that would otherwise be buried under a
|
||||||
// does not lift unions to the top via antimirov nodes — this
|
// concat or union — is hoisted to the top and infeasible
|
||||||
// preserves the XOR-pair invariant the bisimulation relies on.
|
// minterms are pruned, so each leaf is a ground regex free of
|
||||||
expr_ref d(m_rw.mk_brz_derivative(r), m);
|
// (:var 0) whose nullability is always decidable. Unions are
|
||||||
|
// kept intact as single leaves (a union leaf denotes a single
|
||||||
|
// bisimulation state, never a split into separate states).
|
||||||
|
expr_ref_pair_vector cofs(m);
|
||||||
|
m_rw.brz_derivative_cofactors(r, cofs);
|
||||||
expr_ref_vector leaves(m);
|
expr_ref_vector leaves(m);
|
||||||
if (!collect_leaves(d, leaves))
|
for (auto const& p : cofs)
|
||||||
return l_undef;
|
leaves.push_back(p.second);
|
||||||
|
|
||||||
// First pass: check for any nullable leaf (definitive
|
// First pass: check for any nullable leaf (definitive
|
||||||
// distinguishing empty-continuation word) or any classically
|
// distinguishing empty-continuation word) or any classically
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,6 @@ namespace seq {
|
||||||
|
|
||||||
unsigned node_of(expr* r);
|
unsigned node_of(expr* r);
|
||||||
bool merge_leaf(expr* xor_pair);
|
bool merge_leaf(expr* xor_pair);
|
||||||
bool collect_leaves(expr* der, expr_ref_vector& leaves);
|
|
||||||
lbool nullability(expr* r);
|
lbool nullability(expr* r);
|
||||||
bool is_supported(expr* r);
|
bool is_supported(expr* r);
|
||||||
// Returns true if the leaf l proves that the original pair is
|
// Returns true if the leaf l proves that the original pair is
|
||||||
|
|
|
||||||
|
|
@ -474,6 +474,17 @@ public:
|
||||||
m_derive.get_cofactors(ele, r, result);
|
m_derive.get_cofactors(ele, r, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Compute the symbolic derivative of r and enumerate its reachable leaves
|
||||||
|
in fully ITE-hoisted normal form: a list of (path_condition, target)
|
||||||
|
pairs where every target is free of (:var 0) (so nullability is always
|
||||||
|
decidable) and unions are kept intact as single states. Used by
|
||||||
|
regex_bisim, which consumes the targets and ignores the path conditions.
|
||||||
|
*/
|
||||||
|
void brz_derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
|
||||||
|
m_derive.derivative_cofactors(r, result);
|
||||||
|
}
|
||||||
|
|
||||||
// heuristic elimination of element from condition that comes form a derivative.
|
// heuristic elimination of element from condition that comes form a derivative.
|
||||||
// special case optimization for conjunctions of equalities, disequalities and ranges.
|
// special case optimization for conjunctions of equalities, disequalities and ranges.
|
||||||
void elim_condition(expr* elem, expr_ref& cond);
|
void elim_condition(expr* elem, expr_ref& cond);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue