diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 039e680814..2f6666a8fc 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -1349,5 +1349,15 @@ namespace seq { get_cofactors_rec(r, result); } + void derive::derivative_cofactors(expr* r, expr_ref_pair_vector& result) { + // Compute the symbolic derivative wrt the canonical variable + // (:var 0); operator() sets m_ele to that variable. + expr_ref d = (*this)(r); + // Enumerate the reachable, fully ITE-hoisted leaves of the + // transition regex. get_cofactors uses the SAME m_ele set above, + // so the (:var 0) conditions in d are matched and pruned. + get_cofactors(m_ele, d, result); + } + } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index c011af70eb..5af528d15f 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -204,6 +204,25 @@ namespace seq { */ void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result); + /** + * Compute the symbolic derivative of r and enumerate its reachable + * leaves in fully ITE-hoisted normal form. + * + * Concretely this returns, for every feasible minterm (character + * class) of δ(r), a pair (path_condition, target_regex). Every + * if-then-else over the input character (including ones that would + * otherwise be buried under a concat/union) is hoisted to the top + * via the same path/interval pruning used by the derivative engine, + * so each target_regex is free of (:var 0) and its nullability is + * always decidable. Unions are kept intact as single leaves (a + * union leaf denotes a single bisimulation state). Infeasible + * minterms are pruned, so all returned leaves are reachable. + * + * This is the entry point the regex_bisim equivalence procedure + * uses: it consumes the target_regex of each pair and ignores the + * (redundant) path condition. + */ + void derivative_cofactors(expr* r, expr_ref_pair_vector& result); void set_antimirov(bool flag) { m_antimirov_derivative = flag; diff --git a/src/ast/rewriter/seq_regex_bisim.cpp b/src/ast/rewriter/seq_regex_bisim.cpp index 9e1a8e4986..68b2907a55 100644 --- a/src/ast/rewriter/seq_regex_bisim.cpp +++ b/src/ast/rewriter/seq_regex_bisim.cpp @@ -85,39 +85,6 @@ namespace seq { return is_ground(r); } - /* - Collect the leaves of a t-regex der (an ITE / antimirov union / - union-tree with regex leaves) into the output vector. Empty - (re.empty) leaves are dropped. - - Returns false if we encountered an unexpected node (e.g. a free - variable creeping in) — in that case the caller should bail out. - */ - bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) { - ptr_vector work; - obj_hashtable seen; - work.push_back(der); - seen.insert(der); - while (!work.empty()) { - expr* e = work.back(); - work.pop_back(); - expr* c = nullptr, * t = nullptr, * f = nullptr; - if (m.is_ite(e, c, t, f)) { - if (seen.insert_if_not_there(t)) - work.push_back(t); - if (seen.insert_if_not_there(f)) - work.push_back(f); - continue; - } - if (m_util.re.is_empty(e)) - continue; - if (!m_util.is_re(e)) - return false; - leaves.push_back(e); - } - return true; - } - /* Fast inequivalence check based on the get_info().classical flag. @@ -226,15 +193,19 @@ namespace seq { m_worklist.pop_back(); // Compute the symbolic derivative wrt the canonical variable - // (:var 0). The result is a transition regex (ITE tree) whose - // leaves are regex expressions. We use the classical Brzozowski - // entry point so the derivative stays as a single TRegex and - // does not lift unions to the top via antimirov nodes — this - // preserves the XOR-pair invariant the bisimulation relies on. - expr_ref d(m_rw.mk_brz_derivative(r), m); + // (:var 0) and enumerate its reachable leaves in fully + // ITE-hoisted normal form. Every if-then-else over the input + // character — even one that would otherwise be buried under a + // concat or union — is hoisted to the top and infeasible + // minterms are pruned, so each leaf is a ground regex free of + // (:var 0) whose nullability is always decidable. Unions are + // kept intact as single leaves (a union leaf denotes a single + // bisimulation state, never a split into separate states). + expr_ref_pair_vector cofs(m); + m_rw.brz_derivative_cofactors(r, cofs); expr_ref_vector leaves(m); - if (!collect_leaves(d, leaves)) - return l_undef; + for (auto const& p : cofs) + leaves.push_back(p.second); // First pass: check for any nullable leaf (definitive // distinguishing empty-continuation word) or any classically diff --git a/src/ast/rewriter/seq_regex_bisim.h b/src/ast/rewriter/seq_regex_bisim.h index d158cc3793..7ec5c30a33 100644 --- a/src/ast/rewriter/seq_regex_bisim.h +++ b/src/ast/rewriter/seq_regex_bisim.h @@ -74,7 +74,6 @@ namespace seq { unsigned node_of(expr* r); bool merge_leaf(expr* xor_pair); - bool collect_leaves(expr* der, expr_ref_vector& leaves); lbool nullability(expr* r); bool is_supported(expr* r); // Returns true if the leaf l proves that the original pair is diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index a3b5fcd929..39084deeff 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -474,6 +474,17 @@ public: m_derive.get_cofactors(ele, r, result); } + /* + Compute the symbolic derivative of r and enumerate its reachable leaves + in fully ITE-hoisted normal form: a list of (path_condition, target) + pairs where every target is free of (:var 0) (so nullability is always + decidable) and unions are kept intact as single states. Used by + regex_bisim, which consumes the targets and ignores the path conditions. + */ + void brz_derivative_cofactors(expr* r, expr_ref_pair_vector& result) { + m_derive.derivative_cofactors(r, result); + } + // heuristic elimination of element from condition that comes form a derivative. // special case optimization for conjunctions of equalities, disequalities and ranges. void elim_condition(expr* elem, expr_ref& cond);