From be8462cfccbe508c2e690a163423c5016293355b Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Fri, 26 Jun 2026 10:29:26 +0300 Subject: [PATCH] Prune infeasible paths in regex emptiness derivative targets get_derivative_targets collected ITE/union leaves with a naive DFS that ignored the path conditions, so it reached infeasible leaves (e.g. a nested branch requiring elem = 'a' and elem = 'B' simultaneously). Inside inter(L, comp M) an infeasible classical (intersection/complement-free) leaf was then misjudged by re_is_empty as a non-empty residual, so the emptiness check returned l_false and membership kept unrolling -> timeout on regex-inclusion benchmarks such as z3test/regressions/smt2/5721.smt2. Enumerate the reachable leaves with brz_derivative_cofactors instead: it conjoins the ITE-path conditions, prunes infeasible character-range combinations, and re-normalizes each leaf with the path-aware smart constructors so semantically equal states dedup by expr identity. 5721 now solves in ~30ms (was timeout); 5728/5731 unchanged; 92/92 unit tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/smt/seq_regex.cpp | 45 +++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 08852a2a12..878629e530 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -762,36 +762,27 @@ namespace smt { /* - Return a list of all target regexes in the derivative of a regex r, - ignoring the conditions along each path. + Return a list of all reachable target regexes in the derivative of a + regex r. - The derivative construction uses (:var 0) and tries - to eliminate unsat condition paths but it does not perform - full satisfiability checks and it is not guaranteed - that all targets are actually reachable + The derivative is taken wrt (:var 0) and its reachable leaves are + enumerated with the path-aware cofactor engine, which conjoins the + ITE-path conditions and prunes infeasible character-range combinations + (e.g. a nested branch requiring elem = 'a' and elem = 'B'). Each leaf + is re-normalized with the path-aware smart constructors so that + semantically equal states stay syntactically identical (essential for + state dedup in the emptiness closure). + + Without this pruning the naive ITE-tree DFS would reach infeasible + leaves; an infeasible classical (intersection/complement-free) leaf + would then be misjudged as a non-empty residual. */ void seq_regex::get_derivative_targets(expr* r, expr_ref_vector& targets) { - // constructs the derivative wrt (:var 0) - expr_ref d(seq_rw().mk_derivative(r), m); - - // use DFS to collect all the targets (leaf regexes) in d. - expr* _1 = nullptr, * e1 = nullptr, * e2 = nullptr; - obj_hashtable::entry* _2 = nullptr; - vector workset; - workset.push_back(d); - obj_hashtable done; - done.insert(d); - while (workset.size() > 0) { - expr* e = workset.back(); - workset.pop_back(); - if (m.is_ite(e, _1, e1, e2) || re().is_union(e, e1, e2)) { - if (done.insert_if_not_there_core(e1, _2)) - workset.push_back(e1); - if (done.insert_if_not_there_core(e2, _2)) - workset.push_back(e2); - } - else if (!re().is_empty(e)) - targets.push_back(e); + expr_ref_pair_vector cofactors(m); + seq_rw().brz_derivative_cofactors(r, cofactors); + for (auto const& [c, t] : cofactors) { + if (!re().is_empty(t)) + targets.push_back(t); } }