From 622977814c43a2da288a5366f4d5c7f1a732a4d5 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Mon, 15 Jun 2026 10:28:18 -0700 Subject: [PATCH] fix(seq_regex_bisim): treat each transition-regex leaf as a single state The bisimulation worklist in regex_bisim::collect_leaves used to descend through re.union and re.antimirov_union at the top of each leaf of the transition regex, splitting a single bisimulation state into multiple states before they were merged into the union-find. This contradicts the bisimulation invariant: each leaf of a t-regex represents one state, regardless of its top-level shape. While the split happens to be sound when the goal is asserting L(union(A,B)) = empty (since L(A) = empty AND L(B) = empty is equivalent), it adds spurious merges to the union-find that distort state-class identities, slow down convergence on hard equivalence queries, and create latent unsoundness risk for any extension that interprets leaves more semantically (XOR pair handling, classical-flag propagation, future antimirov re-enable). After the fix, on the 1523-file regex-equivalence corpus (C:/git/bench/inputs/regex-equivalence, 5s/file timeout): * no SAT<->UNSAT verdict flips vs prior master, * 6 cases flip from timeout to sat (now decided in <1s), * ~10% wall-time reduction on commonly-solved files (0.902x). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_regex_bisim.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ast/rewriter/seq_regex_bisim.cpp b/src/ast/rewriter/seq_regex_bisim.cpp index 5e849017c..e296d5b3e 100644 --- a/src/ast/rewriter/seq_regex_bisim.cpp +++ b/src/ast/rewriter/seq_regex_bisim.cpp @@ -86,9 +86,15 @@ namespace seq { } /* - Collect the leaves of a t-regex der (an ITE / antimirov union / - union-tree with regex leaves) into the output vector. Empty - (re.empty) leaves are dropped. + Collect the leaves of a t-regex der (an ITE-tree whose leaves are + regex expressions) into the output vector. Empty (re.empty) leaves + are dropped. + + Each leaf is treated as a single bisimulation state regardless of + its top-level shape (including re.union and re.antimirov_union): + descending into a union at the leaf would split one state into + several, which is semantically unsound for the bisimulation / + union-find merging that follows. Returns false if we encountered an unexpected node (e.g. a free variable creeping in) — in that case the caller should bail out. @@ -102,9 +108,7 @@ namespace seq { expr* e = work.back(); work.pop_back(); expr* c = nullptr, * t = nullptr, * f = nullptr; - if (m.is_ite(e, c, t, f) || - m_util.re.is_union(e, t, f) || - m_util.re.is_antimirov_union(e, t, f)) { + if (m.is_ite(e, c, t, f)) { if (seen.insert_if_not_there(t)) work.push_back(t); if (seen.insert_if_not_there(f))