From 1089c92ce8bb4ed129ff64167482f42cca027a14 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Fri, 26 Jun 2026 13:07:29 +0300 Subject: [PATCH] Remove harmful prefix-factoring from derivative union normalization The p.x | p.y -> p.(x|y) prefix-factoring rule in add_union_elem is semantically valid but defeats bisimulation/emptiness state dedup: factoring a common nullable-star prefix (e.g. (a|b)*) produces nested S.(...|...) leaves that never stabilise to a bounded state family, so the closure keys ever-larger distinct expressions and explodes. Removing it makes branch derivatives match the classical Brzozowski distributed form, fixing the flat_vs_loop regex-equivalence timeouts (n5..n20 now solve in tens-to-hundreds of ms vs prior 10s+ timeout). Full 1523-file regex-equivalence sweep: OK 1478 -> 1486, BRANCH_WORSE 25 -> 15, no new timeouts, no soundness mismatches. 92/92 unit tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index eac129b6c1..8cda29383b 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -825,15 +825,15 @@ namespace seq { changed = true; break; } - // Prefix factoring: p·x ∪ p·y = p·(x ∪ y). - expr *p1, *x1, *p2, *x2; - if (re().is_concat(e, p1, x1) && re().is_concat(s, p2, x2) && p1 == p2) { - set.set(i, set.back()); - set.pop_back(); - e = mk_deriv_concat(p1, mk_union(x1, x2)); - changed = true; - break; - } + // NB: a `p·x ∪ p·y → p·(x ∪ y)` prefix-factoring rule used to + // live here. It is semantically valid but harmful: factoring a + // common nullable-star prefix (e.g. (a|b)*) produces nested + // `S·(… ∪ …)` leaves that never stabilise to a bounded state + // family, so the bisimulation/emptiness closure keys ever-larger + // distinct expressions and fails to dedup. Leaving the union in + // distributed form keeps each disjunct a "position" state, which + // matches the classical Brzozowski derivative and lets bisim + // close in a bounded number of steps on flat-vs-loop equivalence. } } set.push_back(e);