From 3d42d997c6385dac314dc563de5186ca67bd04b1 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 25 Jun 2026 22:56:51 +0300 Subject: [PATCH] perf(seq): collapse adjacent full_seq factors in regex concatenation mk_re_concat only merged Sigma* Sigma* when both stars were direct siblings. Once a literal nests the concatenation, e.g. (Sigma* x Sigma*) ++ Sigma*, the trailing duplicate Sigma* survived because the left operand is a concat that ends with Sigma* rather than a bare full_seq. Redundant adjacent stars then multiply derivative states during bisimulation. Add two grouping-insensitive rules using the existing starts_with_full_seq/ends_with_full_seq helpers: R ++ Sigma* -> R when R ends with Sigma* Sigma* ++ R -> R when R starts with Sigma* Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_rewriter.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 9288897b60..eeedc8f586 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3798,6 +3798,17 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { result = b; return BR_DONE; } + // Collapse adjacent full_seq factors regardless of concat grouping: + // (R ++ Σ*) ++ Σ* → R ++ Σ* (a ends with Σ*, b is Σ*) + // Σ* ++ (Σ* ++ R) → Σ* ++ R (a is Σ*, b starts with Σ*) + if (re().is_full_seq(b) && ends_with_full_seq(a)) { + result = a; + return BR_DONE; + } + if (re().is_full_seq(a) && starts_with_full_seq(b)) { + result = b; + return BR_DONE; + } expr* u1 = nullptr, *u2 = nullptr; if (re().is_full_seq(a) && re().is_union(b, u1, u2) && (starts_with_full_seq(u1) || starts_with_full_seq(u2))) {