From e093be8b607d255373402bd8f52554cd0dd504ba Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:38:38 -0700 Subject: [PATCH] seq_rewriter: add missing concat rewrites for nullable/full-seq/star cases (#9782) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `seq_rewriter.cpp` was missing several regex-concat normalizations around `re.all` (`Σ*`), causing avoidable growth and missed simplifications. This update fills the four gaps: nullable absorption, guarded union distribution, intersection suffix elimination, and nested-star collapse. - **Nullable/full-seq absorption (A1)** - Generalizes `Σ*·R → Σ*` and `R·Σ* → Σ*` beyond `Σ*·Σ*`. - Applies when `R` is interpreted, nullable, and has `min_length = 0`. - **Guarded distribution over union (A2)** - Adds `Σ*·(R1 ∪ R2)` distribution when at least one arm is already `Σ*`-headed. - Rebuilds via normalized union so the redundant arm collapses to `Σ*`. - **Intersection + full-seq tail elimination (A3)** - Adds `(R1 ∩ … ∩ Rn)·Σ* → (R1 ∩ … ∩ Rn)` when every intersection leaf already ends in `Σ*`. - **Nested star concat collapse (A4)** - Adds `R*·(R*·X) → R*·X`, covering non-adjacent star patterns not handled by the prior adjacent-only rewrite. ```cpp if (re().is_full_seq(a) && accepts_empty_word(b)) result = a; // A1 if (re().is_full_seq(a) && re().is_union(b, u1, u2) && ...) ... // A2 if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) && ...) result=a; // A3 if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1,b3) && a1==b3) result=b; // A4 ``` Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_rewriter.cpp | 58 ++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 26455e2f1..46da49cf2 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -4491,10 +4491,60 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) { r* ++ r -> r ++ r* */ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { + auto accepts_empty_word = [&](expr* r) { + auto info = re().get_info(r); + return info.interpreted && info.nullable == l_true && info.min_length == 0; + }; + auto starts_with_full_seq = [&](expr* r) { + expr* r1 = nullptr, *r2 = nullptr; + return re().is_full_seq(r) || (re().is_concat(r, r1, r2) && re().is_full_seq(r1)); + }; + auto ends_with_full_seq = [&](expr* r) { + expr* r1 = nullptr, *r2 = nullptr; + while (re().is_concat(r, r1, r2)) + r = r2; + return re().is_full_seq(r); + }; + auto all_inter_arms_end_with_full_seq = [&](expr* r) { + ptr_buffer todo; + todo.push_back(r); + while (!todo.empty()) { + expr* r1 = nullptr, *r2 = nullptr; + expr* t = todo.back(); + todo.pop_back(); + if (re().is_intersection(t, r1, r2)) { + todo.push_back(r1); + todo.push_back(r2); + } + else if (!ends_with_full_seq(t)) { + return false; + } + } + return true; + }; if (re().is_full_seq(a) && re().is_full_seq(b)) { result = a; return BR_DONE; } + if (re().is_full_seq(a) && accepts_empty_word(b)) { + result = a; + return BR_DONE; + } + if (re().is_full_seq(b) && accepts_empty_word(a)) { + result = b; + return BR_DONE; + } + expr* u1 = nullptr, *u2 = nullptr; + if (re().is_full_seq(a) && re().is_union(b, u1, u2) && + (starts_with_full_seq(u1) || starts_with_full_seq(u2))) { + result = mk_regex_union_normalize(mk_regex_concat(a, u1), mk_regex_concat(a, u2)); + return BR_REWRITE2; + } + if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) && + all_inter_arms_end_with_full_seq(a)) { + result = a; + return BR_DONE; + } if (re().is_empty(a)) { result = a; return BR_DONE; @@ -4525,7 +4575,8 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { result = re().mk_to_re(str().mk_concat(a_str, b_str)); return BR_REWRITE2; } - expr* a1 = nullptr, *b1 = nullptr; + expr* a1 = nullptr; + expr* b1 = nullptr; if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) { result = re().mk_to_re(str().mk_concat(a1, b1)); return BR_DONE; @@ -4534,6 +4585,11 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } + expr* b2 = nullptr, *b3 = nullptr; + if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1, b3) && a1 == b3) { + result = b; + return BR_DONE; + } if (re().is_star(a, a1) && a1 == b) { result = re().mk_concat(b, a); return BR_DONE;