mirror of
https://github.com/Z3Prover/z3
synced 2026-06-10 10:57:15 +00:00
seq_rewriter: add missing concat rewrites for nullable/full-seq/star cases (#9782)
`seq_rewriter.cpp` was missing several regex-concat normalizations around `re.all` (`Σ*`), causing avoidable growth and missed simplifications. This update fills the four gaps: nullable absorption, guarded union distribution, intersection suffix elimination, and nested-star collapse. - **Nullable/full-seq absorption (A1)** - Generalizes `Σ*·R → Σ*` and `R·Σ* → Σ*` beyond `Σ*·Σ*`. - Applies when `R` is interpreted, nullable, and has `min_length = 0`. - **Guarded distribution over union (A2)** - Adds `Σ*·(R1 ∪ R2)` distribution when at least one arm is already `Σ*`-headed. - Rebuilds via normalized union so the redundant arm collapses to `Σ*`. - **Intersection + full-seq tail elimination (A3)** - Adds `(R1 ∩ … ∩ Rn)·Σ* → (R1 ∩ … ∩ Rn)` when every intersection leaf already ends in `Σ*`. - **Nested star concat collapse (A4)** - Adds `R*·(R*·X) → R*·X`, covering non-adjacent star patterns not handled by the prior adjacent-only rewrite. ```cpp if (re().is_full_seq(a) && accepts_empty_word(b)) result = a; // A1 if (re().is_full_seq(a) && re().is_union(b, u1, u2) && ...) ... // A2 if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) && ...) result=a; // A3 if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1,b3) && a1==b3) result=b; // A4 ``` Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
parent
d415ead6a2
commit
e093be8b60
1 changed files with 57 additions and 1 deletions
|
|
@ -4491,10 +4491,60 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) {
|
|||
r* ++ r -> r ++ r*
|
||||
*/
|
||||
br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
|
||||
auto accepts_empty_word = [&](expr* r) {
|
||||
auto info = re().get_info(r);
|
||||
return info.interpreted && info.nullable == l_true && info.min_length == 0;
|
||||
};
|
||||
auto starts_with_full_seq = [&](expr* r) {
|
||||
expr* r1 = nullptr, *r2 = nullptr;
|
||||
return re().is_full_seq(r) || (re().is_concat(r, r1, r2) && re().is_full_seq(r1));
|
||||
};
|
||||
auto ends_with_full_seq = [&](expr* r) {
|
||||
expr* r1 = nullptr, *r2 = nullptr;
|
||||
while (re().is_concat(r, r1, r2))
|
||||
r = r2;
|
||||
return re().is_full_seq(r);
|
||||
};
|
||||
auto all_inter_arms_end_with_full_seq = [&](expr* r) {
|
||||
ptr_buffer<expr> todo;
|
||||
todo.push_back(r);
|
||||
while (!todo.empty()) {
|
||||
expr* r1 = nullptr, *r2 = nullptr;
|
||||
expr* t = todo.back();
|
||||
todo.pop_back();
|
||||
if (re().is_intersection(t, r1, r2)) {
|
||||
todo.push_back(r1);
|
||||
todo.push_back(r2);
|
||||
}
|
||||
else if (!ends_with_full_seq(t)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
if (re().is_full_seq(a) && re().is_full_seq(b)) {
|
||||
result = a;
|
||||
return BR_DONE;
|
||||
}
|
||||
if (re().is_full_seq(a) && accepts_empty_word(b)) {
|
||||
result = a;
|
||||
return BR_DONE;
|
||||
}
|
||||
if (re().is_full_seq(b) && accepts_empty_word(a)) {
|
||||
result = b;
|
||||
return BR_DONE;
|
||||
}
|
||||
expr* u1 = nullptr, *u2 = nullptr;
|
||||
if (re().is_full_seq(a) && re().is_union(b, u1, u2) &&
|
||||
(starts_with_full_seq(u1) || starts_with_full_seq(u2))) {
|
||||
result = mk_regex_union_normalize(mk_regex_concat(a, u1), mk_regex_concat(a, u2));
|
||||
return BR_REWRITE2;
|
||||
}
|
||||
if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) &&
|
||||
all_inter_arms_end_with_full_seq(a)) {
|
||||
result = a;
|
||||
return BR_DONE;
|
||||
}
|
||||
if (re().is_empty(a)) {
|
||||
result = a;
|
||||
return BR_DONE;
|
||||
|
|
@ -4525,7 +4575,8 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
|
|||
result = re().mk_to_re(str().mk_concat(a_str, b_str));
|
||||
return BR_REWRITE2;
|
||||
}
|
||||
expr* a1 = nullptr, *b1 = nullptr;
|
||||
expr* a1 = nullptr;
|
||||
expr* b1 = nullptr;
|
||||
if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) {
|
||||
result = re().mk_to_re(str().mk_concat(a1, b1));
|
||||
return BR_DONE;
|
||||
|
|
@ -4534,6 +4585,11 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
|
|||
result = a;
|
||||
return BR_DONE;
|
||||
}
|
||||
expr* b2 = nullptr, *b3 = nullptr;
|
||||
if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1, b3) && a1 == b3) {
|
||||
result = b;
|
||||
return BR_DONE;
|
||||
}
|
||||
if (re().is_star(a, a1) && a1 == b) {
|
||||
result = re().mk_concat(b, a);
|
||||
return BR_DONE;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue