3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-10 10:57:15 +00:00

seq_rewriter: add missing concat rewrites for nullable/full-seq/star cases (#9782)

`seq_rewriter.cpp` was missing several regex-concat normalizations
around `re.all` (`Σ*`), causing avoidable growth and missed
simplifications. This update fills the four gaps: nullable absorption,
guarded union distribution, intersection suffix elimination, and
nested-star collapse.

- **Nullable/full-seq absorption (A1)**
  - Generalizes `Σ*·R → Σ*` and `R·Σ* → Σ*` beyond `Σ*·Σ*`.
  - Applies when `R` is interpreted, nullable, and has `min_length = 0`.

- **Guarded distribution over union (A2)**
- Adds `Σ*·(R1 ∪ R2)` distribution when at least one arm is already
`Σ*`-headed.
- Rebuilds via normalized union so the redundant arm collapses to `Σ*`.

- **Intersection + full-seq tail elimination (A3)**
- Adds `(R1 ∩ … ∩ Rn)·Σ* → (R1 ∩ … ∩ Rn)` when every intersection leaf
already ends in `Σ*`.

- **Nested star concat collapse (A4)**
- Adds `R*·(R*·X) → R*·X`, covering non-adjacent star patterns not
handled by the prior adjacent-only rewrite.

```cpp
if (re().is_full_seq(a) && accepts_empty_word(b)) result = a;               // A1
if (re().is_full_seq(a) && re().is_union(b, u1, u2) && ...) ...             // A2
if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) && ...) result=a; // A3
if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1,b3) && a1==b3) result=b; // A4
```

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
Copilot 2026-06-09 14:38:38 -07:00 committed by GitHub
parent d415ead6a2
commit e093be8b60
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -4491,10 +4491,60 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) {
r* ++ r -> r ++ r*
*/
br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
auto accepts_empty_word = [&](expr* r) {
auto info = re().get_info(r);
return info.interpreted && info.nullable == l_true && info.min_length == 0;
};
auto starts_with_full_seq = [&](expr* r) {
expr* r1 = nullptr, *r2 = nullptr;
return re().is_full_seq(r) || (re().is_concat(r, r1, r2) && re().is_full_seq(r1));
};
auto ends_with_full_seq = [&](expr* r) {
expr* r1 = nullptr, *r2 = nullptr;
while (re().is_concat(r, r1, r2))
r = r2;
return re().is_full_seq(r);
};
auto all_inter_arms_end_with_full_seq = [&](expr* r) {
ptr_buffer<expr> todo;
todo.push_back(r);
while (!todo.empty()) {
expr* r1 = nullptr, *r2 = nullptr;
expr* t = todo.back();
todo.pop_back();
if (re().is_intersection(t, r1, r2)) {
todo.push_back(r1);
todo.push_back(r2);
}
else if (!ends_with_full_seq(t)) {
return false;
}
}
return true;
};
if (re().is_full_seq(a) && re().is_full_seq(b)) {
result = a;
return BR_DONE;
}
if (re().is_full_seq(a) && accepts_empty_word(b)) {
result = a;
return BR_DONE;
}
if (re().is_full_seq(b) && accepts_empty_word(a)) {
result = b;
return BR_DONE;
}
expr* u1 = nullptr, *u2 = nullptr;
if (re().is_full_seq(a) && re().is_union(b, u1, u2) &&
(starts_with_full_seq(u1) || starts_with_full_seq(u2))) {
result = mk_regex_union_normalize(mk_regex_concat(a, u1), mk_regex_concat(a, u2));
return BR_REWRITE2;
}
if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) &&
all_inter_arms_end_with_full_seq(a)) {
result = a;
return BR_DONE;
}
if (re().is_empty(a)) {
result = a;
return BR_DONE;
@ -4525,7 +4575,8 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
result = re().mk_to_re(str().mk_concat(a_str, b_str));
return BR_REWRITE2;
}
expr* a1 = nullptr, *b1 = nullptr;
expr* a1 = nullptr;
expr* b1 = nullptr;
if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) {
result = re().mk_to_re(str().mk_concat(a1, b1));
return BR_DONE;
@ -4534,6 +4585,11 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
result = a;
return BR_DONE;
}
expr* b2 = nullptr, *b3 = nullptr;
if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1, b3) && a1 == b3) {
result = b;
return BR_DONE;
}
if (re().is_star(a, a1) && a1 == b) {
result = re().mk_concat(b, a);
return BR_DONE;