3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 07:06:28 +00:00
z3/src/test/seq_rewriter.cpp
Copilot 8c2a425e4b
Smart constructors for regex ranges: canonical form at construction time (#9814)
Regex range expressions (`re.range`) and Boolean operations over them
were left in unsimplified form, defeating downstream optimisations
(bisimulation classical fast-path, derivative engine) and producing
semantically-empty terms not syntactically equal to `re.none`.

## Changes

### `seq_decl_plugin.h` / `seq_decl_plugin.cpp`

- **`seq_util::rex::mk_range(sort*, unsigned lo, unsigned hi)`** — new
smart constructor that normalises at call time:
  - `lo > hi` → `re.empty`
  - `lo == hi` → `str.to_re` (singleton string)
  - `lo < hi` → `re.range`
- **`mk_info_rec` `OP_RE_RANGE`** — concrete non-empty ranges (both
bounds are single-char literals with `lo ≤ hi`) now return `classical =
true`, enabling the XOR-bisimulation `classical_distinguishing`
fast-path on character-predicate leaves. Symbolic/unknown ranges retain
`classical = false`.

### `seq_rewriter.cpp`

- **`mk_re_range`** — singleton collapse: `(re.range "a" "a")` →
`(str.to_re "a")`
- **`mk_regex_inter_normalize`** — range × range intersection: `[a,b] ∩
[c,d]` → `[max(a,c), min(b,d)]`, or `re.none` (disjoint), or `str.to_re`
(boundary singleton); now delegates to `re().mk_range(sort*, lo, hi)`
- **`mk_regex_union_normalize`** — range × range union for
overlapping/adjacent ranges: `[a,b] ∪ [c,d]` → `[min(a,c), max(b,d)]`;
disjoint ranges fall through to existing `merge_regex_sets`; now
delegates to `re().mk_range(sort*, lo, hi)`
- **`mk_re_complement`** — range complement expands to one or two
concrete ranges instead of an opaque `re.comp` node; now delegates to
`re().mk_range(sort*, lo, hi)`:
  - `comp([0, b])` → `[b+1, max]`
  - `comp([a, max])` → `[0, a-1]`
  - `comp([a, b])` → `[0, a-1] ∪ [b+1, max]`

```
(simplify (re.range "z" "a"))                              ; → re.none
(simplify (re.range "a" "a"))                              ; → (str.to_re "a")
(simplify (re.inter (re.range "a" "z") (re.range "f" "k"))); → (re.range "f" "k")
(simplify (re.union (re.range "a" "f") (re.range "g" "k"))); → (re.range "a" "k")
(simplify (re.comp  (re.range "b" "y")))                   ; → (re.union [0,a] [z,max])
```

### Tests

New `src/test/seq_rewriter.cpp` with 14 cases covering all the above
reductions plus downstream propagation (star/concat/union/inter
absorbing empty ranges).

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2026-06-16 13:58:56 -06:00

193 lines
7.5 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*++
Copyright (c) 2024 Microsoft Corporation
Regression tests for seq_rewriter smart constructors for regex ranges.
Tests:
1. Empty range (lo > hi) → re.none
2. Singleton range (lo == hi) → str.to_re lo
3. Range ∩ Range → reduced range or re.none
4. Range ∪ Range → merged range for overlapping/adjacent
5. Complement of range → one or two ranges
6. Downstream operators absorb empty ranges correctly
--*/
#include "ast/ast_pp.h"
#include "ast/reg_decl_plugins.h"
#include "ast/rewriter/th_rewriter.h"
#include "ast/seq_decl_plugin.h"
#include <iostream>
// Wrap the character code `c` in a one-character string literal term
// owned by manager `m`.
static expr_ref mk_str(ast_manager& m, seq_util& su, unsigned c) {
    zstring const text(c);
    expr_ref lit(su.str.mk_string(text), m);
    return lit;
}
void tst_seq_rewriter() {
ast_manager m;
reg_decl_plugins(m);
th_rewriter rw(m);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
auto range = [&](unsigned lo, unsigned hi) -> expr_ref {
return expr_ref(su.re.mk_range(mk_str(m, su, lo), mk_str(m, su, hi)), m);
};
// Arbitrary regex variable for downstream tests.
app_ref R(m.mk_fresh_const("R", re_sort), m);
// -----------------------------------------------------------------------
// 1. Empty range (lo > hi) → re.none
// -----------------------------------------------------------------------
{
expr_ref e = range('z', 'a');
rw(e);
std::cout << "empty range lo>hi: " << mk_pp(e, m) << "\n";
ENSURE(su.re.is_empty(e));
}
// -----------------------------------------------------------------------
// 2. Singleton range (lo == hi) → str.to_re lo
// -----------------------------------------------------------------------
{
expr_ref e = range('a', 'a');
rw(e);
std::cout << "singleton range: " << mk_pp(e, m) << "\n";
expr* inner = nullptr;
ENSURE(su.re.is_to_re(e, inner));
}
// -----------------------------------------------------------------------
// 3. Range intersection: overlapping → smaller range
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_inter(range('a', 'z'), range('f', 'k')), m);
rw(e);
std::cout << "range inter overlapping: " << mk_pp(e, m) << "\n";
unsigned lo = 0, hi = 0;
ENSURE(su.re.is_range(e, lo, hi) && lo == 'f' && hi == 'k');
}
// -----------------------------------------------------------------------
// 4. Range intersection: disjoint → re.none
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_inter(range('a', 'f'), range('k', 'z')), m);
rw(e);
std::cout << "range inter disjoint: " << mk_pp(e, m) << "\n";
ENSURE(su.re.is_empty(e));
}
// -----------------------------------------------------------------------
// 5. Range intersection: touching at boundary → singleton (str.to_re "f")
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_inter(range('a', 'f'), range('f', 'z')), m);
rw(e);
std::cout << "range inter touching: " << mk_pp(e, m) << "\n";
expr* inner = nullptr;
ENSURE(su.re.is_to_re(e, inner));
}
// -----------------------------------------------------------------------
// 6. Range union: overlapping → merged range
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_union(range('a', 'f'), range('e', 'k')), m);
rw(e);
std::cout << "range union overlapping: " << mk_pp(e, m) << "\n";
unsigned lo = 0, hi = 0;
ENSURE(su.re.is_range(e, lo, hi) && lo == 'a' && hi == 'k');
}
// -----------------------------------------------------------------------
// 7. Range union: adjacent → merged range
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_union(range('a', 'f'), range('g', 'k')), m);
rw(e);
std::cout << "range union adjacent: " << mk_pp(e, m) << "\n";
unsigned lo = 0, hi = 0;
ENSURE(su.re.is_range(e, lo, hi) && lo == 'a' && hi == 'k');
}
// -----------------------------------------------------------------------
// 8. Range union: disjoint → stays as union
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_union(range('a', 'c'), range('m', 'z')), m);
rw(e);
std::cout << "range union disjoint (stays as union): " << mk_pp(e, m) << "\n";
ENSURE(!su.re.is_range(e));
}
// -----------------------------------------------------------------------
// 9. Range complement (general): no longer a complement node
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_complement(range('b', 'y')), m);
rw(e);
std::cout << "range comp general: " << mk_pp(e, m) << "\n";
ENSURE(!su.re.is_complement(e));
}
// -----------------------------------------------------------------------
// 10. Range complement (lo = 0): single range [hi+1, max]
// -----------------------------------------------------------------------
{
expr_ref lo_str(su.str.mk_string(zstring(0u)), m);
expr_ref hi_str(su.str.mk_string(zstring((unsigned)'f')), m);
expr_ref e(su.re.mk_complement(su.re.mk_range(lo_str, hi_str)), m);
rw(e);
std::cout << "range comp lo=min: " << mk_pp(e, m) << "\n";
ENSURE(!su.re.is_complement(e));
ENSURE(su.re.is_range(e));
}
// -----------------------------------------------------------------------
// 11. Downstream: (re.* (re.range "z" "a")) → str.to_re ""
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_star(range('z', 'a')), m);
rw(e);
std::cout << "star of empty range: " << mk_pp(e, m) << "\n";
expr* inner = nullptr;
// star of empty → epsilon (str.to_re "")
ENSURE(su.re.is_to_re(e, inner) && su.str.is_empty(inner));
}
// -----------------------------------------------------------------------
// 12. Downstream: concat absorbs empty range → re.none
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_concat(R, su.re.mk_concat(range('z', 'a'), R)), m);
rw(e);
std::cout << "concat absorbs empty range: " << mk_pp(e, m) << "\n";
ENSURE(su.re.is_empty(e));
}
// -----------------------------------------------------------------------
// 13. Downstream: union absorbs empty range → R
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_union(R, range('z', 'a')), m);
rw(e);
std::cout << "union absorbs empty range: " << mk_pp(e, m) << "\n";
ENSURE(e.get() == R.get());
}
// -----------------------------------------------------------------------
// 14. Downstream: inter absorbs empty range → re.none
// -----------------------------------------------------------------------
{
expr_ref e(su.re.mk_inter(R, range('z', 'a')), m);
rw(e);
std::cout << "inter absorbs empty range: " << mk_pp(e, m) << "\n";
ENSURE(su.re.is_empty(e));
}
std::cout << "tst_seq_rewriter: all tests passed\n";
}