mirror of
https://github.com/Z3Prover/z3
synced 2026-06-21 08:00:27 +00:00
Smart constructors for regex ranges: emptiness, singleton, range ops, complement
This commit is contained in:
parent
d1d4f311f6
commit
ee164a2705
5 changed files with 269 additions and 6 deletions
|
|
@ -3327,8 +3327,21 @@ expr_ref seq_rewriter::mk_regex_union_normalize(expr* r1, expr* r2) {
|
|||
result = r1;
|
||||
else if (re().is_dot_plus(r2) && re().get_info(r1).min_length > 0)
|
||||
result = r2;
|
||||
else
|
||||
result = merge_regex_sets(r1, r2, re().mk_full_seq(r1->get_sort()), test, compose);
|
||||
else {
|
||||
// Range ∪ Range: [a,b] ∪ [c,d] = [min(a,c), max(b,d)] when overlapping or adjacent
|
||||
unsigned lo1_v = 0, hi1_v = 0, lo2_v = 0, hi2_v = 0;
|
||||
if (re().is_range(r1, lo1_v, hi1_v) && re().is_range(r2, lo2_v, hi2_v) &&
|
||||
lo2_v <= hi1_v + 1 && lo1_v <= hi2_v + 1) {
|
||||
unsigned new_lo = std::min(lo1_v, lo2_v);
|
||||
unsigned new_hi = std::max(hi1_v, hi2_v);
|
||||
if (new_lo == new_hi)
|
||||
result = re().mk_to_re(str().mk_string(zstring(new_lo)));
|
||||
else
|
||||
result = re().mk_range(str().mk_string(zstring(new_lo)), str().mk_string(zstring(new_hi)));
|
||||
}
|
||||
else
|
||||
result = merge_regex_sets(r1, r2, re().mk_full_seq(r1->get_sort()), test, compose);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -3357,8 +3370,22 @@ expr_ref seq_rewriter::mk_regex_inter_normalize(expr* r1, expr* r2) {
|
|||
result = r2;
|
||||
else if (re().is_dot_plus(r2) && re().get_info(r1).min_length > 0)
|
||||
result = r1;
|
||||
else
|
||||
result = merge_regex_sets(r1, r2, re().mk_empty(r1->get_sort()), test, compose);
|
||||
else {
|
||||
// Range ∩ Range: [a,b] ∩ [c,d] = [max(a,c), min(b,d)] or empty
|
||||
unsigned lo1_v = 0, hi1_v = 0, lo2_v = 0, hi2_v = 0;
|
||||
if (re().is_range(r1, lo1_v, hi1_v) && re().is_range(r2, lo2_v, hi2_v)) {
|
||||
unsigned new_lo = std::max(lo1_v, lo2_v);
|
||||
unsigned new_hi = std::min(hi1_v, hi2_v);
|
||||
if (new_lo > new_hi)
|
||||
result = re().mk_empty(r1->get_sort());
|
||||
else if (new_lo == new_hi)
|
||||
result = re().mk_to_re(str().mk_string(zstring(new_lo)));
|
||||
else
|
||||
result = re().mk_range(str().mk_string(zstring(new_lo)), str().mk_string(zstring(new_hi)));
|
||||
}
|
||||
else
|
||||
result = merge_regex_sets(r1, r2, re().mk_empty(r1->get_sort()), test, compose);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -4787,6 +4814,34 @@ br_status seq_rewriter::mk_re_complement(expr* a, expr_ref& result) {
|
|||
result = re().mk_plus(re().mk_full_char(a->get_sort()));
|
||||
return BR_DONE;
|
||||
}
|
||||
// Range complement: comp([a,b]) → [0,a-1] ∪ [b+1,max] (or one half when a=0 or b=max)
|
||||
unsigned lo_v = 0, hi_v = 0;
|
||||
if (re().is_range(a, lo_v, hi_v)) {
|
||||
unsigned max_c = u().max_char();
|
||||
sort* srt = a->get_sort();
|
||||
bool has_left = (lo_v > 0);
|
||||
bool has_right = (hi_v < max_c);
|
||||
if (!has_left && !has_right) {
|
||||
// [0, max_c]: complement is empty
|
||||
result = re().mk_empty(srt);
|
||||
return BR_DONE;
|
||||
}
|
||||
if (!has_left) {
|
||||
// [0, b]: complement is [b+1, max]
|
||||
result = re().mk_range(str().mk_string(zstring(hi_v + 1)), str().mk_string(zstring(max_c)));
|
||||
return BR_REWRITE1;
|
||||
}
|
||||
if (!has_right) {
|
||||
// [a, max]: complement is [0, a-1]
|
||||
result = re().mk_range(str().mk_string(zstring(0u)), str().mk_string(zstring(lo_v - 1)));
|
||||
return BR_REWRITE1;
|
||||
}
|
||||
// General: [a, b] → [0, a-1] ∪ [b+1, max]
|
||||
auto left = re().mk_range(str().mk_string(zstring(0u)), str().mk_string(zstring(lo_v - 1)));
|
||||
auto right = re().mk_range(str().mk_string(zstring(hi_v + 1)), str().mk_string(zstring(max_c)));
|
||||
result = re().mk_union(left, right);
|
||||
return BR_REWRITE1;
|
||||
}
|
||||
return BR_FAILED;
|
||||
}
|
||||
|
||||
|
|
@ -5084,6 +5139,11 @@ br_status seq_rewriter::mk_re_range(expr* lo, expr* hi, expr_ref& result) {
|
|||
result = re().mk_empty(srt);
|
||||
return BR_DONE;
|
||||
}
|
||||
// Singleton: re.range "a" "a" → str.to_re "a"
|
||||
if (slo.length() == 1 && shi.length() == 1 && slo[0] == shi[0]) {
|
||||
result = re().mk_to_re(lo);
|
||||
return BR_DONE;
|
||||
}
|
||||
|
||||
return BR_FAILED;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1671,11 +1671,19 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
|
|||
case OP_RE_OPTION:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
return i1.opt();
|
||||
case OP_RE_RANGE:
|
||||
case OP_RE_RANGE: {
|
||||
// A concrete range [lo, hi] with lo <= hi is non-empty and classical.
|
||||
zstring slo, shi;
|
||||
if (u.str.is_string(e->get_arg(0), slo) && slo.length() == 1 &&
|
||||
u.str.is_string(e->get_arg(1), shi) && shi.length() == 1 &&
|
||||
slo[0] <= shi[0])
|
||||
return info(true, l_false, 1, true);
|
||||
// Symbolic or unknown: not classical
|
||||
return info(true, l_false, 1, false);
|
||||
}
|
||||
case OP_RE_FULL_CHAR_SET:
|
||||
case OP_RE_OF_PRED:
|
||||
//TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
|
||||
//TBD: check if the range is unsat
|
||||
return info(true, l_false, 1, false);
|
||||
case OP_RE_CONCAT:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ add_executable(test-z3
|
|||
api_datalog.cpp
|
||||
parametric_datatype.cpp
|
||||
arith_rewriter.cpp
|
||||
seq_rewriter.cpp
|
||||
arith_simplifier_plugin.cpp
|
||||
ast.cpp
|
||||
bdd.cpp
|
||||
|
|
|
|||
|
|
@ -113,6 +113,7 @@
|
|||
X(api_bug) \
|
||||
X(api_special_relations) \
|
||||
X(arith_rewriter) \
|
||||
X(seq_rewriter) \
|
||||
X(check_assumptions) \
|
||||
X(smt_context) \
|
||||
X(theory_dl) \
|
||||
|
|
|
|||
193
src/test/seq_rewriter.cpp
Normal file
193
src/test/seq_rewriter.cpp
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
/*++
|
||||
Copyright (c) 2024 Microsoft Corporation
|
||||
|
||||
Regression tests for seq_rewriter smart constructors for regex ranges.
|
||||
|
||||
Tests:
|
||||
1. Empty range (lo > hi) → re.none
|
||||
2. Singleton range (lo == hi) → str.to_re lo
|
||||
3. Range ∩ Range → reduced range or re.none
|
||||
4. Range ∪ Range → merged range for overlapping/adjacent
|
||||
5. Complement of range → one or two ranges
|
||||
6. Downstream operators absorb empty ranges correctly
|
||||
--*/
|
||||
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/rewriter/th_rewriter.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include <iostream>
|
||||
|
||||
// Build a single-char string literal expression.
|
||||
static expr_ref mk_str(ast_manager& m, seq_util& su, unsigned c) {
|
||||
return expr_ref(su.str.mk_string(zstring(c)), m);
|
||||
}
|
||||
|
||||
void tst_seq_rewriter() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
th_rewriter rw(m);
|
||||
seq_util su(m);
|
||||
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
|
||||
auto range = [&](unsigned lo, unsigned hi) -> expr_ref {
|
||||
return expr_ref(su.re.mk_range(mk_str(m, su, lo), mk_str(m, su, hi)), m);
|
||||
};
|
||||
|
||||
// Arbitrary regex variable for downstream tests.
|
||||
app_ref R(m.mk_fresh_const("R", re_sort), m);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 1. Empty range (lo > hi) → re.none
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e = range('z', 'a');
|
||||
rw(e);
|
||||
std::cout << "empty range lo>hi: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(su.re.is_empty(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 2. Singleton range (lo == hi) → str.to_re lo
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e = range('a', 'a');
|
||||
rw(e);
|
||||
std::cout << "singleton range: " << mk_pp(e, m) << "\n";
|
||||
expr* inner = nullptr;
|
||||
ENSURE(su.re.is_to_re(e, inner));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 3. Range intersection: overlapping → smaller range
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_inter(range('a', 'z'), range('f', 'k')), m);
|
||||
rw(e);
|
||||
std::cout << "range inter overlapping: " << mk_pp(e, m) << "\n";
|
||||
unsigned lo = 0, hi = 0;
|
||||
ENSURE(su.re.is_range(e, lo, hi) && lo == 'f' && hi == 'k');
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 4. Range intersection: disjoint → re.none
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_inter(range('a', 'f'), range('k', 'z')), m);
|
||||
rw(e);
|
||||
std::cout << "range inter disjoint: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(su.re.is_empty(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 5. Range intersection: touching at boundary → singleton (str.to_re "f")
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_inter(range('a', 'f'), range('f', 'z')), m);
|
||||
rw(e);
|
||||
std::cout << "range inter touching: " << mk_pp(e, m) << "\n";
|
||||
expr* inner = nullptr;
|
||||
ENSURE(su.re.is_to_re(e, inner));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 6. Range union: overlapping → merged range
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_union(range('a', 'f'), range('e', 'k')), m);
|
||||
rw(e);
|
||||
std::cout << "range union overlapping: " << mk_pp(e, m) << "\n";
|
||||
unsigned lo = 0, hi = 0;
|
||||
ENSURE(su.re.is_range(e, lo, hi) && lo == 'a' && hi == 'k');
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 7. Range union: adjacent → merged range
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_union(range('a', 'f'), range('g', 'k')), m);
|
||||
rw(e);
|
||||
std::cout << "range union adjacent: " << mk_pp(e, m) << "\n";
|
||||
unsigned lo = 0, hi = 0;
|
||||
ENSURE(su.re.is_range(e, lo, hi) && lo == 'a' && hi == 'k');
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 8. Range union: disjoint → stays as union
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_union(range('a', 'c'), range('m', 'z')), m);
|
||||
rw(e);
|
||||
std::cout << "range union disjoint (stays as union): " << mk_pp(e, m) << "\n";
|
||||
ENSURE(!su.re.is_range(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 9. Range complement (general): no longer a complement node
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_complement(range('b', 'y')), m);
|
||||
rw(e);
|
||||
std::cout << "range comp general: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(!su.re.is_complement(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 10. Range complement (lo = 0): single range [hi+1, max]
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref lo_str(su.str.mk_string(zstring(0u)), m);
|
||||
expr_ref hi_str(su.str.mk_string(zstring((unsigned)'f')), m);
|
||||
expr_ref e(su.re.mk_complement(su.re.mk_range(lo_str, hi_str)), m);
|
||||
rw(e);
|
||||
std::cout << "range comp lo=min: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(!su.re.is_complement(e));
|
||||
ENSURE(su.re.is_range(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 11. Downstream: (re.* (re.range "z" "a")) → str.to_re ""
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_star(range('z', 'a')), m);
|
||||
rw(e);
|
||||
std::cout << "star of empty range: " << mk_pp(e, m) << "\n";
|
||||
expr* inner = nullptr;
|
||||
// star of empty → epsilon (str.to_re "")
|
||||
ENSURE(su.re.is_to_re(e, inner) && su.str.is_empty(inner));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 12. Downstream: concat absorbs empty range → re.none
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_concat(R, su.re.mk_concat(range('z', 'a'), R)), m);
|
||||
rw(e);
|
||||
std::cout << "concat absorbs empty range: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(su.re.is_empty(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 13. Downstream: union absorbs empty range → R
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_union(R, range('z', 'a')), m);
|
||||
rw(e);
|
||||
std::cout << "union absorbs empty range: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(e.get() == R.get());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 14. Downstream: inter absorbs empty range → re.none
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_inter(R, range('z', 'a')), m);
|
||||
rw(e);
|
||||
std::cout << "inter absorbs empty range: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(su.re.is_empty(e));
|
||||
}
|
||||
|
||||
std::cout << "tst_seq_rewriter: all tests passed\n";
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue