diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index ac0261f61..2b9d738c0 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3344,9 +3344,20 @@ expr_ref seq_rewriter::mk_regex_union_normalize(expr* r1, expr* r2) { else if (false && re().is_intersection(r1, a1, a2) && re().is_intersection(r2, b1, b2) && is_complement(a1, b1) && is_complement(a2, b2)) { result = re().mk_xor(a1, re().mk_complement(a2)); - } - else - result = merge_regex_sets(r1, r2, re().mk_full_seq(r1->get_sort()), test, compose); + } + else { + // Range ∪ Range: [a,b] ∪ [c,d] = [min(a,c), max(b,d)] when overlapping or adjacent + unsigned lo1_v = 0, hi1_v = 0, lo2_v = 0, hi2_v = 0; + if (re().is_range(r1, lo1_v, hi1_v) && re().is_range(r2, lo2_v, hi2_v) && + lo2_v <= hi1_v + 1 && lo1_v <= hi2_v + 1) { + unsigned new_lo = std::min(lo1_v, lo2_v); + unsigned new_hi = std::max(hi1_v, hi2_v); + result = re().mk_range(r1->get_sort(), new_lo, new_hi); + } + else + result = merge_regex_sets(r1, r2, re().mk_full_seq(r1->get_sort()), test, compose); + } + return result; } @@ -3375,8 +3386,17 @@ expr_ref seq_rewriter::mk_regex_inter_normalize(expr* r1, expr* r2) { result = r2; else if (re().is_dot_plus(r2) && re().get_info(r1).min_length > 0) result = r1; - else - result = merge_regex_sets(r1, r2, re().mk_empty(r1->get_sort()), test, compose); + else { + // Range ∩ Range: [a,b] ∩ [c,d] = [max(a,c), min(b,d)] or empty + unsigned lo1_v = 0, hi1_v = 0, lo2_v = 0, hi2_v = 0; + if (re().is_range(r1, lo1_v, hi1_v) && re().is_range(r2, lo2_v, hi2_v)) { + unsigned new_lo = std::max(lo1_v, lo2_v); + unsigned new_hi = std::min(hi1_v, hi2_v); + result = re().mk_range(r1->get_sort(), new_lo, new_hi); + } + else + result = merge_regex_sets(r1, r2, re().mk_empty(r1->get_sort()), test, compose); + } return result; } @@ -4805,6 +4825,34 @@ br_status seq_rewriter::mk_re_complement(expr* a, expr_ref& result) { result = re().mk_plus(re().mk_full_char(a->get_sort())); return BR_DONE; } + // Range complement: comp([a,b]) → [0,a-1] ∪ [b+1,max] (or one half when a=0 or b=max) + unsigned lo_v = 0, hi_v = 0; + if (re().is_range(a, lo_v, hi_v)) { + unsigned max_c = u().max_char(); + sort* srt = a->get_sort(); + bool has_left = (lo_v > 0); + bool has_right = (hi_v < max_c); + if (!has_left && !has_right) { + // [0, max_c]: complement is empty + result = re().mk_empty(srt); + return BR_DONE; + } + if (!has_left) { + // [0, b]: complement is [b+1, max] + result = re().mk_range(srt, hi_v + 1, max_c); + return BR_REWRITE1; + } + if (!has_right) { + // [a, max]: complement is [0, a-1] + result = re().mk_range(srt, 0u, lo_v - 1); + return BR_REWRITE1; + } + // General: [a, b] → [0, a-1] ∪ [b+1, max] + auto left = re().mk_range(srt, 0u, lo_v - 1); + auto right = re().mk_range(srt, hi_v + 1, max_c); + result = re().mk_union(left, right); + return BR_REWRITE1; + } return BR_FAILED; } @@ -5102,6 +5150,11 @@ br_status seq_rewriter::mk_re_range(expr* lo, expr* hi, expr_ref& result) { result = re().mk_empty(srt); return BR_DONE; } + // Singleton: re.range "a" "a" → str.to_re "a" + if (slo.length() == 1 && shi.length() == 1 && slo[0] == shi[0]) { + result = re().mk_to_re(lo); + return BR_DONE; + } return BR_FAILED; } diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 59f5d046c..29949a151 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1208,6 +1208,15 @@ app* seq_util::rex::mk_of_pred(expr* p) { return m.mk_app(m_fid, OP_RE_OF_PRED, 0, nullptr, 1, &p); } +app* seq_util::rex::mk_range(sort* re_sort, unsigned lo, unsigned hi) { + if (lo > hi) + return mk_empty(re_sort); + app* lo_str = u.str.mk_string(zstring(lo)); + if (lo == hi) + return mk_to_re(lo_str); + return mk_range(lo_str, u.str.mk_string(zstring(hi))); +} + bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const { if (is_loop(n)) { app const* a = to_app(n); @@ -1671,11 +1680,19 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const { case OP_RE_OPTION: i1 = get_info_rec(e->get_arg(0)); return i1.opt(); - case OP_RE_RANGE: + case OP_RE_RANGE: { + // A concrete range [lo, hi] with lo <= hi is non-empty and classical. + zstring slo, shi; + if (u.str.is_string(e->get_arg(0), slo) && slo.length() == 1 && + u.str.is_string(e->get_arg(1), shi) && shi.length() == 1 && + slo[0] <= shi[0]) + return info(true, l_false, 1, true); + // Symbolic or unknown: not classical + return info(true, l_false, 1, false); + } case OP_RE_FULL_CHAR_SET: case OP_RE_OF_PRED: //TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat - //TBD: check if the range is unsat return info(true, l_false, 1, false); case OP_RE_CONCAT: i1 = get_info_rec(e->get_arg(0)); diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 75995ef7b..c643d8356 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -521,6 +521,8 @@ public: app* mk_to_re(expr* s) { return m.mk_app(m_fid, OP_SEQ_TO_RE, 1, &s); } app* mk_in_re(expr* s, expr* r) { return m.mk_app(m_fid, OP_SEQ_IN_RE, s, r); } app* mk_range(expr* s1, expr* s2) { return m.mk_app(m_fid, OP_RE_RANGE, s1, s2); } + // Smart constructor: returns re.empty / str.to_re / re.range based on lo vs hi. + app* mk_range(sort* re_sort, unsigned lo, unsigned hi); app* mk_concat(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_CONCAT, r1, r2); } app* mk_union(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_UNION, r1, r2); } app* mk_inter(expr* r1, expr* r2) { return m.mk_app(m_fid, OP_RE_INTERSECT, r1, r2); } diff --git a/src/tactic/arith/lia2card_tactic.cpp b/src/tactic/arith/lia2card_tactic.cpp index c9c675ac0..b45457349 100644 --- a/src/tactic/arith/lia2card_tactic.cpp +++ b/src/tactic/arith/lia2card_tactic.cpp @@ -20,6 +20,7 @@ Notes: #include "ast/ast_ll_pp.h" #include "ast/pb_decl_plugin.h" #include "ast/arith_decl_plugin.h" +#include "ast/recfun_decl_plugin.h" #include "ast/rewriter/rewriter_def.h" #include "ast/rewriter/expr_safe_replace.h" #include "ast/ast_util.h" @@ -185,7 +186,7 @@ public: expr_safe_replace rep(m); tactic_report report("lia2card", *g); - if (recfun::util(m()).has_rec_defs()) { + if (recfun::util(m).has_rec_defs()) { result.push_back(g.get()); return; } diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index ef5902c0c..d84892cd5 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -24,6 +24,7 @@ add_executable(test-z3 api_datalog.cpp parametric_datatype.cpp arith_rewriter.cpp + seq_rewriter.cpp arith_simplifier_plugin.cpp ast.cpp bdd.cpp diff --git a/src/test/main.cpp b/src/test/main.cpp index 4a5239f19..4eb66798f 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -113,6 +113,7 @@ X(api_bug) \ X(api_special_relations) \ X(arith_rewriter) \ + X(seq_rewriter) \ X(check_assumptions) \ X(smt_context) \ X(theory_dl) \ diff --git a/src/test/seq_rewriter.cpp b/src/test/seq_rewriter.cpp new file mode 100644 index 000000000..b95008cde --- /dev/null +++ b/src/test/seq_rewriter.cpp @@ -0,0 +1,193 @@ +/*++ +Copyright (c) 2024 Microsoft Corporation + +Regression tests for seq_rewriter smart constructors for regex ranges. + +Tests: + 1. Empty range (lo > hi) → re.none + 2. Singleton range (lo == hi) → str.to_re lo + 3. Range ∩ Range → reduced range or re.none + 4. Range ∪ Range → merged range for overlapping/adjacent + 5. Complement of range → one or two ranges + 6. Downstream operators absorb empty ranges correctly +--*/ + +#include "ast/ast_pp.h" +#include "ast/reg_decl_plugins.h" +#include "ast/rewriter/th_rewriter.h" +#include "ast/seq_decl_plugin.h" +#include + +// Build a single-char string literal expression. +static expr_ref mk_str(ast_manager& m, seq_util& su, unsigned c) { + return expr_ref(su.str.mk_string(zstring(c)), m); +} + +void tst_seq_rewriter() { + ast_manager m; + reg_decl_plugins(m); + th_rewriter rw(m); + seq_util su(m); + + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + + auto range = [&](unsigned lo, unsigned hi) -> expr_ref { + return expr_ref(su.re.mk_range(mk_str(m, su, lo), mk_str(m, su, hi)), m); + }; + + // Arbitrary regex variable for downstream tests. + app_ref R(m.mk_fresh_const("R", re_sort), m); + + // ----------------------------------------------------------------------- + // 1. Empty range (lo > hi) → re.none + // ----------------------------------------------------------------------- + { + expr_ref e = range('z', 'a'); + rw(e); + std::cout << "empty range lo>hi: " << mk_pp(e, m) << "\n"; + ENSURE(su.re.is_empty(e)); + } + + // ----------------------------------------------------------------------- + // 2. Singleton range (lo == hi) → str.to_re lo + // ----------------------------------------------------------------------- + { + expr_ref e = range('a', 'a'); + rw(e); + std::cout << "singleton range: " << mk_pp(e, m) << "\n"; + expr* inner = nullptr; + ENSURE(su.re.is_to_re(e, inner)); + } + + // ----------------------------------------------------------------------- + // 3. Range intersection: overlapping → smaller range + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_inter(range('a', 'z'), range('f', 'k')), m); + rw(e); + std::cout << "range inter overlapping: " << mk_pp(e, m) << "\n"; + unsigned lo = 0, hi = 0; + ENSURE(su.re.is_range(e, lo, hi) && lo == 'f' && hi == 'k'); + } + + // ----------------------------------------------------------------------- + // 4. Range intersection: disjoint → re.none + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_inter(range('a', 'f'), range('k', 'z')), m); + rw(e); + std::cout << "range inter disjoint: " << mk_pp(e, m) << "\n"; + ENSURE(su.re.is_empty(e)); + } + + // ----------------------------------------------------------------------- + // 5. Range intersection: touching at boundary → singleton (str.to_re "f") + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_inter(range('a', 'f'), range('f', 'z')), m); + rw(e); + std::cout << "range inter touching: " << mk_pp(e, m) << "\n"; + expr* inner = nullptr; + ENSURE(su.re.is_to_re(e, inner)); + } + + // ----------------------------------------------------------------------- + // 6. Range union: overlapping → merged range + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_union(range('a', 'f'), range('e', 'k')), m); + rw(e); + std::cout << "range union overlapping: " << mk_pp(e, m) << "\n"; + unsigned lo = 0, hi = 0; + ENSURE(su.re.is_range(e, lo, hi) && lo == 'a' && hi == 'k'); + } + + // ----------------------------------------------------------------------- + // 7. Range union: adjacent → merged range + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_union(range('a', 'f'), range('g', 'k')), m); + rw(e); + std::cout << "range union adjacent: " << mk_pp(e, m) << "\n"; + unsigned lo = 0, hi = 0; + ENSURE(su.re.is_range(e, lo, hi) && lo == 'a' && hi == 'k'); + } + + // ----------------------------------------------------------------------- + // 8. Range union: disjoint → stays as union + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_union(range('a', 'c'), range('m', 'z')), m); + rw(e); + std::cout << "range union disjoint (stays as union): " << mk_pp(e, m) << "\n"; + ENSURE(!su.re.is_range(e)); + } + + // ----------------------------------------------------------------------- + // 9. Range complement (general): no longer a complement node + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_complement(range('b', 'y')), m); + rw(e); + std::cout << "range comp general: " << mk_pp(e, m) << "\n"; + ENSURE(!su.re.is_complement(e)); + } + + // ----------------------------------------------------------------------- + // 10. Range complement (lo = 0): single range [hi+1, max] + // ----------------------------------------------------------------------- + { + expr_ref lo_str(su.str.mk_string(zstring(0u)), m); + expr_ref hi_str(su.str.mk_string(zstring((unsigned)'f')), m); + expr_ref e(su.re.mk_complement(su.re.mk_range(lo_str, hi_str)), m); + rw(e); + std::cout << "range comp lo=min: " << mk_pp(e, m) << "\n"; + ENSURE(!su.re.is_complement(e)); + ENSURE(su.re.is_range(e)); + } + + // ----------------------------------------------------------------------- + // 11. Downstream: (re.* (re.range "z" "a")) → str.to_re "" + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_star(range('z', 'a')), m); + rw(e); + std::cout << "star of empty range: " << mk_pp(e, m) << "\n"; + expr* inner = nullptr; + // star of empty → epsilon (str.to_re "") + ENSURE(su.re.is_to_re(e, inner) && su.str.is_empty(inner)); + } + + // ----------------------------------------------------------------------- + // 12. Downstream: concat absorbs empty range → re.none + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_concat(R, su.re.mk_concat(range('z', 'a'), R)), m); + rw(e); + std::cout << "concat absorbs empty range: " << mk_pp(e, m) << "\n"; + ENSURE(su.re.is_empty(e)); + } + + // ----------------------------------------------------------------------- + // 13. Downstream: union absorbs empty range → R + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_union(R, range('z', 'a')), m); + rw(e); + std::cout << "union absorbs empty range: " << mk_pp(e, m) << "\n"; + ENSURE(e.get() == R.get()); + } + + // ----------------------------------------------------------------------- + // 14. Downstream: inter absorbs empty range → re.none + // ----------------------------------------------------------------------- + { + expr_ref e(su.re.mk_inter(R, range('z', 'a')), m); + rw(e); + std::cout << "inter absorbs empty range: " << mk_pp(e, m) << "\n"; + ENSURE(su.re.is_empty(e)); + } + + std::cout << "tst_seq_rewriter: all tests passed\n"; +}