From ab259b683072b45fc06bd273a95ec896f312c341 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sun, 31 May 2026 17:41:34 -0700 Subject: [PATCH 01/32] add depth guard --- src/model/datatype_factory.cpp | 3 +++ src/model/datatype_factory.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/model/datatype_factory.cpp b/src/model/datatype_factory.cpp index b93703acd..fac858ccc 100644 --- a/src/model/datatype_factory.cpp +++ b/src/model/datatype_factory.cpp @@ -139,6 +139,9 @@ expr * datatype_factory::get_almost_fresh_value(sort * s) { expr * datatype_factory::get_fresh_value(sort * s) { if (!m_util.is_datatype(s)) return m_model.get_fresh_value(s); + if (m_fresh_depth >= m_max_fresh_depth) + return get_last_fresh_value(s); + struct depth_guard { unsigned& d; depth_guard(unsigned& d) : d(d) { ++d; } ~depth_guard() { --d; } } _dg(m_fresh_depth); TRACE(datatype, tout << "generating fresh value for: " << s->get_name() << "\n";); auto& [set, values] = get_value_set(s); // Approach 0) diff --git a/src/model/datatype_factory.h b/src/model/datatype_factory.h index b2a6b75d3..2d8f216b4 100644 --- a/src/model/datatype_factory.h +++ b/src/model/datatype_factory.h @@ -24,6 +24,8 @@ Revision History: class datatype_factory : public struct_factory { datatype_util m_util; obj_map m_last_fresh_value; + unsigned m_fresh_depth = 0; + static const unsigned m_max_fresh_depth = 512; expr * get_last_fresh_value(sort * s); expr * get_almost_fresh_value(sort * s); From 1f28fd0e6be81672005bf286d05dbd7a27dbbde9 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 3 Jun 2026 10:36:19 -0700 Subject: [PATCH 02/32] Add seq::derive class for symbolic regex derivatives Implement a new seq::derive class (seq_derive.h/cpp) that computes symbolic derivatives of regular expressions using ITE-trees, based on the RE# approach (Varatalu, Veanes, Ernits - POPL 2025). Key features: - Two-argument operator()(ele, r): computes derivative of regex r w.r.t. 
element ele (concrete character or de Bruijn variable for symbolic mode) - ACI canonicalization (flatten, stable_sort, dedup) for union/intersection - ITE-tree combinators for binary/unary operations - Info-based nullability with recursive fallback - Complement absorption rules - Depth-bounded recursion to prevent stack overflow Integration with seq_rewriter: - mk_derivative(ele, r) and mk_derivative(r) now delegate to m_derive - Removed dead mk_derivative_rec function - Added ITE hoisting in mk_re_star, mk_re_concat, mk_re_union0, mk_re_inter0, mk_re_complement - Added depth limiting in Antimirov derivative helpers Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitignore | 2 + src/ast/CMakeLists.txt | 1 + src/ast/rewriter/seq_rewriter.cpp | 336 +++++---------- src/ast/rewriter/seq_rewriter.h | 7 +- src/ast/seq_derive.cpp | 679 ++++++++++++++++++++++++++++++ src/ast/seq_derive.h | 130 ++++++ 6 files changed, 921 insertions(+), 234 deletions(-) create mode 100644 src/ast/seq_derive.cpp create mode 100644 src/ast/seq_derive.h diff --git a/.gitignore b/.gitignore index 2d268c988..df4e3266d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ *~ rebase.cmd +reports/ +crashes/ *.pyc *.pyo # Ignore callgrind files diff --git a/src/ast/CMakeLists.txt b/src/ast/CMakeLists.txt index 6a50c3b05..dd1db135c 100644 --- a/src/ast/CMakeLists.txt +++ b/src/ast/CMakeLists.txt @@ -46,6 +46,7 @@ z3_add_component(ast recfun_decl_plugin.cpp reg_decl_plugins.cpp seq_decl_plugin.cpp + seq_derive.cpp shared_occs.cpp special_relations_decl_plugin.cpp static_features.cpp diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 4453c94a7..b6e472b1a 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2914,15 +2914,11 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) { #endif expr_ref seq_rewriter::mk_derivative(expr* r) { - sort* seq_sort = nullptr, * ele_sort = nullptr; - 
VERIFY(m_util.is_re(r, seq_sort)); - VERIFY(m_util.is_seq(seq_sort, ele_sort)); - expr_ref v(m().mk_var(0, ele_sort), m()); - return mk_antimirov_deriv(v, r, m().mk_true()); + return m_derive(r); } expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - return mk_antimirov_deriv(ele, r, m().mk_true()); + return m_derive(ele, r); } expr_ref seq_rewriter::mk_antimirov_deriv(expr* e, expr* r, expr* path) { @@ -3138,13 +3134,18 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* VERIFY(m_util.is_seq(seq_sort, ele_sort)); expr_ref result(m()); expr* c, * a, * b; - if (re().is_empty(d1)) + if (m_re_deriv_depth >= m_max_re_deriv_depth) { + // Depth limit reached: construct intersection without further decomposition + result = mk_regex_inter_normalize(d1, d2); + } + else if (re().is_empty(d1)) result = d1; else if (re().is_empty(d2)) result = d2; else if (m().is_ite(d1, c, a, b)) { expr_ref path_and_c(simplify_path(e, m().mk_and(path, c)), m()); expr_ref path_and_notc(simplify_path(e, m().mk_and(path, m().mk_not(c))), m()); + ++m_re_deriv_depth; if (m().is_false(path_and_c)) result = mk_antimirov_deriv_intersection(e, b, d2, path); else if (m().is_false(path_and_notc)) @@ -3152,22 +3153,32 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* else result = m().mk_ite(c, mk_antimirov_deriv_intersection(e, a, d2, path_and_c), mk_antimirov_deriv_intersection(e, b, d2, path_and_notc)); + --m_re_deriv_depth; } - else if (m().is_ite(d2)) + else if (m().is_ite(d2)) { // swap d1 and d2 + ++m_re_deriv_depth; result = mk_antimirov_deriv_intersection(e, d2, d1, path); + --m_re_deriv_depth; + } else if (d1 == d2 || re().is_full_seq(d2)) result = mk_antimirov_deriv_restrict(e, d1, path); else if (re().is_full_seq(d1)) result = mk_antimirov_deriv_restrict(e, d2, path); - else if (re().is_union(d1, a, b)) + else if (re().is_union(d1, a, b)) { // distribute intersection over the union in d1 + ++m_re_deriv_depth; result = 
mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, a, d2, path), mk_antimirov_deriv_intersection(e, b, d2, path)); - else if (re().is_union(d2, a, b)) + --m_re_deriv_depth; + } + else if (re().is_union(d2, a, b)) { // distribute intersection over the union in d2 + ++m_re_deriv_depth; result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, d1, a, path), mk_antimirov_deriv_intersection(e, d1, b, path)); + --m_re_deriv_depth; + } else result = mk_regex_inter_normalize(d1, d2); return result; @@ -3177,13 +3188,22 @@ expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) { expr_ref result(m()); expr_ref _r(r, m()), _d(d, m()); expr* c, * t, * e; - if (m().is_ite(d, c, t, e)) { + if (m_re_deriv_depth >= m_max_re_deriv_depth) { + // Depth limit reached: construct concat without further decomposition + result = mk_re_append(d, r); + } + else if (m().is_ite(d, c, t, e)) { + ++m_re_deriv_depth; auto r2 = mk_antimirov_deriv_concat(e, r); auto r1 = mk_antimirov_deriv_concat(t, r); + --m_re_deriv_depth; result = m().mk_ite(c, r1, r2); } - else if (re().is_union(d, t, e)) + else if (re().is_union(d, t, e)) { + ++m_re_deriv_depth; result = mk_antimirov_deriv_union(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); + --m_re_deriv_depth; + } else result = mk_re_append(d, r); SASSERT(result.get()); @@ -3199,7 +3219,11 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { auto dotplus = [&]() { return expr_ref(re().mk_plus(re().mk_full_char(d->get_sort())), m()); }; expr_ref result(m()); expr* c, * t, * e; - if (re().is_empty(d)) + if (m_re_deriv_depth >= m_max_re_deriv_depth) { + // Depth limit reached: construct complement without further decomposition + result = re().mk_complement(d); + } + else if (re().is_empty(d)) result = dotstar(); else if (re().is_epsilon(d)) result = dotplus(); @@ -3207,12 +3231,21 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { result = nothing(); else if 
(re().is_dot_plus(d)) result = epsilon(); - else if (m().is_ite(d, c, t, e)) + else if (m().is_ite(d, c, t, e)) { + ++m_re_deriv_depth; result = m().mk_ite(c, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); - else if (re().is_union(d, t, e)) + --m_re_deriv_depth; + } + else if (re().is_union(d, t, e)) { + ++m_re_deriv_depth; result = mk_antimirov_deriv_intersection(elem, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e), m().mk_true()); - else if (re().is_intersection(d, t, e)) + --m_re_deriv_depth; + } + else if (re().is_intersection(d, t, e)) { + ++m_re_deriv_depth; result = mk_antimirov_deriv_union(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); + --m_re_deriv_depth; + } else if (re().is_complement(d, t)) result = t; else @@ -3253,15 +3286,21 @@ expr_ref seq_rewriter::mk_antimirov_deriv_restrict(expr* e, expr* d, expr* cond) result = re().mk_empty(d->get_sort()); else if (re().is_empty(d) || m().is_true(cond)) result = d; + else if (m_re_deriv_depth >= m_max_re_deriv_depth) + result = d; else if (m().is_ite(d, c, a, b)) { expr_ref path_and_c(simplify_path(e, m().mk_and(cond, c)), m()); expr_ref path_and_notc(simplify_path(e, m().mk_and(cond, m().mk_not(c))), m()); + ++m_re_deriv_depth; result = re().mk_ite_simplify(c, mk_antimirov_deriv_restrict(e, a, path_and_c), mk_antimirov_deriv_restrict(e, b, path_and_notc)); + --m_re_deriv_depth; } else if (re().is_union(d, a, b)) { + ++m_re_deriv_depth; expr_ref a1(mk_antimirov_deriv_restrict(e, a, cond), m()); expr_ref b1(mk_antimirov_deriv_restrict(e, b, cond), m()); + --m_re_deriv_depth; result = mk_antimirov_deriv_union(a1, b1); } return result; @@ -3909,221 +3948,6 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) { return result; } -expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { - expr_ref result(m()); - sort* seq_sort = nullptr, *ele_sort = nullptr; - VERIFY(m_util.is_re(r, seq_sort)); - 
VERIFY(m_util.is_seq(seq_sort, ele_sort)); - SASSERT(ele_sort == ele->get_sort()); - expr* r1 = nullptr, *r2 = nullptr, *p = nullptr; - auto mk_empty = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m()); }; - unsigned lo = 0, hi = 0; - if (re().is_concat(r, r1, r2)) { - expr_ref is_n = is_nullable(r1); - expr_ref dr1 = mk_derivative(ele, r1); - result = mk_der_concat(dr1, r2); - if (m().is_false(is_n)) { - return result; - } - expr_ref dr2 = mk_derivative(ele, r2); - is_n = re_predicate(is_n, seq_sort); - if (re().is_empty(dr2)) { - //do not concatenate [], it is a deade-end - return result; - } - else { - // Instead of mk_der_union here, we use mk_der_antimirov_union to - // force the two cases to be considered separately and lifted to - // the top level. This avoids blowup in cases where determinization - // is expensive. - return mk_der_antimirov_union(result, mk_der_concat(is_n, dr2)); - } - } - else if (re().is_star(r, r1)) { - return mk_der_concat(mk_derivative(ele, r1), r); - } - else if (re().is_plus(r, r1)) { - expr_ref star(re().mk_star(r1), m()); - return mk_derivative(ele, star); - } - else if (re().is_union(r, r1, r2)) { - return mk_der_union(mk_derivative(ele, r1), mk_derivative(ele, r2)); - } - else if (re().is_intersection(r, r1, r2)) { - return mk_der_inter(mk_derivative(ele, r1), mk_derivative(ele, r2)); - } - else if (re().is_diff(r, r1, r2)) { - return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2))); - } - else if (m().is_ite(r, p, r1, r2)) { - // there is no BDD normalization here - result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2)); - return result; - } - else if (re().is_opt(r, r1)) { - return mk_derivative(ele, r1); - } - else if (re().is_complement(r, r1)) { - return mk_der_compl(mk_derivative(ele, r1)); - } - else if (re().is_loop(r, r1, lo)) { - if (lo > 0) { - lo--; - } - result = mk_derivative(ele, r1); - //do not concatenate with [] (emptyset) - if (re().is_empty(result)) { - 
return result; - } - else { - //do not create loop r1{0,}, instead create r1* - return mk_der_concat(result, (lo == 0 ? re().mk_star(r1) : re().mk_loop(r1, lo))); - } - } - else if (re().is_loop(r, r1, lo, hi)) { - if (hi == 0) { - return mk_empty(); - } - hi--; - if (lo > 0) { - lo--; - } - result = mk_derivative(ele, r1); - //do not concatenate with [] (emptyset) or handle the rest of the loop if no more iterations remain - if (re().is_empty(result) || hi == 0) { - return result; - } - else { - return mk_der_concat(result, re().mk_loop_proper(r1, lo, hi)); - } - } - else if (re().is_full_seq(r) || - re().is_empty(r)) { - return expr_ref(r, m()); - } - else if (re().is_to_re(r, r1)) { - // r1 is a string here (not a regexp) - expr_ref hd(m()), tl(m()); - if (get_head_tail(r1, hd, tl)) { - // head must be equal; if so, derivative is tail - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv to_re" << std::endl;); - result = m().mk_eq(ele, hd); - result = mk_der_cond(result, ele, seq_sort); - expr_ref r1(re().mk_to_re(tl), m()); - result = mk_der_concat(result, r1); - return result; - } - else if (str().is_empty(r1)) { - //observe: str().is_empty(r1) checks that r = () = epsilon - //while mk_empty() = [], because deriv(epsilon) = [] = nothing - return mk_empty(); - } - else if (str().is_itos(r1)) { - // - // here r1 = (str.from_int r2) and r2 is non-ground - // or else the expression would have been simplified earlier - // so r1 must be nonempty and must consists of decimal digits - // '0' <= elem <= '9' - // if ((isdigit ele) and (ele = (hd r1))) then (to_re (tl r1)) else [] - // - hd = mk_seq_first(r1); - // isolate nested conjunction for deterministic evaluation - auto a0 = u().mk_le(m_util.mk_char('0'), ele); - auto a1 = u().mk_le(ele, m_util.mk_char('9')); - auto a2 = m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))); - auto a3 = m().mk_eq(hd, ele); - auto inner = m().mk_and(a2, a3); - m_br.mk_and(a0, a1, inner, result); - tl = 
re().mk_to_re(mk_seq_rest(r1)); - return re_and(result, tl); - } - else { - // recall: [] denotes the empty language (nothing) regex, () denotes epsilon or empty sequence - // construct the term (if (r1 != () and (ele = (first r1)) then (to_re (rest r1)) else [])) - hd = mk_seq_first(r1); - m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result); - tl = re().mk_to_re(mk_seq_rest(r1)); - return re_and(result, tl); - } - } - else if (re().is_reverse(r, r1)) { - if (re().is_to_re(r1, r2)) { - // First try to extract hd and tl such that r = hd ++ tl and |tl|=1 - expr_ref hd(m()), tl(m()); - if (get_head_tail_reversed(r2, hd, tl)) { - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv reverse to_re" << std::endl;); - result = m().mk_eq(ele, tl); - result = mk_der_cond(result, ele, seq_sort); - result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd))); - return result; - } - else if (str().is_empty(r2)) { - return mk_empty(); - } - else { - // construct the term (if (r2 != () and (ele = (last r2)) then reverse(to_re (butlast r2)) else [])) - // hd = first of reverse(r2) i.e. last of r2 - // tl = rest of reverse(r2) i.e. butlast of r2 - //hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), one())); - hd = mk_seq_last(r2); - // factor nested constructor calls to enforce deterministic argument evaluation order - auto a_non_empty = m().mk_not(m().mk_eq(r2, str().mk_empty(seq_sort))); - auto a_eq = m().mk_eq(hd, ele); - m_br.mk_and(a_non_empty, a_eq, result); - tl = re().mk_to_re(mk_seq_butlast(r2)); - return re_and(result, re().mk_reverse(tl)); - } - } - } - else if (re().is_range(r, r1, r2)) { - // r1, r2 are sequences. 
- zstring s1, s2; - if (str().is_string(r1, s1) && str().is_string(r2, s2)) { - if (s1.length() == 1 && s2.length() == 1) { - expr_ref ch1(m_util.mk_char(s1[0]), m()); - expr_ref ch2(m_util.mk_char(s2[0]), m()); - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv range zstring" << std::endl;); - expr_ref p1(u().mk_le(ch1, ele), m()); - p1 = mk_der_cond(p1, ele, seq_sort); - expr_ref p2(u().mk_le(ele, ch2), m()); - p2 = mk_der_cond(p2, ele, seq_sort); - result = mk_der_inter(p1, p2); - return result; - } - else { - return mk_empty(); - } - } - expr* e1 = nullptr, * e2 = nullptr; - if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) { - SASSERT(u().is_char(e1)); - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv range str" << std::endl;); - expr_ref p1(u().mk_le(e1, ele), m()); - p1 = mk_der_cond(p1, ele, seq_sort); - expr_ref p2(u().mk_le(ele, e2), m()); - p2 = mk_der_cond(p2, ele, seq_sort); - result = mk_der_inter(p1, p2); - return result; - } - } - else if (re().is_full_char(r)) { - return expr_ref(re().mk_to_re(str().mk_empty(seq_sort)), m()); - } - else if (re().is_of_pred(r, p)) { - array_util array(m()); - expr* args[2] = { p, ele }; - result = array.mk_select(2, args); - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv of_pred" << std::endl;); - return mk_der_cond(result, ele, seq_sort); - } - // stuck cases: re.derivative, re variable, - return expr_ref(re().mk_derivative(ele, r), m()); -} /************************************************* ***** End Derivative Code ***** @@ -4571,6 +4395,16 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { } std::swap(a, b); } + // Hoist ite out of concat: concat(ite(c, r1, r2), b) → ite(c, concat(r1, b), concat(r2, b)) + expr* c = nullptr; + if (m().is_ite(a, c, a1, b1)) { + result = m().mk_ite(c, re().mk_concat(a1, b), re().mk_concat(b1, b)); + return BR_REWRITE3; + } + if (m().is_ite(b, c, a1, b1)) { + result = m().mk_ite(c, 
re().mk_concat(a, a1), re().mk_concat(a, b1)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4663,6 +4497,21 @@ br_status seq_rewriter::mk_re_union0(expr* a, expr* b, expr_ref& result) { result = b; return BR_DONE; } + // r ∪ ~r → Σ* (complement absorption) + if (are_complements(a, b)) { + result = re().mk_full_seq(a->get_sort()); + return BR_DONE; + } + // Hoist ite out of union: union(ite(c, r1, r2), b) → ite(c, union(r1, b), union(r2, b)) + expr *c = nullptr, *r1 = nullptr, *r2 = nullptr; + if (m().is_ite(a, c, r1, r2)) { + result = m().mk_ite(c, re().mk_union(r1, b), re().mk_union(r2, b)); + return BR_REWRITE3; + } + if (m().is_ite(b, c, r1, r2)) { + result = m().mk_ite(c, re().mk_union(a, r1), re().mk_union(a, r2)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4705,6 +4554,12 @@ br_status seq_rewriter::mk_re_complement(expr* a, expr_ref& result) { result = re().mk_plus(re().mk_full_char(a->get_sort())); return BR_DONE; } + // Hoist ite out of complement: ~(ite(c, r1, r2)) → ite(c, ~r1, ~r2) + expr* c = nullptr; + if (m().is_ite(a, c, e1, e2)) { + result = m().mk_ite(c, re().mk_complement(e1), re().mk_complement(e2)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4730,6 +4585,21 @@ br_status seq_rewriter::mk_re_inter0(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } + // r ∩ ~r → ∅ (complement absorption) + if (are_complements(a, b)) { + result = re().mk_empty(a->get_sort()); + return BR_DONE; + } + // Hoist ite out of intersection: inter(ite(c, r1, r2), b) → ite(c, inter(r1, b), inter(r2, b)) + expr *c = nullptr, *r1 = nullptr, *r2 = nullptr; + if (m().is_ite(a, c, r1, r2)) { + result = m().mk_ite(c, re().mk_inter(r1, b), re().mk_inter(r2, b)); + return BR_REWRITE3; + } + if (m().is_ite(b, c, r1, r2)) { + result = m().mk_ite(c, re().mk_inter(a, r1), re().mk_inter(a, r2)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4884,7 +4754,9 @@ br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) { result = 
re().mk_full_seq(b1->get_sort()); return BR_REWRITE2; } - + // Hoist ite out of star: (ite c r1 r2)* → ite(c, r1*, r2*) + result = m().mk_ite(c, re().mk_star(b1), re().mk_star(c1)); + return BR_REWRITE3; } return BR_FAILED; } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 583720911..9668c299c 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -19,6 +19,7 @@ Notes: #pragma once #include "ast/seq_decl_plugin.h" +#include "ast/seq_derive.h" #include "ast/ast_pp.h" #include "ast/arith_decl_plugin.h" #include "ast/rewriter/rewriter_types.h" @@ -130,10 +131,13 @@ class seq_rewriter { seq_util m_util; arith_util m_autil; bool_rewriter m_br; + seq::derive m_derive; // re2automaton m_re2aut; op_cache m_op_cache; expr_ref_vector m_es, m_lhs, m_rhs; bool m_coalesce_chars; + unsigned m_re_deriv_depth { 0 }; + static const unsigned m_max_re_deriv_depth = 512; enum length_comparison { shorter_c, @@ -172,7 +176,6 @@ class seq_rewriter { // Calculate derivative, memoized and enforcing a normal form expr_ref is_nullable_rec(expr* r); - expr_ref mk_derivative_rec(expr* ele, expr* r); expr_ref mk_der_op(decl_kind k, expr* a, expr* b); expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); expr_ref mk_der_concat(expr* a, expr* b); @@ -340,7 +343,7 @@ class seq_rewriter { public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): - m_util(m), m_autil(m), m_br(m, p), // m_re2aut(m), + m_util(m), m_autil(m), m_br(m, p), m_derive(m), // m_re2aut(m), m_op_cache(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { } diff --git a/src/ast/seq_derive.cpp b/src/ast/seq_derive.cpp new file mode 100644 index 000000000..96ec04059 --- /dev/null +++ b/src/ast/seq_derive.cpp @@ -0,0 +1,679 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + seq_derive.cpp + +Abstract: + + Symbolic derivative computation for regular expressions. 
+ Produces an ITE-tree (transition regex) representation following + the approach of RE# (Varatalu, Veanes, Ernits - POPL 2025). + + The symbolic derivative δ(r) maps each character to the resulting + derivative state via an ITE-tree. The free variable (:var 0) represents + the input character. + +Authors: + + Nikolaj Bjorner (nbjorner) 2025-06-03 + +--*/ + +#include "ast/seq_derive.h" +#include "ast/ast_pp.h" +#include "ast/array_decl_plugin.h" +#include "ast/rewriter/bool_rewriter.h" +#include + +namespace seq { + + derive::derive(ast_manager& m) : + m(m), + m_util(m), + m_autil(m), + m_br(m), + m_trail(m), + m_ele(m) { + m_br.set_flat_and_or(false); + } + + void derive::reset() { + m_cache.reset(); + m_trail.reset(); + } + + expr_ref derive::operator()(expr* ele, expr* r) { + SASSERT(m_util.is_re(r)); + m_ele = ele; + m_depth = 0; + expr_ref result = derive_rec(r); + m_ele = nullptr; + return result; + } + + expr_ref derive::operator()(expr* r) { + SASSERT(m_util.is_re(r)); + sort* seq_sort = nullptr, * ele_sort = nullptr; + VERIFY(m_util.is_re(r, seq_sort)); + VERIFY(m_util.is_seq(seq_sort, ele_sort)); + expr_ref v(m.mk_var(0, ele_sort), m); + return (*this)(v, r); + } + + // ------------------------------------------------------- + // Core derivative computation + // ------------------------------------------------------- + + expr_ref derive::derive_rec(expr* r) { + SASSERT(m_util.is_re(r)); + + // Check cache + expr* cached = nullptr; + if (m_cache.find(r, cached)) + return expr_ref(cached, m); + + // Depth check + if (m_depth >= m_max_depth) { + // Return stuck derivative (the derivative operator applied symbolically) + return expr_ref(re().mk_derivative(m_ele, r), m); + } + + ++m_depth; + expr_ref result = derive_core(r); + --m_depth; + + // Cache the result + m_cache.insert(r, result); + m_trail.push_back(r); + m_trail.push_back(result); + return result; + } + + // Forward declaration helper + expr_ref derive::derive_core(expr* r) { + sort* s = nullptr; + 
VERIFY(m_util.is_re(r, s)); + + auto nothing = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m); }; + auto epsilon = [&]() { return expr_ref(re().mk_to_re(u().str.mk_empty(s)), m); }; + auto dotstar = [&]() { return expr_ref(re().mk_full_seq(r->get_sort()), m); }; + + expr* r1 = nullptr; + expr* r2 = nullptr; + expr* cond = nullptr; + unsigned lo = 0, hi = 0; + + // δ(∅) = ∅, δ(ε) = ∅ + if (re().is_empty(r) || re().is_epsilon(r)) + return nothing(); + + // δ(Σ*) = Σ*, δ(.+) = Σ* + if (re().is_full_seq(r) || re().is_dot_plus(r)) + return dotstar(); + + // δ(.) = ε (full char accepts any single character) + if (re().is_full_char(r)) + return epsilon(); + + // δ(str.to_re(s)) - derivative of a literal string + if (re().is_to_re(r, r1)) + return derive_to_re(r1, s); + + // δ(re.range(lo, hi)) - character range + if (re().is_range(r, r1, r2)) + return derive_range(r1, r2, s); + + // δ(re.of_pred(p)) - predicate-based regex + if (re().is_of_pred(r, r1)) + return derive_of_pred(r1, s); + + // δ(r1 · r2) = δ(r1) · r2 ∪ (if nullable(r1) then δ(r2) else ∅) + if (re().is_concat(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d1_r2 = mk_deriv_concat(d1, r2); + expr_ref nullable_r1 = is_nullable(r1); + if (m.is_true(nullable_r1)) + return mk_union(d1_r2, derive_rec(r2)); + if (m.is_false(nullable_r1)) + return d1_r2; + // Conditional: nullable is a Boolean expression + expr_ref d2 = derive_rec(r2); + expr_ref guarded = mk_ite(nullable_r1, d2, nothing()); + return mk_union(d1_r2, guarded); + } + + // δ(r1 ∪ r2) = δ(r1) ∪ δ(r2) + if (re().is_union(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + return mk_union(d1, d2); + } + + // δ(r1 ∩ r2) = δ(r1) ∩ δ(r2) + if (re().is_intersection(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + return mk_inter(d1, d2); + } + + // δ(~r1) = ~δ(r1) + if (re().is_complement(r, r1)) { + expr_ref d1 = derive_rec(r1); + return mk_complement(d1); + } + + // δ(r1*) = δ(r1) 
· r1* + if (re().is_star(r, r1)) { + expr_ref d1 = derive_rec(r1); + expr_ref star_r1(re().mk_star(r1), m); + return mk_deriv_concat(d1, star_r1); + } + + // δ(r1+) = δ(r1) · r1* + if (re().is_plus(r, r1)) { + expr_ref d1 = derive_rec(r1); + expr_ref star_r1(re().mk_star(r1), m); + return mk_deriv_concat(d1, star_r1); + } + + // δ(r1?) = δ(r1) + if (re().is_opt(r, r1)) + return derive_rec(r1); + + // δ(r1{lo,hi}) + if (re().is_loop(r, r1, lo, hi)) { + if (hi == 0 || hi < lo) + return nothing(); + expr_ref d1 = derive_rec(r1); + expr_ref tail(re().mk_loop_proper(r1, (lo == 0 ? 0 : lo - 1), hi - 1), m); + return mk_deriv_concat(d1, tail); + } + + // δ(r1{lo,}) - unbounded loop + if (re().is_loop(r, r1, lo)) { + expr_ref d1 = derive_rec(r1); + expr_ref tail(re().mk_loop(r1, (lo == 0 ? 0 : lo - 1)), m); + return mk_deriv_concat(d1, tail); + } + + // δ(r1 \ r2) = δ(r1) ∩ ~δ(r2) + if (re().is_diff(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + expr_ref neg_d2 = mk_complement(d2); + return mk_inter(d1, neg_d2); + } + + // δ(ite(c, r1, r2)) = ite(c, δ(r1), δ(r2)) + if (m.is_ite(r, cond, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + return mk_ite(cond, d1, d2); + } + + // δ(reverse(r1)) - stuck: return symbolic derivative + if (re().is_reverse(r, r1)) + return expr_ref(re().mk_derivative(m_ele, r), m); + + // Stuck/uninterpreted case + return expr_ref(re().mk_derivative(m_ele, r), m); + } + + // ------------------------------------------------------- + // Derivative of specific regex constructs + // ------------------------------------------------------- + + expr_ref derive::derive_to_re(expr* s, sort* seq_sort) { + sort* re_sort = re().mk_re(seq_sort); + // δ(to_re("")) = ∅ + if (u().str.is_empty(s)) + return expr_ref(re().mk_empty(re_sort), m); + + // δ(to_re("c₁c₂...cₙ")) = ite(ele = c₁, to_re("c₂...cₙ"), ∅) + zstring zs; + if (u().str.is_string(s, zs)) { + if (zs.length() == 0) + return 
expr_ref(re().mk_empty(re_sort), m); + // First character + expr_ref head(m_util.mk_char(zs[0]), m); + expr_ref cond(m.mk_eq(m_ele, head), m); + // Tail string + expr_ref tail_str(u().str.mk_string(zs.extract(1, zs.length() - 1)), m); + expr_ref tail_re(re().mk_to_re(tail_str), m); + expr_ref empty(re().mk_empty(re_sort), m); + return mk_ite(cond, tail_re, empty); + } + + // Non-ground sequence: δ(to_re(s)) = ite(s ≠ "" ∧ ele = s[0], to_re(s[1:]), ∅) + expr_ref empty_seq(u().str.mk_empty(seq_sort), m); + expr_ref is_non_empty(m.mk_not(m.mk_eq(s, empty_seq)), m); + expr_ref zero(m_autil.mk_int(0), m); + expr_ref first(u().str.mk_nth_i(s, zero), m); + expr_ref eq_first(m.mk_eq(m_ele, first), m); + expr_ref guard(m.mk_and(is_non_empty, eq_first), m); + expr_ref one(m_autil.mk_int(1), m); + expr_ref len(u().str.mk_length(s), m); + expr_ref rest_len(m_autil.mk_sub(len, one), m); + expr_ref rest(u().str.mk_substr(s, one, rest_len), m); + expr_ref rest_re(re().mk_to_re(rest), m); + expr_ref empty(re().mk_empty(re_sort), m); + return mk_ite(guard, rest_re, empty); + } + + expr_ref derive::derive_range(expr* lo, expr* hi, sort* seq_sort) { + sort* re_sort = re().mk_re(seq_sort); + expr_ref empty(re().mk_empty(re_sort), m); + expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m); + + // Extract character values from unit strings + expr_ref c_lo(m), c_hi(m); + if (u().str.is_unit_string(lo, c_lo) && u().str.is_unit_string(hi, c_hi)) { + // ite(lo <= ele && ele <= hi, ε, ∅) + expr_ref ge_lo(m_util.mk_le(c_lo, m_ele), m); + expr_ref le_hi(m_util.mk_le(m_ele, c_hi), m); + expr_ref in_range(m.mk_and(ge_lo, le_hi), m); + return mk_ite(in_range, eps, empty); + } + + // Fallback: stuck derivative + return expr_ref(re().mk_derivative(m_ele, re().mk_range(lo, hi)), m); + } + + expr_ref derive::derive_of_pred(expr* pred, sort* seq_sort) { + sort* re_sort = re().mk_re(seq_sort); + expr_ref empty(re().mk_empty(re_sort), m); + expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), 
m); + + // Apply predicate to the element + array_util autil(m); + expr* args[2] = { pred, m_ele }; + expr_ref cond(autil.mk_select(2, args), m); + return mk_ite(cond, eps, empty); + } + + // ------------------------------------------------------- + // Nullability - uses info class from seq_decl_plugin.h + // ------------------------------------------------------- + + expr_ref derive::is_nullable(expr* r) { + // First, try the static info which handles ground/interpreted regex + lbool nb = re().get_info(r).nullable; + if (nb == l_true) + return expr_ref(m.mk_true(), m); + if (nb == l_false) + return expr_ref(m.mk_false(), m); + // info is undetermined (l_undef) — fall back to recursive computation + return is_nullable_rec(r); + } + + expr_ref derive::is_nullable_rec(expr* r) { + expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr; + sort* s = nullptr; + unsigned lo = 0, hi = 0; + + if (re().is_concat(r, r1, r2) || re().is_intersection(r, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = is_nullable(r2); + expr_ref result(m); + m_br.mk_and(n1, n2, result); + return result; + } + if (re().is_union(r, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = is_nullable(r2); + expr_ref result(m); + m_br.mk_or(n1, n2, result); + return result; + } + if (re().is_complement(r, r1)) { + expr_ref n1 = is_nullable(r1); + expr_ref result(m); + m_br.mk_not(n1, result); + return result; + } + if (re().is_diff(r, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = is_nullable(r2); + expr_ref not_n2(m); + m_br.mk_not(n2, not_n2); + expr_ref result(m); + m_br.mk_and(n1, not_n2, result); + return result; + } + if (re().is_to_re(r, r1)) { + if (u().str.is_empty(r1)) + return expr_ref(m.mk_true(), m); + zstring zs; + if (u().str.is_string(r1, zs)) + return expr_ref(m.mk_bool_val(zs.length() == 0), m); + return expr_ref(m.mk_eq(r1, u().str.mk_empty(r1->get_sort())), m); + } + if (m.is_ite(r, cond, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = 
is_nullable(r2); + expr_ref result(m); + m_br.mk_ite(cond, n1, n2, result); + return result; + } + // Unknown: use membership test + if (m_util.is_re(r, s)) + return expr_ref(re().mk_in_re(u().str.mk_empty(s), r), m); + + return expr_ref(m.mk_true(), m); + } + + // ------------------------------------------------------- + // Smart constructors with simplification + // ------------------------------------------------------- + + expr_ref derive::mk_union(expr* a, expr* b) { + // Identity / annihilator + if (a == b) return expr_ref(a, m); + if (re().is_empty(a)) return expr_ref(b, m); + if (re().is_empty(b)) return expr_ref(a, m); + if (re().is_full_seq(a)) return expr_ref(a, m); + if (re().is_full_seq(b)) return expr_ref(b, m); + + // Complement absorption: r ∪ ~r = Σ* + expr* c = nullptr; + if (re().is_complement(a, c) && c == b) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + if (re().is_complement(b, c) && c == a) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + + // ITE combination: if both are ITE with same condition, merge + expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { + expr_ref then_br = mk_union(t1, t2); + expr_ref else_br = mk_union(e1, e2); + return mk_ite(c1, then_br, else_br); + } + + // ACI: flatten, sort, deduplicate + expr_ref_vector args(m); + flatten_union(a, args); + flatten_union(b, args); + + // Sort by expr id for canonical form + std::stable_sort(args.data(), args.data() + args.size(), + [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); + + // Deduplicate + unsigned j = 0; + for (unsigned i = 0; i < args.size(); ++i) { + if (j > 0 && args.get(i) == args.get(j - 1)) + continue; // skip duplicate + if (re().is_empty(args.get(i))) + continue; // skip empty + if (re().is_full_seq(args.get(i))) + return expr_ref(args.get(i), m); // universal absorbs + args.set(j++, args.get(i)); + } + args.shrink(j); + + if (args.empty()) + return 
expr_ref(re().mk_empty(a->get_sort()), m); + + return mk_union_from_sorted(args); + } + + expr_ref derive::mk_inter(expr* a, expr* b) { + // Identity / annihilator + if (a == b) return expr_ref(a, m); + if (re().is_empty(a)) return expr_ref(a, m); + if (re().is_empty(b)) return expr_ref(b, m); + if (re().is_full_seq(a)) return expr_ref(b, m); + if (re().is_full_seq(b)) return expr_ref(a, m); + + // Complement absorption: r ∩ ~r = ∅ + expr* c = nullptr; + if (re().is_complement(a, c) && c == b) + return expr_ref(re().mk_empty(a->get_sort()), m); + if (re().is_complement(b, c) && c == a) + return expr_ref(re().mk_empty(a->get_sort()), m); + + // ITE combination: if both are ITE with same condition, merge + expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { + expr_ref then_br = mk_inter(t1, t2); + expr_ref else_br = mk_inter(e1, e2); + return mk_ite(c1, then_br, else_br); + } + + // ACI: flatten, sort, deduplicate + expr_ref_vector args(m); + flatten_inter(a, args); + flatten_inter(b, args); + + std::stable_sort(args.data(), args.data() + args.size(), + [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); + + unsigned j = 0; + for (unsigned i = 0; i < args.size(); ++i) { + if (j > 0 && args.get(i) == args.get(j - 1)) + continue; + if (re().is_full_seq(args.get(i))) + continue; // skip universal + if (re().is_empty(args.get(i))) + return expr_ref(args.get(i), m); // empty absorbs + args.set(j++, args.get(i)); + } + args.shrink(j); + + if (args.empty()) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + + return mk_inter_from_sorted(args); + } + + expr_ref derive::mk_concat(expr* a, expr* b) { + if (re().is_empty(a)) return expr_ref(a, m); + if (re().is_empty(b)) return expr_ref(b, m); + if (re().is_epsilon(a)) return expr_ref(b, m); + if (re().is_epsilon(b)) return expr_ref(a, m); + + // to_re(s1) · to_re(s2) → to_re(s1 ++ s2) + expr* s1 = nullptr, * s2 = nullptr; + if (re().is_to_re(a, s1) && 
re().is_to_re(b, s2)) + return expr_ref(re().mk_to_re(u().str.mk_concat(s1, s2)), m); + + // r* · r* → r* + expr* a1 = nullptr, * b1 = nullptr; + if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1) + return expr_ref(a, m); + + // Right-associate: (a · b) · c → a · (b · c) + if (re().is_concat(a, a1, b1)) { + expr_ref tail = mk_concat(b1, b); + return expr_ref(re().mk_concat(a1, tail), m); + } + + return expr_ref(re().mk_concat(a, b), m); + } + + expr_ref derive::mk_complement(expr* a) { + // ~~r → r + expr* r = nullptr; + if (re().is_complement(a, r)) + return expr_ref(r, m); + // ~∅ → Σ* + if (re().is_empty(a)) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + // ~Σ* → ∅ + if (re().is_full_seq(a)) + return expr_ref(re().mk_empty(a->get_sort()), m); + + // Push through ITE: ~(ite(c, t, e)) → ite(c, ~t, ~e) + expr* c, * t, * e; + if (m.is_ite(a, c, t, e)) { + expr_ref ct = mk_complement(t); + expr_ref ce = mk_complement(e); + return mk_ite(c, ct, ce); + } + + return expr_ref(re().mk_complement(a), m); + } + + expr_ref derive::mk_ite(expr* c, expr* t, expr* e) { + if (m.is_true(c) || t == e) + return expr_ref(t, m); + if (m.is_false(c)) + return expr_ref(e, m); + return expr_ref(m.mk_ite(c, t, e), m); + } + + // ------------------------------------------------------- + // ACI normalization helpers + // ------------------------------------------------------- + + void derive::flatten_union(expr* r, expr_ref_vector& args) { + expr* a = nullptr, * b = nullptr; + if (re().is_union(r, a, b)) { + flatten_union(a, args); + flatten_union(b, args); + } + else { + args.push_back(r); + } + } + + void derive::flatten_inter(expr* r, expr_ref_vector& args) { + expr* a = nullptr, * b = nullptr; + if (re().is_intersection(r, a, b)) { + flatten_inter(a, args); + flatten_inter(b, args); + } + else { + args.push_back(r); + } + } + + expr_ref derive::mk_union_from_sorted(expr_ref_vector& args) { + if (args.empty()) { + // All elements were identity/absorbed - should not 
happen in practice + // but handle gracefully + UNREACHABLE(); + return expr_ref(m.mk_true(), m); + } + if (args.size() == 1) + return expr_ref(args.get(0), m); + // Build right-associated union + expr_ref result(args.back(), m); + for (unsigned i = args.size() - 1; i > 0; ) { + --i; + result = expr_ref(re().mk_union(args.get(i), result), m); + } + return result; + } + + expr_ref derive::mk_inter_from_sorted(expr_ref_vector& args) { + if (args.empty()) { + UNREACHABLE(); + return expr_ref(m.mk_true(), m); + } + if (args.size() == 1) + return expr_ref(args.get(0), m); + // Build right-associated intersection + expr_ref result(args.back(), m); + for (unsigned i = args.size() - 1; i > 0; ) { + --i; + result = expr_ref(re().mk_inter(args.get(i), result), m); + } + return result; + } + + // ------------------------------------------------------- + // ITE-tree combinators (analogous to REsharp mk_binary/mk_unary) + // ------------------------------------------------------- + + expr_ref derive::ite_combine_binary(expr* d1, expr* d2, + std::function const& op) { + expr *c1, *t1, *e1, *c2, *t2, *e2; + + // Both are leaves (non-ITE) + if (!m.is_ite(d1, c1, t1, e1) && !m.is_ite(d2, c2, t2, e2)) + return op(d1, d2); + + // d1 is ITE, d2 is not + if (m.is_ite(d1, c1, t1, e1) && !m.is_ite(d2, c2, t2, e2)) { + expr_ref then_r = ite_combine_binary(t1, d2, op); + expr_ref else_r = ite_combine_binary(e1, d2, op); + return mk_ite(c1, then_r, else_r); + } + + // d2 is ITE, d1 is not + if (!m.is_ite(d1, c1, t1, e1) && m.is_ite(d2, c2, t2, e2)) { + expr_ref then_r = ite_combine_binary(d1, t2, op); + expr_ref else_r = ite_combine_binary(d1, e2, op); + return mk_ite(c2, then_r, else_r); + } + + // Both are ITE + VERIFY(m.is_ite(d1, c1, t1, e1)); + VERIFY(m.is_ite(d2, c2, t2, e2)); + + if (c1 == c2) { + // Same condition: combine pairwise + expr_ref then_r = ite_combine_binary(t1, t2, op); + expr_ref else_r = ite_combine_binary(e1, e2, op); + return mk_ite(c1, then_r, else_r); + } + + // 
Order by condition id (larger id on outside for canonical form) + if (c1->get_id() > c2->get_id()) { + expr_ref then_r = ite_combine_binary(t1, d2, op); + expr_ref else_r = ite_combine_binary(e1, d2, op); + return mk_ite(c1, then_r, else_r); + } + else { + expr_ref then_r = ite_combine_binary(d1, t2, op); + expr_ref else_r = ite_combine_binary(d1, e2, op); + return mk_ite(c2, then_r, else_r); + } + } + + expr_ref derive::ite_combine_unary(expr* d, + std::function const& op) { + expr* c, * t, * e; + if (m.is_ite(d, c, t, e)) { + expr_ref then_r = ite_combine_unary(t, op); + expr_ref else_r = ite_combine_unary(e, op); + return mk_ite(c, then_r, else_r); + } + return op(d); + } + + // ------------------------------------------------------- + // Distribute concat through ITE/union structure of derivative + // ------------------------------------------------------- + + expr_ref derive::mk_deriv_concat(expr* d, expr* tail) { + expr_ref _d(d, m), _tail(tail, m); + expr* c, * t, * e; + + if (re().is_empty(d)) + return expr_ref(d, m); + if (re().is_epsilon(d)) + return expr_ref(tail, m); + + if (m.is_ite(d, c, t, e)) { + expr_ref then_r = mk_deriv_concat(t, tail); + expr_ref else_r = mk_deriv_concat(e, tail); + return mk_ite(c, then_r, else_r); + } + + if (re().is_union(d, t, e)) { + expr_ref left = mk_deriv_concat(t, tail); + expr_ref right = mk_deriv_concat(e, tail); + return mk_union(left, right); + } + + return mk_concat(d, tail); + } + +} + + + diff --git a/src/ast/seq_derive.h b/src/ast/seq_derive.h new file mode 100644 index 000000000..b093366cc --- /dev/null +++ b/src/ast/seq_derive.h @@ -0,0 +1,130 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + seq_derive.h + +Abstract: + + Symbolic derivative computation for regular expressions. + Produces an ITE-tree (transition regex) representation where + the free variable is de Bruijn index 0 representing the input character. 
+ + Based on the theory of symbolic derivatives and transition regexes: + - Veanes et al., "On Symbolic Derivatives and Transition Regexes" (LPAR 2024) + - Varatalu, Veanes, Ernits, "RE#" (POPL 2025) + - Stanford, Veanes, Bjørner, "Symbolic Boolean Derivatives" (PLDI 2021) + +Authors: + + Nikolaj Bjorner (nbjorner) 2025-06-03 + +--*/ + +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/arith_decl_plugin.h" +#include "ast/array_decl_plugin.h" +#include "ast/rewriter/bool_rewriter.h" + +namespace seq { + + /** + * Symbolic derivative engine for regular expressions. + * + * Given a regex r, operator()(r) computes a symbolic derivative δ(r) + * represented as an ITE-tree over character predicates (using de Bruijn + * variable 0 for the character). Evaluating the ITE-tree for a concrete + * character 'a' yields the classical Brzozowski derivative δ_a(r). + * + * The ITE-tree structure implicitly defines minterms (equivalence classes + * of characters indistinguishable by the regex). 
+ * + * Key properties: + * - Results are memoized for termination on cyclic derivative graphs + * - Union/intersection operands are sorted for ACI canonicalization + * - Depth-bounded to prevent stack overflow + */ + class derive { + ast_manager& m; + seq_util m_util; + arith_util m_autil; + bool_rewriter m_br; + + // Cache: maps regex expr to its symbolic derivative + obj_map m_cache; + expr_ref_vector m_trail; // pin cached results + + // Depth limiting + unsigned m_depth { 0 }; + static const unsigned m_max_depth = 512; + + seq_util::rex& re() { return m_util.re; } + seq_util& u() { return m_util; } + + // The element (character) for the current derivative computation + expr_ref m_ele; + + // Core derivative computation + expr_ref derive_rec(expr* r); + expr_ref derive_core(expr* r); + + // Helpers for specific regex constructs + expr_ref derive_to_re(expr* s, sort* seq_sort); + expr_ref derive_range(expr* lo, expr* hi, sort* seq_sort); + expr_ref derive_of_pred(expr* pred, sort* seq_sort); + + // Nullable check: returns a Boolean expression + expr_ref is_nullable(expr* r); + expr_ref is_nullable_rec(expr* r); + + // Smart constructors with simplification and ACI canonicalization + expr_ref mk_union(expr* a, expr* b); + expr_ref mk_inter(expr* a, expr* b); + expr_ref mk_concat(expr* a, expr* b); + expr_ref mk_complement(expr* a); + expr_ref mk_ite(expr* c, expr* t, expr* e); + + // Flatten and sort for ACI normal form + void flatten_union(expr* r, expr_ref_vector& args); + void flatten_inter(expr* r, expr_ref_vector& args); + expr_ref mk_union_from_sorted(expr_ref_vector& args); + expr_ref mk_inter_from_sorted(expr_ref_vector& args); + + // ITE-tree binary combinator (analogous to REsharp mk_binary) + // Combines two ITE-tree derivatives with a binary regex operation + expr_ref ite_combine_binary(expr* d1, expr* d2, + std::function const& op); + + // ITE-tree unary combinator (analogous to REsharp mk_unary) + expr_ref ite_combine_unary(expr* d, std::function 
const& op); + + // Distribute concatenation through ITE/union in derivative + expr_ref mk_deriv_concat(expr* d, expr* tail); + + sort* re_sort(expr* r) { return r->get_sort(); } + sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } + sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; } + + public: + derive(ast_manager& m); + + /** + * Compute the derivative of regex r with respect to element ele. + * When ele is a de Bruijn variable, produces a symbolic ITE-tree. + * When ele is a concrete character, produces the concrete derivative. + */ + expr_ref operator()(expr* ele, expr* r); + + /** + * Convenience: symbolic derivative using de Bruijn var 0. + */ + expr_ref operator()(expr* r); + + void reset(); + }; + +} + From cb2cf913e3242f784e4a600b54a478756304e015 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 3 Jun 2026 11:04:19 -0700 Subject: [PATCH 03/32] move seq_derive and fix include paths, remove antimirov code --- src/ast/CMakeLists.txt | 1 - src/ast/rewriter/CMakeLists.txt | 1 + src/ast/{ => rewriter}/seq_derive.cpp | 2 +- src/ast/{ => rewriter}/seq_derive.h | 0 src/ast/rewriter/seq_rewriter.cpp | 407 -------------------------- src/ast/rewriter/seq_rewriter.h | 13 +- 6 files changed, 3 insertions(+), 421 deletions(-) rename src/ast/{ => rewriter}/seq_derive.cpp (99%) rename src/ast/{ => rewriter}/seq_derive.h (100%) diff --git a/src/ast/CMakeLists.txt b/src/ast/CMakeLists.txt index dd1db135c..6a50c3b05 100644 --- a/src/ast/CMakeLists.txt +++ b/src/ast/CMakeLists.txt @@ -46,7 +46,6 @@ z3_add_component(ast recfun_decl_plugin.cpp reg_decl_plugins.cpp seq_decl_plugin.cpp - seq_derive.cpp shared_occs.cpp special_relations_decl_plugin.cpp static_features.cpp diff --git a/src/ast/rewriter/CMakeLists.txt b/src/ast/rewriter/CMakeLists.txt index 9d529f9b5..2cdcba510 100644 --- a/src/ast/rewriter/CMakeLists.txt +++ b/src/ast/rewriter/CMakeLists.txt @@ -39,6 +39,7 @@ 
z3_add_component(rewriter rewriter.cpp seq_axioms.cpp seq_eq_solver.cpp + seq_derive.cpp seq_rewriter.cpp seq_skolem.cpp th_rewriter.cpp diff --git a/src/ast/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp similarity index 99% rename from src/ast/seq_derive.cpp rename to src/ast/rewriter/seq_derive.cpp index 96ec04059..a4c598948 100644 --- a/src/ast/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -21,7 +21,7 @@ Authors: --*/ -#include "ast/seq_derive.h" +#include "ast/rewriter/seq_derive.h" #include "ast/ast_pp.h" #include "ast/array_decl_plugin.h" #include "ast/rewriter/bool_rewriter.h" diff --git a/src/ast/seq_derive.h b/src/ast/rewriter/seq_derive.h similarity index 100% rename from src/ast/seq_derive.h rename to src/ast/rewriter/seq_derive.h diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index b6e472b1a..212f97a8a 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2921,390 +2921,6 @@ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { return m_derive(ele, r); } -expr_ref seq_rewriter::mk_antimirov_deriv(expr* e, expr* r, expr* path) { - // Ensure references are owned - expr_ref _e(e, m()), _path(path, m()), _r(r, m()); - expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, e, r, path), m()); - if (!result) { - mk_antimirov_deriv_rec(e, r, path, result); - m_op_cache.insert(OP_RE_DERIVATIVE, e, r, path, result); - STRACE(seq_regex, tout << "D(" << mk_pp(e, m()) << "," << mk_pp(r, m()) << "," << mk_pp(path, m()) << ")" << std::endl;); - STRACE(seq_regex, tout << "= " << mk_pp(result, m()) << std::endl;); - } - return result; -} - -void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result) { - sort* seq_sort = nullptr, * ele_sort = nullptr; - expr_ref _r(r, m()), _path(path, m()); - VERIFY(m_util.is_re(r, seq_sort)); - VERIFY(m_util.is_seq(seq_sort, ele_sort)); - SASSERT(ele_sort == e->get_sort()); - expr* r1 = nullptr, * r2 = nullptr, * c = 
nullptr; - expr_ref c1(m()); - expr_ref c2(m()); - auto nothing = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m()); }; - auto epsilon = [&]() { return expr_ref(re().mk_epsilon(seq_sort), m()); }; - auto dotstar = [&]() { return expr_ref(re().mk_full_seq(r->get_sort()), m()); }; - unsigned lo = 0, hi = 0; - if (re().is_empty(r) || re().is_epsilon(r)) - // D(e,[]) = D(e,()) = [] - result = nothing(); - else if (re().is_full_seq(r) || re().is_dot_plus(r)) - // D(e,.*) = D(e,.+) = .* - result = dotstar(); - else if (re().is_full_char(r)) - // D(e,.) = () - result = epsilon(); - else if (re().is_to_re(r, r1)) { - expr_ref h(m()); - expr_ref t(m()); - // here r1 is a sequence - if (get_head_tail(r1, h, t)) { - if (eq_char(e, h)) - result = re().mk_to_re(t); - else if (neq_char(e, h)) - result = nothing(); - else - result = re().mk_ite_simplify(m().mk_eq(e, h), re().mk_to_re(t), nothing()); - } - else { - // observe that the precondition |r1|>0 is is implied by c1 for use of mk_seq_first - { - auto is_non_empty = m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))); - auto eq_first = m().mk_eq(mk_seq_first(r1), e); - m_br.mk_and(is_non_empty, eq_first, c1); - } - m_br.mk_and(path, c1, c2); - if (m().is_false(c2)) - result = nothing(); - else - // observe that the precondition |r1|>0 is implied by c1 for use of mk_seq_rest - result = m().mk_ite(c1, re().mk_to_re(mk_seq_rest(r1)), nothing()); - } - } - else if (re().is_reverse(r, r2)) - if (re().is_to_re(r2, r1)) { - // here r1 is a sequence - // observe that the precondition |r1|>0 of mk_seq_last is implied by c1 - { - auto is_non_empty = m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))); - auto eq_last = m().mk_eq(mk_seq_last(r1), e); - m_br.mk_and(is_non_empty, eq_last, c1); - } - m_br.mk_and(path, c1, c2); - if (m().is_false(c2)) - result = nothing(); - else - // observe that the precondition |r1|>0 of mk_seq_rest is implied by c1 - result = re().mk_ite_simplify(c1, 
re().mk_reverse(re().mk_to_re(mk_seq_butlast(r1))), nothing()); - } - else { - result = mk_regex_reverse(r2); - if (result.get() == r) - //r2 is an uninterpreted regex that is stuck - //for example if r = (re.reverse R) where R is a regex variable then - //here result.get() == r - result = re().mk_derivative(e, result); - else - result = mk_antimirov_deriv(e, result, path); - } - else if (re().is_concat(r, r1, r2)) { - expr_ref r1nullable(is_nullable(r1), m()); - c1 = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), r2); - expr_ref r1nullable_and_path(m()); - m_br.mk_and(r1nullable, path, r1nullable_and_path); - if (m().is_false(r1nullable_and_path)) - // D(e,r1)r2 - result = c1; - else - // D(e,r1)r2|(ite (r1nullable) (D(e,r2)) []) - // observe that (mk_ite_simplify(true, D(e,r2), []) = D(e,r2) - result = mk_antimirov_deriv_union(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); - } - else if (m().is_ite(r, c, r1, r2)) { - { - auto cp = m().mk_and(c, path); - c1 = simplify_path(e, cp); - } - { - auto notc = m().mk_not(c); - auto np = m().mk_and(notc, path); - c2 = simplify_path(e, np); - } - if (m().is_false(c1)) - result = mk_antimirov_deriv(e, r2, c2); - else if (m().is_false(c2)) - result = mk_antimirov_deriv(e, r1, c1); - else - result = re().mk_ite_simplify(c, mk_antimirov_deriv(e, r1, c1), mk_antimirov_deriv(e, r2, c2)); - } - else if (re().is_range(r, r1, r2)) { - expr_ref range(m()); - expr_ref psi(m().mk_false(), m()); - if (str().is_unit_string(r1, c1) && str().is_unit_string(r2, c2)) { - // SASSERT(u().is_char(c1)); - // SASSERT(u().is_char(c2)); - // case: c1 <= e <= c2 - // deterministic evaluation for range bounds - auto a_le = u().mk_le(c1, e); - auto b_le = u().mk_le(e, c2); - auto rng_cond = m().mk_and(a_le, b_le); - range = simplify_path(e, rng_cond); - psi = simplify_path(e, m().mk_and(path, range)); - } - else if (!str().is_string(r1) && str().is_unit_string(r2, c2)) { - SASSERT(u().is_char(c2)); - // 
r1 nonground: |r1|=1 & r1[0] <= e <= c2 - expr_ref one(m_autil.mk_int(1), m()); - expr_ref zero(m_autil.mk_int(0), m()); - expr_ref r1_length_eq_one(m().mk_eq(str().mk_length(r1), one), m()); - expr_ref r1_0(str().mk_nth_i(r1, zero), m()); - range = simplify_path(e, m().mk_and(r1_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, c2)))); - psi = simplify_path(e, m().mk_and(path, range)); - } - else if (!str().is_string(r2) && str().is_unit_string(r1, c1)) { - SASSERT(u().is_char(c1)); - // r2 nonground: |r2|=1 & c1 <= e <= r2_0 - expr_ref one(m_autil.mk_int(1), m()); - expr_ref zero(m_autil.mk_int(0), m()); - expr_ref r2_length_eq_one(m().mk_eq(str().mk_length(r2), one), m()); - expr_ref r2_0(str().mk_nth_i(r2, zero), m()); - range = simplify_path(e, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(c1, e), u().mk_le(e, r2_0)))); - psi = simplify_path(e, m().mk_and(path, range)); - } - else if (!str().is_string(r1) && !str().is_string(r2)) { - // both r1 and r2 nonground: |r1|=1 & |r2|=1 & r1[0] <= e <= r2[0] - expr_ref one(m_autil.mk_int(1), m()); - expr_ref zero(m_autil.mk_int(0), m()); - expr_ref r1_length_eq_one(m().mk_eq(str().mk_length(r1), one), m()); - expr_ref r1_0(str().mk_nth_i(r1, zero), m()); - expr_ref r2_length_eq_one(m().mk_eq(str().mk_length(r2), one), m()); - expr_ref r2_0(str().mk_nth_i(r2, zero), m()); - range = simplify_path(e, m().mk_and(r1_length_eq_one, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, r2_0))))); - psi = simplify_path(e, m().mk_and(path, range)); - } - if (m().is_false(psi)) - result = nothing(); - else - result = re().mk_ite_simplify(range, epsilon(), nothing()); - } - else if (re().is_union(r, r1, r2)) - result = mk_antimirov_deriv_union(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path)); - else if (re().is_intersection(r, r1, r2)) - result = mk_antimirov_deriv_intersection(e, - mk_antimirov_deriv(e, r1, path), - mk_antimirov_deriv(e, r2, path), m().mk_true()); - else if 
(re().is_star(r, r1) || re().is_plus(r, r1) || (re().is_loop(r, r1, lo) && 0 <= lo && lo <= 1)) - result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_star(r1)); - else if (re().is_loop(r, r1, lo)) - result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_loop(r1, lo - 1)); - else if (re().is_loop(r, r1, lo, hi)) { - if ((lo == 0 && hi == 0) || hi < lo) - result = nothing(); - else { - expr_ref t(re().mk_loop_proper(r1, (lo == 0 ? 0 : lo - 1), hi - 1), m()); - result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), t); - } - } - else if (re().is_opt(r, r1)) - result = mk_antimirov_deriv(e, r1, path); - else if (re().is_complement(r, r1)) - // D(e,~r1) = ~D(e,r1) - result = mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r1, path)); - else if (re().is_diff(r, r1, r2)) - result = mk_antimirov_deriv_intersection(e, - mk_antimirov_deriv(e, r1, path), - mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r2, path)), m().mk_true()); - else if (re().is_of_pred(r, r1)) { - array_util array(m()); - expr* args[2] = { r1, e }; - result = array.mk_select(2, args); - // Use mk_der_cond to normalize - result = mk_der_cond(result, e, seq_sort); - } - else - // stuck cases - result = re().mk_derivative(e, r); -} - -expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* d2, expr* path) { - sort* seq_sort = nullptr, * ele_sort = nullptr; - VERIFY(m_util.is_re(d1, seq_sort)); - VERIFY(m_util.is_seq(seq_sort, ele_sort)); - expr_ref result(m()); - expr* c, * a, * b; - if (m_re_deriv_depth >= m_max_re_deriv_depth) { - // Depth limit reached: construct intersection without further decomposition - result = mk_regex_inter_normalize(d1, d2); - } - else if (re().is_empty(d1)) - result = d1; - else if (re().is_empty(d2)) - result = d2; - else if (m().is_ite(d1, c, a, b)) { - expr_ref path_and_c(simplify_path(e, m().mk_and(path, c)), m()); - expr_ref path_and_notc(simplify_path(e, m().mk_and(path, 
m().mk_not(c))), m()); - ++m_re_deriv_depth; - if (m().is_false(path_and_c)) - result = mk_antimirov_deriv_intersection(e, b, d2, path); - else if (m().is_false(path_and_notc)) - result = mk_antimirov_deriv_intersection(e, a, d2, path); - else - result = m().mk_ite(c, mk_antimirov_deriv_intersection(e, a, d2, path_and_c), - mk_antimirov_deriv_intersection(e, b, d2, path_and_notc)); - --m_re_deriv_depth; - } - else if (m().is_ite(d2)) { - // swap d1 and d2 - ++m_re_deriv_depth; - result = mk_antimirov_deriv_intersection(e, d2, d1, path); - --m_re_deriv_depth; - } - else if (d1 == d2 || re().is_full_seq(d2)) - result = mk_antimirov_deriv_restrict(e, d1, path); - else if (re().is_full_seq(d1)) - result = mk_antimirov_deriv_restrict(e, d2, path); - else if (re().is_union(d1, a, b)) { - // distribute intersection over the union in d1 - ++m_re_deriv_depth; - result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, a, d2, path), - mk_antimirov_deriv_intersection(e, b, d2, path)); - --m_re_deriv_depth; - } - else if (re().is_union(d2, a, b)) { - // distribute intersection over the union in d2 - ++m_re_deriv_depth; - result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, d1, a, path), - mk_antimirov_deriv_intersection(e, d1, b, path)); - --m_re_deriv_depth; - } - else - result = mk_regex_inter_normalize(d1, d2); - return result; -} - -expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) { - expr_ref result(m()); - expr_ref _r(r, m()), _d(d, m()); - expr* c, * t, * e; - if (m_re_deriv_depth >= m_max_re_deriv_depth) { - // Depth limit reached: construct concat without further decomposition - result = mk_re_append(d, r); - } - else if (m().is_ite(d, c, t, e)) { - ++m_re_deriv_depth; - auto r2 = mk_antimirov_deriv_concat(e, r); - auto r1 = mk_antimirov_deriv_concat(t, r); - --m_re_deriv_depth; - result = m().mk_ite(c, r1, r2); - } - else if (re().is_union(d, t, e)) { - ++m_re_deriv_depth; - result = 
mk_antimirov_deriv_union(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); - --m_re_deriv_depth; - } - else - result = mk_re_append(d, r); - SASSERT(result.get()); - return result; -} - -expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { - sort* seq_sort = nullptr; - VERIFY(m_util.is_re(d, seq_sort)); - auto nothing = [&]() { return expr_ref(re().mk_empty(d->get_sort()), m()); }; - auto epsilon = [&]() { return expr_ref(re().mk_epsilon(seq_sort), m()); }; - auto dotstar = [&]() { return expr_ref(re().mk_full_seq(d->get_sort()), m()); }; - auto dotplus = [&]() { return expr_ref(re().mk_plus(re().mk_full_char(d->get_sort())), m()); }; - expr_ref result(m()); - expr* c, * t, * e; - if (m_re_deriv_depth >= m_max_re_deriv_depth) { - // Depth limit reached: construct complement without further decomposition - result = re().mk_complement(d); - } - else if (re().is_empty(d)) - result = dotstar(); - else if (re().is_epsilon(d)) - result = dotplus(); - else if (re().is_full_seq(d)) - result = nothing(); - else if (re().is_dot_plus(d)) - result = epsilon(); - else if (m().is_ite(d, c, t, e)) { - ++m_re_deriv_depth; - result = m().mk_ite(c, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); - --m_re_deriv_depth; - } - else if (re().is_union(d, t, e)) { - ++m_re_deriv_depth; - result = mk_antimirov_deriv_intersection(elem, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e), m().mk_true()); - --m_re_deriv_depth; - } - else if (re().is_intersection(d, t, e)) { - ++m_re_deriv_depth; - result = mk_antimirov_deriv_union(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); - --m_re_deriv_depth; - } - else if (re().is_complement(d, t)) - result = t; - else - result = re().mk_complement(d); - return result; -} - -expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) { - sort* seq_sort = nullptr, * ele_sort = nullptr; - VERIFY(m_util.is_re(d1, seq_sort)); - 
VERIFY(m_util.is_seq(seq_sort, ele_sort)); - expr_ref result(m()); - expr* c1, * t1, * e1, * c2, * t2, * e2; - if (m().is_ite(d1, c1, t1, e1) && m().is_ite(d2, c2, t2, e2) && c1 == c2) - // eliminate duplicate branching on exactly the same condition - result = m().mk_ite(c1, mk_antimirov_deriv_union(t1, t2), mk_antimirov_deriv_union(e1, e2)); - else - result = mk_regex_union_normalize(d1, d2); - return result; -} - -// restrict the guards of all conditionals id d and simplify the resulting derivative -// restrict(if(c, a, b), cond) = if(c, restrict(a, cond & c), restrict(b, cond & ~c)) -// restrict(a U b, cond) = restrict(a, cond) U restrict(b, cond) -// where {} U X = X, X U X = X -// restrict(R, cond) = R -// -// restrict(d, false) = [] -// -// it is already assumed that the restriction takes place within a branch -// so the condition is not added explicitly but propagated down in order to eliminate -// infeasible cases -expr_ref seq_rewriter::mk_antimirov_deriv_restrict(expr* e, expr* d, expr* cond) { - expr_ref result(d, m()); - expr_ref _cond(cond, m()); - expr* c, * a, * b; - if (m().is_false(cond)) - result = re().mk_empty(d->get_sort()); - else if (re().is_empty(d) || m().is_true(cond)) - result = d; - else if (m_re_deriv_depth >= m_max_re_deriv_depth) - result = d; - else if (m().is_ite(d, c, a, b)) { - expr_ref path_and_c(simplify_path(e, m().mk_and(cond, c)), m()); - expr_ref path_and_notc(simplify_path(e, m().mk_and(cond, m().mk_not(c))), m()); - ++m_re_deriv_depth; - result = re().mk_ite_simplify(c, mk_antimirov_deriv_restrict(e, a, path_and_c), - mk_antimirov_deriv_restrict(e, b, path_and_notc)); - --m_re_deriv_depth; - } - else if (re().is_union(d, a, b)) { - ++m_re_deriv_depth; - expr_ref a1(mk_antimirov_deriv_restrict(e, a, cond), m()); - expr_ref b1(mk_antimirov_deriv_restrict(e, b, cond), m()); - --m_re_deriv_depth; - result = mk_antimirov_deriv_union(a1, b1); - } - return result; -} expr_ref seq_rewriter::mk_regex_union_normalize(expr* r1, expr* 
r2) { expr_ref _r1(r1, m()), _r2(r2, m()); @@ -3551,29 +3167,6 @@ expr_ref seq_rewriter::mk_regex_concat(expr* r, expr* s) { return result; } -expr_ref seq_rewriter::mk_in_antimirov(expr* s, expr* d){ - expr_ref result(mk_in_antimirov_rec(s, d), m()); - return result; -} - -expr_ref seq_rewriter::mk_in_antimirov_rec(expr* s, expr* d) { - expr* c, * d1, * d2; - expr_ref result(m()); - if (re().is_full_seq(d) || (str().min_length(s) > 0 && re().is_dot_plus(d))) - // s in .* <==> true, also: s in .+ <==> true when |s|>0 - result = m().mk_true(); - else if (re().is_empty(d) || (str().min_length(s) > 0 && re().is_epsilon(d))) - // s in [] <==> false, also: s in () <==> false when |s|>0 - result = m().mk_false(); - else if (m().is_ite(d, c, d1, d2)) - result = re().mk_ite_simplify(c, mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2)); - else if (re().is_union(d, d1, d2)) - m_br.mk_or(mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2), result); - else - result = re().mk_in_re(s, d); - return result; -} - /* * calls elim_condition */ diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 9668c299c..70b382457 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -19,7 +19,7 @@ Notes: #pragma once #include "ast/seq_decl_plugin.h" -#include "ast/seq_derive.h" +#include "ast/rewriter/seq_derive.h" #include "ast/ast_pp.h" #include "ast/arith_decl_plugin.h" #include "ast/rewriter/rewriter_types.h" @@ -191,17 +191,6 @@ class seq_rewriter { bool check_deriv_normal_form(expr* r, int level = 3); #endif - void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result); - - expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path); - expr_ref mk_in_antimirov_rec(expr* s, expr* d); - expr_ref mk_in_antimirov(expr* s, expr* d); - - expr_ref mk_antimirov_deriv_intersection(expr* elem, expr* d1, expr* d2, expr* path); - expr_ref mk_antimirov_deriv_concat(expr* d, expr* r); - expr_ref 
mk_antimirov_deriv_negate(expr* elem, expr* d); - expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2); - expr_ref mk_antimirov_deriv_restrict(expr* elem, expr* d1, expr* cond); expr_ref mk_regex_reverse(expr* r); expr_ref mk_regex_concat(expr* r1, expr* r2); From 9aca2edcfc987c5cb6d3a44fcc653b927fa86e64 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 3 Jun 2026 11:32:32 -0700 Subject: [PATCH 04/32] updates per PR comments --- src/ast/rewriter/seq_derive.cpp | 85 +++++---------------------------- src/ast/rewriter/seq_derive.h | 1 - 2 files changed, 12 insertions(+), 74 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index a4c598948..5e7a82ced 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -25,6 +25,7 @@ Authors: #include "ast/ast_pp.h" #include "ast/array_decl_plugin.h" #include "ast/rewriter/bool_rewriter.h" +#include "util/util.h" #include namespace seq { @@ -80,9 +81,8 @@ namespace seq { return expr_ref(re().mk_derivative(m_ele, r), m); } - ++m_depth; + flet _scoped_depth(m_depth, m_depth + 1); expr_ref result = derive_core(r); - --m_depth; // Cache the result m_cache.insert(r, result); @@ -304,64 +304,10 @@ namespace seq { return expr_ref(m.mk_true(), m); if (nb == l_false) return expr_ref(m.mk_false(), m); - // info is undetermined (l_undef) — fall back to recursive computation - return is_nullable_rec(r); - } - - expr_ref derive::is_nullable_rec(expr* r) { - expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr; + // For symbolic regexes, return a membership predicate sort* s = nullptr; - unsigned lo = 0, hi = 0; - - if (re().is_concat(r, r1, r2) || re().is_intersection(r, r1, r2)) { - expr_ref n1 = is_nullable(r1); - expr_ref n2 = is_nullable(r2); - expr_ref result(m); - m_br.mk_and(n1, n2, result); - return result; - } - if (re().is_union(r, r1, r2)) { - expr_ref n1 = is_nullable(r1); - expr_ref n2 = is_nullable(r2); - expr_ref result(m); - m_br.mk_or(n1, n2, 
result); - return result; - } - if (re().is_complement(r, r1)) { - expr_ref n1 = is_nullable(r1); - expr_ref result(m); - m_br.mk_not(n1, result); - return result; - } - if (re().is_diff(r, r1, r2)) { - expr_ref n1 = is_nullable(r1); - expr_ref n2 = is_nullable(r2); - expr_ref not_n2(m); - m_br.mk_not(n2, not_n2); - expr_ref result(m); - m_br.mk_and(n1, not_n2, result); - return result; - } - if (re().is_to_re(r, r1)) { - if (u().str.is_empty(r1)) - return expr_ref(m.mk_true(), m); - zstring zs; - if (u().str.is_string(r1, zs)) - return expr_ref(m.mk_bool_val(zs.length() == 0), m); - return expr_ref(m.mk_eq(r1, u().str.mk_empty(r1->get_sort())), m); - } - if (m.is_ite(r, cond, r1, r2)) { - expr_ref n1 = is_nullable(r1); - expr_ref n2 = is_nullable(r2); - expr_ref result(m); - m_br.mk_ite(cond, n1, n2, result); - return result; - } - // Unknown: use membership test - if (m_util.is_re(r, s)) - return expr_ref(re().mk_in_re(u().str.mk_empty(s), r), m); - - return expr_ref(m.mk_true(), m); + VERIFY(m_util.is_re(r, s)); + return expr_ref(re().mk_in_re(u().str.mk_empty(s), r), m); } // ------------------------------------------------------- @@ -552,8 +498,6 @@ namespace seq { expr_ref derive::mk_union_from_sorted(expr_ref_vector& args) { if (args.empty()) { - // All elements were identity/absorbed - should not happen in practice - // but handle gracefully UNREACHABLE(); return expr_ref(m.mk_true(), m); } @@ -561,10 +505,8 @@ namespace seq { return expr_ref(args.get(0), m); // Build right-associated union expr_ref result(args.back(), m); - for (unsigned i = args.size() - 1; i > 0; ) { - --i; + for (unsigned i = args.size() - 1; i-- > 0; ) result = expr_ref(re().mk_union(args.get(i), result), m); - } return result; } @@ -577,10 +519,8 @@ namespace seq { return expr_ref(args.get(0), m); // Build right-associated intersection expr_ref result(args.back(), m); - for (unsigned i = args.size() - 1; i > 0; ) { - --i; + for (unsigned i = args.size() - 1; i-- > 0; ) result = 
expr_ref(re().mk_inter(args.get(i), result), m); - } return result; } @@ -591,29 +531,28 @@ namespace seq { expr_ref derive::ite_combine_binary(expr* d1, expr* d2, std::function const& op) { expr *c1, *t1, *e1, *c2, *t2, *e2; + bool is_ite1 = m.is_ite(d1, c1, t1, e1); + bool is_ite2 = m.is_ite(d2, c2, t2, e2); // Both are leaves (non-ITE) - if (!m.is_ite(d1, c1, t1, e1) && !m.is_ite(d2, c2, t2, e2)) + if (!is_ite1 && !is_ite2) return op(d1, d2); // d1 is ITE, d2 is not - if (m.is_ite(d1, c1, t1, e1) && !m.is_ite(d2, c2, t2, e2)) { + if (is_ite1 && !is_ite2) { expr_ref then_r = ite_combine_binary(t1, d2, op); expr_ref else_r = ite_combine_binary(e1, d2, op); return mk_ite(c1, then_r, else_r); } // d2 is ITE, d1 is not - if (!m.is_ite(d1, c1, t1, e1) && m.is_ite(d2, c2, t2, e2)) { + if (!is_ite1 && is_ite2) { expr_ref then_r = ite_combine_binary(d1, t2, op); expr_ref else_r = ite_combine_binary(d1, e2, op); return mk_ite(c2, then_r, else_r); } // Both are ITE - VERIFY(m.is_ite(d1, c1, t1, e1)); - VERIFY(m.is_ite(d2, c2, t2, e2)); - if (c1 == c2) { // Same condition: combine pairwise expr_ref then_r = ite_combine_binary(t1, t2, op); diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index b093366cc..27a7819c5 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -78,7 +78,6 @@ namespace seq { // Nullable check: returns a Boolean expression expr_ref is_nullable(expr* r); - expr_ref is_nullable_rec(expr* r); // Smart constructors with simplification and ACI canonicalization expr_ref mk_union(expr* a, expr* b); From 7dc25e73d545dcfd1d4b077d01ca08402b93b9d5 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 3 Jun 2026 11:41:37 -0700 Subject: [PATCH 05/32] make reset private --- src/ast/rewriter/seq_derive.cpp | 2 ++ src/ast/rewriter/seq_derive.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 5e7a82ced..cce202f92 100644 
--- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -47,6 +47,8 @@ namespace seq { expr_ref derive::operator()(expr* ele, expr* r) { SASSERT(m_util.is_re(r)); + if (m_trail.size() > 1000) + reset(); m_ele = ele; m_depth = 0; expr_ref result = derive_rec(r); diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 27a7819c5..183fc8ae4 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -107,6 +107,8 @@ namespace seq { sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; } + void reset(); + public: derive(ast_manager& m); @@ -121,8 +123,6 @@ namespace seq { * Convenience: symbolic derivative using de Bruijn var 0. */ expr_ref operator()(expr* r); - - void reset(); }; } From f8925ca6fadbec93a4259cb66c896ba10bb42061 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 3 Jun 2026 14:25:03 -0700 Subject: [PATCH 06/32] Add simplify_ite_rec and eval for two-phase derivative - Add simplify_ite post-processing in operator() to simplify ITE conditions - Add simplify_ite_rec(cond, sign, r) for propagating condition truth values - Handles c == cond, x=ch1 vs x=ch2 with different constants - Add eval(ele, d) for efficient two-phase: symbolic derivative + concrete eval - mk_derivative uses two-phase pattern: m_derive(r) then m_derive.eval(ele, d) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 137 ++++++++++++++++++++++++++++++ src/ast/rewriter/seq_derive.h | 10 +++ src/ast/rewriter/seq_rewriter.cpp | 4 +- 3 files changed, 150 insertions(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index cce202f92..a8724a646 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -52,6 +52,7 @@ namespace seq { m_ele = ele; m_depth = 0; expr_ref result = 
derive_rec(r); + result = simplify_ite(result); m_ele = nullptr; return result; } @@ -65,6 +66,14 @@ namespace seq { return (*this)(v, r); } + expr_ref derive::eval(expr* ele, expr* d) { + expr_ref old_ele(m_ele, m); + m_ele = ele; + expr_ref result = simplify_ite(d); + m_ele = old_ele; + return result; + } + // ------------------------------------------------------- // Core derivative computation // ------------------------------------------------------- @@ -469,6 +478,9 @@ namespace seq { return expr_ref(t, m); if (m.is_false(c)) return expr_ref(e, m); + bool cond_val; + if (eval_cond(c, cond_val)) + return cond_val ? expr_ref(t, m) : expr_ref(e, m); return expr_ref(m.mk_ite(c, t, e), m); } @@ -614,6 +626,131 @@ namespace seq { return mk_concat(d, tail); } + // ------------------------------------------------------- + // Post-processing: simplify ITE conditions w.r.t. m_ele + // ------------------------------------------------------- + + bool derive::eval_cond(expr* cond, bool& result) { + expr* lhs = nullptr, * rhs = nullptr, * e1 = nullptr; + unsigned ch1 = 0, ch2 = 0; + + if (m.is_true(cond)) { result = true; return true; } + if (m.is_false(cond)) { result = false; return true; } + + // elem = char or char = elem + if (m.is_eq(cond, lhs, rhs)) { + if (rhs == m_ele) std::swap(lhs, rhs); + if (lhs == m_ele && u().is_const_char(rhs, ch1) && u().is_const_char(m_ele, ch2)) { + result = (ch1 == ch2); + return true; + } + if (lhs == rhs) { result = true; return true; } + } + + // char_le(lhs, rhs) + if (u().is_char_le(cond, lhs, rhs)) { + unsigned vl = 0, vr = 0; + if (lhs == m_ele && u().is_const_char(m_ele, vl) && u().is_const_char(rhs, vr)) { + result = (vl <= vr); return true; + } + if (rhs == m_ele && u().is_const_char(lhs, vl) && u().is_const_char(m_ele, vr)) { + result = (vl <= vr); return true; + } + if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr)) { + result = (vl <= vr); return true; + } + } + + // not(e1) + if (m.is_not(cond, e1)) { + bool 
inner; + if (eval_cond(e1, inner)) { + result = !inner; + return true; + } + } + + // and(...) + if (m.is_and(cond)) { + for (expr* arg : *to_app(cond)) { + bool v; + if (eval_cond(arg, v)) { + if (!v) { result = false; return true; } + } else { + return false; + } + } + result = true; + return true; + } + + // or(...) + if (m.is_or(cond)) { + for (expr* arg : *to_app(cond)) { + bool v; + if (eval_cond(arg, v)) { + if (v) { result = true; return true; } + } else { + return false; + } + } + result = false; + return true; + } + + return false; + } + + expr_ref derive::simplify_ite(expr* d) { + expr* c, * t, * e; + if (!m.is_ite(d, c, t, e)) + return expr_ref(d, m); + + bool cond_val; + if (eval_cond(c, cond_val)) + return simplify_ite(cond_val ? t : e); + + // Simplify branches with knowledge of the condition's truth value + expr_ref st = simplify_ite_rec(c, false, t); + expr_ref se = simplify_ite_rec(c, true, e); + return mk_ite(c, st, se); + } + + expr_ref derive::simplify_ite_rec(expr* cond, bool sign, expr* d) { + expr* c, * t, * e; + if (!m.is_ite(d, c, t, e)) + return expr_ref(d, m); + + // If the ITE condition matches cond directly + if (c == cond) + return sign ? simplify_ite(e) : simplify_ite(t); + + // If cond is (x = ch1) and c is (x = ch2) with ch1 != ch2: + // when sign is false (cond is true, i.e., x = ch1), then c must be false + expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; + if (!sign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { + if (u().is_const_char(lhs1)) std::swap(lhs1, rhs1); + if (u().is_const_char(lhs2)) std::swap(lhs2, rhs2); + unsigned ch1 = 0, ch2 = 0; + if (lhs1 == lhs2 && u().is_const_char(rhs1, ch1) && u().is_const_char(rhs2, ch2) && ch1 != ch2) + return simplify_ite_rec(cond, sign, e); + } + + // General case: try to evaluate c given knowledge of cond + bool cond_val; + if (eval_cond(c, cond_val)) + return simplify_ite_rec(cond, sign, cond_val ? 
t : e); + + // Cannot simplify c: recurse into branches + expr_ref st = simplify_ite_rec(cond, sign, t); + expr_ref se = simplify_ite_rec(cond, sign, e); + + // Now also simplify c's branches with knowledge of c + st = simplify_ite_rec(c, false, st); + se = simplify_ite_rec(c, true, se); + return mk_ite(c, st, se); + } + } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 183fc8ae4..eb5af296b 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -103,6 +103,11 @@ namespace seq { // Distribute concatenation through ITE/union in derivative expr_ref mk_deriv_concat(expr* d, expr* tail); + // Simplify ITE conditions w.r.t. m_ele + expr_ref simplify_ite(expr* d); + expr_ref simplify_ite_rec(expr* cond, bool sign, expr* d); + bool eval_cond(expr* cond, bool& result); + sort* re_sort(expr* r) { return r->get_sort(); } sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; } @@ -123,6 +128,11 @@ namespace seq { * Convenience: symbolic derivative using de Bruijn var 0. */ expr_ref operator()(expr* r); + + /** + * Evaluate an ITE-tree derivative for a concrete element. 
+ */ + expr_ref eval(expr* ele, expr* d); }; } diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 212f97a8a..03fdfc5d4 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2918,7 +2918,9 @@ expr_ref seq_rewriter::mk_derivative(expr* r) { } expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - return m_derive(ele, r); + // Compute symbolic derivative (cached per regex), then evaluate for concrete element + expr_ref d = m_derive(r); + return m_derive.eval(ele, d); } From a77155a5c47c4c526cd8fdd4f507226a6d727d93 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 3 Jun 2026 15:29:30 -0700 Subject: [PATCH 07/32] Port reverse normalization into derive class Instead of treating reverse(r) as stuck (returning symbolic mk_derivative), normalize it by pushing reverse inward through the regex structure, then compute the derivative of the normalized result. Mirrors mk_re_reverse logic. Handles: concat, union, intersection, diff, ite, opt, complement, star, plus, loop, to_re (string literals, units, concats), and symmetric cases. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 101 +++++++++++++++++++++++++++++++- src/ast/rewriter/seq_derive.h | 3 + 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index a8724a646..aa94968d0 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -224,9 +224,13 @@ namespace seq { return mk_ite(cond, d1, d2); } - // δ(reverse(r1)) - stuck: return symbolic derivative - if (re().is_reverse(r, r1)) + // δ(reverse(r1)) - normalize by pushing reverse inward, then derive + if (re().is_reverse(r, r1)) { + expr_ref norm = normalize_reverse(r1); + if (norm) + return derive_rec(norm); return expr_ref(re().mk_derivative(m_ele, r), m); + } // Stuck/uninterpreted case return expr_ref(re().mk_derivative(m_ele, r), m); @@ -304,6 +308,99 @@ namespace seq { return mk_ite(cond, eps, empty); } + // ------------------------------------------------------- + // Normalize reverse by pushing it inward + // ------------------------------------------------------- + + expr_ref derive::normalize_reverse(expr* r) { + expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * p = nullptr; + unsigned lo = 0, hi = 0; + zstring zs; + + // reverse(reverse(r1)) = r1 + if (re().is_reverse(r, r1)) + return expr_ref(r1, m); + + // reverse(r1 · r2) = reverse(r2) · reverse(r1) + if (re().is_concat(r, r1, r2)) { + expr_ref a(re().mk_reverse(r2), m); + expr_ref b(re().mk_reverse(r1), m); + return expr_ref(re().mk_concat(a, b), m); + } + + // reverse(r1 ∪ r2) = reverse(r1) ∪ reverse(r2) + if (re().is_union(r, r1, r2)) { + expr_ref a(re().mk_reverse(r1), m); + expr_ref b(re().mk_reverse(r2), m); + return expr_ref(re().mk_union(a, b), m); + } + + // reverse(r1 ∩ r2) = reverse(r1) ∩ reverse(r2) + if (re().is_intersection(r, r1, r2)) { + expr_ref a(re().mk_reverse(r1), m); + expr_ref b(re().mk_reverse(r2), m); + return expr_ref(re().mk_inter(a, 
b), m); + } + + // reverse(r1 \ r2) = reverse(r1) \ reverse(r2) + if (re().is_diff(r, r1, r2)) { + expr_ref a(re().mk_reverse(r1), m); + expr_ref b(re().mk_reverse(r2), m); + return expr_ref(re().mk_diff(a, b), m); + } + + // reverse(ite(c, r1, r2)) = ite(c, reverse(r1), reverse(r2)) + if (m.is_ite(r, p, r1, r2)) + return expr_ref(m.mk_ite(p, re().mk_reverse(r1), re().mk_reverse(r2)), m); + + // reverse(r1?) = reverse(r1)? + if (re().is_opt(r, r1)) + return expr_ref(re().mk_opt(re().mk_reverse(r1)), m); + + // reverse(~r1) = ~reverse(r1) + if (re().is_complement(r, r1)) + return expr_ref(re().mk_complement(re().mk_reverse(r1)), m); + + // reverse(r1*) = reverse(r1)* + if (re().is_star(r, r1)) + return expr_ref(re().mk_star(re().mk_reverse(r1)), m); + + // reverse(r1+) = reverse(r1)+ + if (re().is_plus(r, r1)) + return expr_ref(re().mk_plus(re().mk_reverse(r1)), m); + + // reverse(r1{lo,}) = reverse(r1){lo,} + if (re().is_loop(r, r1, lo)) + return expr_ref(re().mk_loop(re().mk_reverse(r1), lo), m); + + // reverse(r1{lo,hi}) = reverse(r1){lo,hi} + if (re().is_loop(r, r1, lo, hi)) + return expr_ref(re().mk_loop_proper(re().mk_reverse(r1), lo, hi), m); + + // Symmetric: full_seq, empty, range, full_char, of_pred + if (re().is_full_seq(r) || re().is_empty(r) || re().is_range(r) || + re().is_full_char(r) || re().is_of_pred(r)) + return expr_ref(r, m); + + // reverse(to_re(s)) where s is a string literal + if (re().is_to_re(r, s) && u().str.is_string(s, zs)) + return expr_ref(re().mk_to_re(u().str.mk_string(zs.reverse())), m); + + // reverse(to_re(unit)) = to_re(unit) + if (re().is_to_re(r, s) && u().str.is_unit(s)) + return expr_ref(r, m); + + // reverse(to_re(s1 ++ s2)) = reverse(to_re(s2)) · reverse(to_re(s1)) + if (re().is_to_re(r, s) && u().str.is_concat(s, r1, r2)) { + expr_ref a(re().mk_reverse(re().mk_to_re(r2)), m); + expr_ref b(re().mk_reverse(re().mk_to_re(r1)), m); + return expr_ref(re().mk_concat(a, b), m); + } + + // Stuck — cannot normalize further + return 
expr_ref(nullptr, m); + } + // ------------------------------------------------------- // Nullability - uses info class from seq_decl_plugin.h // ------------------------------------------------------- diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index eb5af296b..3b5369db5 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -103,6 +103,9 @@ namespace seq { // Distribute concatenation through ITE/union in derivative expr_ref mk_deriv_concat(expr* d, expr* tail); + // Normalize reverse(r) by pushing reverse inward + expr_ref normalize_reverse(expr* r); + // Simplify ITE conditions w.r.t. m_ele expr_ref simplify_ite(expr* d); expr_ref simplify_ite_rec(expr* cond, bool sign, expr* d); From 3afd83103a6fe48faff5663459bdd0e1563291f3 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 3 Jun 2026 17:16:23 -0700 Subject: [PATCH 08/32] Address PR review comments: cache, simplify_ite_rec, itos - Cache now indexes by (ele, r) pair using obj_pair_map - Remove eval() function; operator()(ele, r) handles all cases - Rewrite simplify_ite_rec with path vector of signed conditions - Add range-based simplification: (lo <= x, false) + (x <= hi, false) eliminates ite(x = v, t, e) when v is outside [lo, hi] - Add is_itos case in derive_to_re: guards on n >= 0, digit range, and first character match - Port is_reverse normalization (previous commit) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 166 +++++++++++++++++++++++------- src/ast/rewriter/seq_derive.h | 17 ++- src/ast/rewriter/seq_rewriter.cpp | 4 +- 3 files changed, 140 insertions(+), 47 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index aa94968d0..84d0c2e89 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -66,14 +66,6 @@ namespace seq { return (*this)(v, r); } - expr_ref derive::eval(expr* ele, expr* d) { - expr_ref 
old_ele(m_ele, m); - m_ele = ele; - expr_ref result = simplify_ite(d); - m_ele = old_ele; - return result; - } - // ------------------------------------------------------- // Core derivative computation // ------------------------------------------------------- @@ -81,9 +73,9 @@ namespace seq { expr_ref derive::derive_rec(expr* r) { SASSERT(m_util.is_re(r)); - // Check cache + // Check cache (indexed by both m_ele and r) expr* cached = nullptr; - if (m_cache.find(r, cached)) + if (m_cache.find(m_ele, r, cached)) return expr_ref(cached, m); // Depth check @@ -96,7 +88,8 @@ namespace seq { expr_ref result = derive_core(r); // Cache the result - m_cache.insert(r, result); + m_cache.insert(m_ele, r, result); + m_trail.push_back(m_ele); m_trail.push_back(r); m_trail.push_back(result); return result; @@ -261,6 +254,33 @@ namespace seq { return mk_ite(cond, tail_re, empty); } + // δ(to_re(itos(n))) - derivative of integer-to-string + // itos(n) produces digits '0'-'9' when n >= 0, empty when n < 0 + expr* n = nullptr; + if (u().str.is_itos(s, n)) { + expr_ref empty(re().mk_empty(re_sort), m); + // Guard: n >= 0 and element is a digit and element = s[0] + expr_ref n_ge_0(m_autil.mk_ge(n, m_autil.mk_int(0)), m); + expr_ref char_0(m_util.mk_char('0'), m); + expr_ref char_9(m_util.mk_char('9'), m); + expr_ref ge_0(m_util.mk_le(char_0, m_ele), m); + expr_ref le_9(m_util.mk_le(m_ele, char_9), m); + expr_ref is_digit(m.mk_and(ge_0, le_9), m); + // First character of itos(n) matches ele + expr_ref zero_idx(m_autil.mk_int(0), m); + expr_ref first(u().str.mk_nth_i(s, zero_idx), m); + expr_ref eq_first(m.mk_eq(m_ele, first), m); + // Guard = n >= 0 && is_digit && ele = s[0] + expr_ref guard(m.mk_and(n_ge_0, m.mk_and(is_digit, eq_first)), m); + // Tail: to_re(substr(itos(n), 1, len(itos(n)) - 1)) + expr_ref one(m_autil.mk_int(1), m); + expr_ref len(u().str.mk_length(s), m); + expr_ref rest_len(m_autil.mk_sub(len, one), m); + expr_ref rest(u().str.mk_substr(s, one, rest_len), m); + 
expr_ref rest_re(re().mk_to_re(rest), m); + return mk_ite(guard, rest_re, empty); + } + // Non-ground sequence: δ(to_re(s)) = ite(s ≠ "" ∧ ele = s[0], to_re(s[1:]), ∅) expr_ref empty_seq(u().str.mk_empty(seq_sort), m); expr_ref is_non_empty(m.mk_not(m.mk_eq(s, empty_seq)), m); @@ -807,44 +827,120 @@ namespace seq { if (eval_cond(c, cond_val)) return simplify_ite(cond_val ? t : e); - // Simplify branches with knowledge of the condition's truth value - expr_ref st = simplify_ite_rec(c, false, t); - expr_ref se = simplify_ite_rec(c, true, e); + // Extract signed conditions from c for the true-branch path + path_t path_t_branch; + if (m.is_and(c)) { + for (expr* arg : *to_app(c)) + path_t_branch.push_back({ arg, false }); + } else { + path_t_branch.push_back({ c, false }); + } + + // Simplify the true branch under path knowledge + expr_ref st = simplify_ite_rec(path_t_branch, t); + + // For the else branch, the whole condition is false + path_t path_e_branch; + path_e_branch.push_back({ c, true }); + expr_ref se = simplify_ite_rec(path_e_branch, e); + return mk_ite(c, st, se); } - expr_ref derive::simplify_ite_rec(expr* cond, bool sign, expr* d) { + expr_ref derive::simplify_ite_rec(path_t& path, expr* d) { expr* c, * t, * e; if (!m.is_ite(d, c, t, e)) return expr_ref(d, m); - // If the ITE condition matches cond directly - if (c == cond) - return sign ? simplify_ite(e) : simplify_ite(t); + // Check if c can be determined from the path + for (auto const& [cond, sign] : path) { + // Direct match: c == cond + if (c == cond) + return sign ? 
simplify_ite_rec(path, e) : simplify_ite_rec(path, t); - // If cond is (x = ch1) and c is (x = ch2) with ch1 != ch2: - // when sign is false (cond is true, i.e., x = ch1), then c must be false - expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; - if (!sign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { - if (u().is_const_char(lhs1)) std::swap(lhs1, rhs1); - if (u().is_const_char(lhs2)) std::swap(lhs2, rhs2); - unsigned ch1 = 0, ch2 = 0; - if (lhs1 == lhs2 && u().is_const_char(rhs1, ch1) && u().is_const_char(rhs2, ch2) && ch1 != ch2) - return simplify_ite_rec(cond, sign, e); + // c is (x = v), cond is (x = w) with sign=false (cond is true, so x=w) + // If v != w, then c is false → take else branch + expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; + if (!sign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { + if (m_util.is_const_char(lhs1)) std::swap(lhs1, rhs1); + if (m_util.is_const_char(lhs2)) std::swap(lhs2, rhs2); + if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) + return simplify_ite_rec(path, e); + } + + // Range constraint: cond is (lo <= x) or (x <= hi) with sign=false + // and c is (x = v). If v is outside the range, c is false. 
+ unsigned v_val = 0, lo_val = 0, hi_val = 0; + if (!sign && m.is_eq(c, lhs2, rhs2)) { + if (m_util.is_const_char(lhs2)) std::swap(lhs2, rhs2); + if (m_util.is_const_char(rhs2, v_val)) { + // Check if cond is (lo <= x) where x == lhs2 + expr* le_lhs = nullptr, * le_rhs = nullptr; + if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_rhs == lhs2 && + m_util.is_const_char(le_lhs, lo_val) && v_val < lo_val) + return simplify_ite_rec(path, e); + // Check if cond is (x <= hi) where x == lhs2 + if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_lhs == lhs2 && + m_util.is_const_char(le_rhs, hi_val) && v_val > hi_val) + return simplify_ite_rec(path, e); + } + } } - // General case: try to evaluate c given knowledge of cond + // Check if both range bounds are in path and c is (x = v) within range + expr* lhs_c = nullptr, * rhs_c = nullptr; + unsigned v_val = 0; + if (m.is_eq(c, lhs_c, rhs_c)) { + if (m_util.is_const_char(lhs_c)) std::swap(lhs_c, rhs_c); + if (m_util.is_const_char(rhs_c, v_val)) { + unsigned lo_bound = 0, hi_bound = UINT_MAX; + bool has_lo = false, has_hi = false; + for (auto const& [cond, sign] : path) { + if (sign) continue; // only use true conditions + expr* le_lhs = nullptr, * le_rhs = nullptr; + if (m_util.is_char_le(cond, le_lhs, le_rhs)) { + unsigned bound = 0; + if (le_rhs == lhs_c && m_util.is_const_char(le_lhs, bound)) { + lo_bound = bound; has_lo = true; + } + if (le_lhs == lhs_c && m_util.is_const_char(le_rhs, bound)) { + hi_bound = bound; has_hi = true; + } + } + } + if (has_lo && has_hi && lo_bound <= v_val && v_val <= hi_bound) { + // v is in range [lo, hi], so c is satisfiable + // Add (x = v, false) to path and simplify t + path.push_back({ c, false }); + expr_ref st = simplify_ite_rec(path, t); + path.pop_back(); + expr_ref se = simplify_ite_rec(path, e); + return mk_ite(c, st, se); + } + } + } + + // Try to evaluate c directly bool cond_val; if (eval_cond(c, cond_val)) - return simplify_ite_rec(cond, sign, cond_val ? 
t : e); + return simplify_ite_rec(path, cond_val ? t : e); - // Cannot simplify c: recurse into branches - expr_ref st = simplify_ite_rec(cond, sign, t); - expr_ref se = simplify_ite_rec(cond, sign, e); + // Cannot simplify c: recurse into branches with extended paths + // True branch: add conjuncts of c + path_t extended_path(path); + if (m.is_and(c)) { + for (expr* arg : *to_app(c)) + extended_path.push_back({ arg, false }); + } else { + extended_path.push_back({ c, false }); + } + expr_ref st = simplify_ite_rec(extended_path, t); + + // Else branch: add (c, true) + path.push_back({ c, true }); + expr_ref se = simplify_ite_rec(path, e); + path.pop_back(); - // Now also simplify c's branches with knowledge of c - st = simplify_ite_rec(c, false, st); - se = simplify_ite_rec(c, true, se); return mk_ite(c, st, se); } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 3b5369db5..00a9c6318 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -28,6 +28,7 @@ Authors: #include "ast/arith_decl_plugin.h" #include "ast/array_decl_plugin.h" #include "ast/rewriter/bool_rewriter.h" +#include "util/obj_pair_hashtable.h" namespace seq { @@ -53,8 +54,8 @@ namespace seq { arith_util m_autil; bool_rewriter m_br; - // Cache: maps regex expr to its symbolic derivative - obj_map m_cache; + // Cache: maps (ele, regex) pair to its derivative + obj_pair_map m_cache; expr_ref_vector m_trail; // pin cached results // Depth limiting @@ -106,9 +107,12 @@ namespace seq { // Normalize reverse(r) by pushing reverse inward expr_ref normalize_reverse(expr* r); - // Simplify ITE conditions w.r.t. m_ele + // Path of signed conditions for ITE simplification + using path_t = svector>; + + // Simplify ITE conditions w.r.t. 
m_ele and path knowledge expr_ref simplify_ite(expr* d); - expr_ref simplify_ite_rec(expr* cond, bool sign, expr* d); + expr_ref simplify_ite_rec(path_t& path, expr* d); bool eval_cond(expr* cond, bool& result); sort* re_sort(expr* r) { return r->get_sort(); } @@ -131,11 +135,6 @@ namespace seq { * Convenience: symbolic derivative using de Bruijn var 0. */ expr_ref operator()(expr* r); - - /** - * Evaluate an ITE-tree derivative for a concrete element. - */ - expr_ref eval(expr* ele, expr* d); }; } diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 03fdfc5d4..212f97a8a 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2918,9 +2918,7 @@ expr_ref seq_rewriter::mk_derivative(expr* r) { } expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - // Compute symbolic derivative (cached per regex), then evaluate for concrete element - expr_ref d = m_derive(r); - return m_derive.eval(ele, d); + return m_derive(ele, r); } From ca238a9107629e710c7f3f26ece4e22af9a2cca1 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 4 Jun 2026 07:45:19 -0700 Subject: [PATCH 09/32] Address PR review: subsumption, is_value, simplify_ite fixes - Add lightweight structural is_subset for union/inter simplification - Use m.is_value instead of is_const_char for swap checks - Move eval_cond to beginning of simplify_ite_rec - Use path.shrink(sz) instead of copying extended_path - Fix normalize_reverse stuck case to return mk_reverse(r) - Expose subsumes() in public API Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 79 ++++++++++++++++++++++++++------- src/ast/rewriter/seq_derive.h | 9 ++++ 2 files changed, 73 insertions(+), 15 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 84d0c2e89..118302ed2 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -220,7 +220,7 @@ 
namespace seq { // δ(reverse(r1)) - normalize by pushing reverse inward, then derive if (re().is_reverse(r, r1)) { expr_ref norm = normalize_reverse(r1); - if (norm) + if (norm != r) return derive_rec(norm); return expr_ref(re().mk_derivative(m_ele, r), m); } @@ -418,7 +418,7 @@ namespace seq { } // Stuck — cannot normalize further - return expr_ref(nullptr, m); + return expr_ref(re().mk_reverse(r), m); } // ------------------------------------------------------- @@ -442,6 +442,46 @@ namespace seq { // Smart constructors with simplification // ------------------------------------------------------- + // Lightweight structural subsumption: checks if L(a) ⊆ L(b) + // Returns true only when subsumption can be determined structurally. + bool derive::is_subset(expr* a, expr* b) { + if (a == b) return true; + if (re().is_empty(a)) return true; + if (re().is_full_seq(b)) return true; + + // a ⊆ a* (since a* accepts everything a does and more) + expr* b1 = nullptr; + if (re().is_star(b, b1) && a == b1) return true; + + // a* ⊆ b* if a ⊆ b + expr* a1 = nullptr; + if (re().is_star(a, a1) && re().is_star(b, b1) && is_subset(a1, b1)) return true; + + // a ⊆ b1 ∪ b2 if a ⊆ b1 or a ⊆ b2 + if (re().is_union(b, b1, a1)) { + if (is_subset(a, b1) || is_subset(a, a1)) return true; + } + + // a1 ∩ a2 ⊆ b if a1 ⊆ b or a2 ⊆ b + if (re().is_intersection(a, a1, b1)) { + if (is_subset(a1, b) || is_subset(b1, b)) return true; + } + + // concat subsumption: a1·a2 ⊆ b1·b2 when a1 ⊆ b1 and a2 ⊆ b2 + expr* a2 = nullptr, * b2 = nullptr; + if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && + is_subset(a1, b1) && is_subset(a2, b2)) + return true; + + // loop subsumption: r{la,ua} ⊆ r{lb,ub} when lb <= la and ua <= ub + unsigned la, ua, lb, ub; + if (re().is_loop(a, a1, la, ua) && re().is_loop(b, b1, lb, ub) && + a1 == b1 && lb <= la && ua <= ub) + return true; + + return false; + } + expr_ref derive::mk_union(expr* a, expr* b) { // Identity / annihilator if (a == b) return expr_ref(a, 
m); @@ -457,6 +497,10 @@ namespace seq { if (re().is_complement(b, c) && c == a) return expr_ref(re().mk_full_seq(a->get_sort()), m); + // Subsumption: a ∪ b = b if a ⊆ b, a ∪ b = a if b ⊆ a + if (is_subset(a, b)) return expr_ref(b, m); + if (is_subset(b, a)) return expr_ref(a, m); + // ITE combination: if both are ITE with same condition, merge expr *c1, *t1, *e1, *c2, *t2, *e2; if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { @@ -508,6 +552,10 @@ namespace seq { if (re().is_complement(b, c) && c == a) return expr_ref(re().mk_empty(a->get_sort()), m); + // Subsumption: a ∩ b = a if a ⊆ b, a ∩ b = b if b ⊆ a + if (is_subset(a, b)) return expr_ref(a, m); + if (is_subset(b, a)) return expr_ref(b, m); + // ITE combination: if both are ITE with same condition, merge expr *c1, *t1, *e1, *c2, *t2, *e2; if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { @@ -852,6 +900,11 @@ namespace seq { if (!m.is_ite(d, c, t, e)) return expr_ref(d, m); + // Try to evaluate c directly + bool cond_val; + if (eval_cond(c, cond_val)) + return simplify_ite_rec(path, cond_val ? t : e); + // Check if c can be determined from the path for (auto const& [cond, sign] : path) { // Direct match: c == cond @@ -862,8 +915,8 @@ namespace seq { // If v != w, then c is false → take else branch expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; if (!sign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { - if (m_util.is_const_char(lhs1)) std::swap(lhs1, rhs1); - if (m_util.is_const_char(lhs2)) std::swap(lhs2, rhs2); + if (m.is_value(lhs1)) std::swap(lhs1, rhs1); + if (m.is_value(lhs2)) std::swap(lhs2, rhs2); if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) return simplify_ite_rec(path, e); } @@ -872,7 +925,7 @@ namespace seq { // and c is (x = v). If v is outside the range, c is false. 
unsigned v_val = 0, lo_val = 0, hi_val = 0; if (!sign && m.is_eq(c, lhs2, rhs2)) { - if (m_util.is_const_char(lhs2)) std::swap(lhs2, rhs2); + if (m.is_value(lhs2)) std::swap(lhs2, rhs2); if (m_util.is_const_char(rhs2, v_val)) { // Check if cond is (lo <= x) where x == lhs2 expr* le_lhs = nullptr, * le_rhs = nullptr; @@ -891,7 +944,7 @@ namespace seq { expr* lhs_c = nullptr, * rhs_c = nullptr; unsigned v_val = 0; if (m.is_eq(c, lhs_c, rhs_c)) { - if (m_util.is_const_char(lhs_c)) std::swap(lhs_c, rhs_c); + if (m.is_value(lhs_c)) std::swap(lhs_c, rhs_c); if (m_util.is_const_char(rhs_c, v_val)) { unsigned lo_bound = 0, hi_bound = UINT_MAX; bool has_lo = false, has_hi = false; @@ -920,21 +973,17 @@ namespace seq { } } - // Try to evaluate c directly - bool cond_val; - if (eval_cond(c, cond_val)) - return simplify_ite_rec(path, cond_val ? t : e); - // Cannot simplify c: recurse into branches with extended paths // True branch: add conjuncts of c - path_t extended_path(path); + auto sz = path.size(); if (m.is_and(c)) { for (expr* arg : *to_app(c)) - extended_path.push_back({ arg, false }); + path.push_back({ arg, false }); } else { - extended_path.push_back({ c, false }); + path.push_back({ c, false }); } - expr_ref st = simplify_ite_rec(extended_path, t); + expr_ref st = simplify_ite_rec(path, t); + path.shrink(sz); // Else branch: add (c, true) path.push_back({ c, true }); diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 00a9c6318..936dd1236 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -104,6 +104,9 @@ namespace seq { // Distribute concatenation through ITE/union in derivative expr_ref mk_deriv_concat(expr* d, expr* tail); + // Lightweight subsumption check: returns true if L(a) ⊆ L(b) + bool is_subset(expr* a, expr* b); + // Normalize reverse(r) by pushing reverse inward expr_ref normalize_reverse(expr* r); @@ -135,6 +138,12 @@ namespace seq { * Convenience: symbolic derivative using de Bruijn var 0. 
*/ expr_ref operator()(expr* r); + + /** + * Lightweight structural subsumption check: L(a) ⊆ L(b)? + * Returns true only when provable structurally. + */ + bool subsumes(expr* larger, expr* smaller) { return is_subset(smaller, larger); } }; } From 07cea49e4bb4e3961e80c7214c0e77720721fbad Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 4 Jun 2026 08:29:44 -0700 Subject: [PATCH 10/32] Address PR review: push_path helper, lbool eval_cond, fix year - Add push_path(path, c, sign) that decomposes conjuncts/disjuncts - Add simplify_ite_rec(path, c, t, e) helper for cleaner recursion - Change eval_cond signature to return lbool (l_undef = undetermined) - Fix copyright year from 2025 to 2026 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 155 +++++++++++++------------------- src/ast/rewriter/seq_derive.h | 6 +- 2 files changed, 67 insertions(+), 94 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 118302ed2..180665d56 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -1,5 +1,5 @@ /*++ -Copyright (c) 2025 Microsoft Corporation +Copyright (c) 2026 Microsoft Corporation Module Name: @@ -17,7 +17,7 @@ Abstract: Authors: - Nikolaj Bjorner (nbjorner) 2025-06-03 + Nikolaj Bjorner (nbjorner) 2026-06-03 --*/ @@ -643,9 +643,9 @@ namespace seq { return expr_ref(t, m); if (m.is_false(c)) return expr_ref(e, m); - bool cond_val; - if (eval_cond(c, cond_val)) - return cond_val ? expr_ref(t, m) : expr_ref(e, m); + lbool cond_val = eval_cond(c); + if (cond_val == l_true) return expr_ref(t, m); + if (cond_val == l_false) return expr_ref(e, m); return expr_ref(m.mk_ite(c, t, e), m); } @@ -795,75 +795,83 @@ namespace seq { // Post-processing: simplify ITE conditions w.r.t. 
m_ele // ------------------------------------------------------- - bool derive::eval_cond(expr* cond, bool& result) { + lbool derive::eval_cond(expr* cond) { expr* lhs = nullptr, * rhs = nullptr, * e1 = nullptr; unsigned ch1 = 0, ch2 = 0; - if (m.is_true(cond)) { result = true; return true; } - if (m.is_false(cond)) { result = false; return true; } + if (m.is_true(cond)) return l_true; + if (m.is_false(cond)) return l_false; // elem = char or char = elem if (m.is_eq(cond, lhs, rhs)) { if (rhs == m_ele) std::swap(lhs, rhs); - if (lhs == m_ele && u().is_const_char(rhs, ch1) && u().is_const_char(m_ele, ch2)) { - result = (ch1 == ch2); - return true; - } - if (lhs == rhs) { result = true; return true; } + if (lhs == m_ele && u().is_const_char(rhs, ch1) && u().is_const_char(m_ele, ch2)) + return ch1 == ch2 ? l_true : l_false; + if (lhs == rhs) return l_true; } // char_le(lhs, rhs) if (u().is_char_le(cond, lhs, rhs)) { unsigned vl = 0, vr = 0; - if (lhs == m_ele && u().is_const_char(m_ele, vl) && u().is_const_char(rhs, vr)) { - result = (vl <= vr); return true; - } - if (rhs == m_ele && u().is_const_char(lhs, vl) && u().is_const_char(m_ele, vr)) { - result = (vl <= vr); return true; - } - if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr)) { - result = (vl <= vr); return true; - } + if (lhs == m_ele && u().is_const_char(m_ele, vl) && u().is_const_char(rhs, vr)) + return vl <= vr ? l_true : l_false; + if (rhs == m_ele && u().is_const_char(lhs, vl) && u().is_const_char(m_ele, vr)) + return vl <= vr ? l_true : l_false; + if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr)) + return vl <= vr ? l_true : l_false; } // not(e1) if (m.is_not(cond, e1)) { - bool inner; - if (eval_cond(e1, inner)) { - result = !inner; - return true; - } + lbool inner = eval_cond(e1); + if (inner != l_undef) + return inner == l_true ? l_false : l_true; } // and(...) 
if (m.is_and(cond)) { for (expr* arg : *to_app(cond)) { - bool v; - if (eval_cond(arg, v)) { - if (!v) { result = false; return true; } - } else { - return false; - } + lbool v = eval_cond(arg); + if (v == l_false) return l_false; + if (v == l_undef) return l_undef; } - result = true; - return true; + return l_true; } // or(...) if (m.is_or(cond)) { for (expr* arg : *to_app(cond)) { - bool v; - if (eval_cond(arg, v)) { - if (v) { result = true; return true; } - } else { - return false; - } + lbool v = eval_cond(arg); + if (v == l_true) return l_true; + if (v == l_undef) return l_undef; } - result = false; - return true; + return l_false; } - return false; + return l_undef; + } + + void derive::push_path(path_t& path, expr* c, bool sign) { + if (!sign && m.is_and(c)) { + for (expr* arg : *to_app(c)) + push_path(path, arg, false); + } else if (sign && m.is_or(c)) { + for (expr* arg : *to_app(c)) + push_path(path, arg, true); + } else { + path.push_back({ c, sign }); + } + } + + std::pair derive::simplify_ite_rec(path_t& path, expr* c, expr* t, expr* e) { + auto sz = path.size(); + push_path(path, c, false); + expr_ref st = simplify_ite_rec(path, t); + path.shrink(sz); + push_path(path, c, true); + expr_ref se = simplify_ite_rec(path, e); + path.shrink(sz); + return { st, se }; } expr_ref derive::simplify_ite(expr* d) { @@ -871,27 +879,12 @@ namespace seq { if (!m.is_ite(d, c, t, e)) return expr_ref(d, m); - bool cond_val; - if (eval_cond(c, cond_val)) - return simplify_ite(cond_val ? 
t : e); - - // Extract signed conditions from c for the true-branch path - path_t path_t_branch; - if (m.is_and(c)) { - for (expr* arg : *to_app(c)) - path_t_branch.push_back({ arg, false }); - } else { - path_t_branch.push_back({ c, false }); - } - - // Simplify the true branch under path knowledge - expr_ref st = simplify_ite_rec(path_t_branch, t); - - // For the else branch, the whole condition is false - path_t path_e_branch; - path_e_branch.push_back({ c, true }); - expr_ref se = simplify_ite_rec(path_e_branch, e); + lbool cond_val = eval_cond(c); + if (cond_val == l_true) return simplify_ite(t); + if (cond_val == l_false) return simplify_ite(e); + path_t path; + auto [st, se] = simplify_ite_rec(path, c, t, e); return mk_ite(c, st, se); } @@ -901,9 +894,9 @@ namespace seq { return expr_ref(d, m); // Try to evaluate c directly - bool cond_val; - if (eval_cond(c, cond_val)) - return simplify_ite_rec(path, cond_val ? t : e); + lbool cond_val = eval_cond(c); + if (cond_val == l_true) return simplify_ite_rec(path, t); + if (cond_val == l_false) return simplify_ite_rec(path, e); // Check if c can be determined from the path for (auto const& [cond, sign] : path) { @@ -927,12 +920,10 @@ namespace seq { if (!sign && m.is_eq(c, lhs2, rhs2)) { if (m.is_value(lhs2)) std::swap(lhs2, rhs2); if (m_util.is_const_char(rhs2, v_val)) { - // Check if cond is (lo <= x) where x == lhs2 expr* le_lhs = nullptr, * le_rhs = nullptr; if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_rhs == lhs2 && m_util.is_const_char(le_lhs, lo_val) && v_val < lo_val) return simplify_ite_rec(path, e); - // Check if cond is (x <= hi) where x == lhs2 if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_lhs == lhs2 && m_util.is_const_char(le_rhs, hi_val) && v_val > hi_val) return simplify_ite_rec(path, e); @@ -949,7 +940,7 @@ namespace seq { unsigned lo_bound = 0, hi_bound = UINT_MAX; bool has_lo = false, has_hi = false; for (auto const& [cond, sign] : path) { - if (sign) continue; // only use true 
conditions + if (sign) continue; expr* le_lhs = nullptr, * le_rhs = nullptr; if (m_util.is_char_le(cond, le_lhs, le_rhs)) { unsigned bound = 0; @@ -962,34 +953,14 @@ namespace seq { } } if (has_lo && has_hi && lo_bound <= v_val && v_val <= hi_bound) { - // v is in range [lo, hi], so c is satisfiable - // Add (x = v, false) to path and simplify t - path.push_back({ c, false }); - expr_ref st = simplify_ite_rec(path, t); - path.pop_back(); - expr_ref se = simplify_ite_rec(path, e); + auto [st, se] = simplify_ite_rec(path, c, t, e); return mk_ite(c, st, se); } } } // Cannot simplify c: recurse into branches with extended paths - // True branch: add conjuncts of c - auto sz = path.size(); - if (m.is_and(c)) { - for (expr* arg : *to_app(c)) - path.push_back({ arg, false }); - } else { - path.push_back({ c, false }); - } - expr_ref st = simplify_ite_rec(path, t); - path.shrink(sz); - - // Else branch: add (c, true) - path.push_back({ c, true }); - expr_ref se = simplify_ite_rec(path, e); - path.pop_back(); - + auto [st, se] = simplify_ite_rec(path, c, t, e); return mk_ite(c, st, se); } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 936dd1236..6e955cfef 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -1,5 +1,5 @@ /*++ -Copyright (c) 2025 Microsoft Corporation +Copyright (c) 2026 Microsoft Corporation Module Name: @@ -116,7 +116,9 @@ namespace seq { // Simplify ITE conditions w.r.t. 
m_ele and path knowledge expr_ref simplify_ite(expr* d); expr_ref simplify_ite_rec(path_t& path, expr* d); - bool eval_cond(expr* cond, bool& result); + std::pair simplify_ite_rec(path_t& path, expr* c, expr* t, expr* e); + void push_path(path_t& path, expr* c, bool sign); + lbool eval_cond(expr* cond); sort* re_sort(expr* r) { return r->get_sort(); } sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } From 6aea54fdad922302cc76528a7cecef91aa52d440 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 4 Jun 2026 10:43:08 -0700 Subject: [PATCH 11/32] Fix derivative instability and recursion bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add top-level cache (m_top_cache) to ensure stable AST node identity across repeated derivative calls, preventing state graph divergence - Add get_head_tail helper for derive_to_re with str.is_unit/str.is_concat - Add ITE hoisting in mk_union/mk_inter to keep ITEs at top level - Add De Morgan rule in mk_complement: ~(A∪B) → ~A ∩ ~B - Add ~ε → .+ simplification in mk_complement - Add prefix factoring: a·x ∪ a·y = a·(x∪y) and a·x ∩ a·y = a·(x∩y) - Add r* ∩ .+ = r+ special case in mk_inter - Enhance is_subset with union/intersection distributivity and complement - Remove De Morgan from mk_inter to prevent infinite recursion loop Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 145 ++++++++++++++++++++++++++++++++ src/ast/rewriter/seq_derive.h | 4 + 2 files changed, 149 insertions(+) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 180665d56..424462a4e 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -42,6 +42,7 @@ namespace seq { void derive::reset() { m_cache.reset(); + m_top_cache.reset(); m_trail.reset(); } @@ -49,11 +50,20 @@ namespace seq { SASSERT(m_util.is_re(r)); if (m_trail.size() > 1000) reset(); + // Check top-level 
cache (post-simplify result) + expr* cached = nullptr; + if (m_top_cache.find(ele, r, cached)) + return expr_ref(cached, m); m_ele = ele; m_depth = 0; expr_ref result = derive_rec(r); result = simplify_ite(result); m_ele = nullptr; + // Cache and pin the final result + m_top_cache.insert(ele, r, result); + m_trail.push_back(ele); + m_trail.push_back(r); + m_trail.push_back(result); return result; } @@ -254,6 +264,27 @@ namespace seq { return mk_ite(cond, tail_re, empty); } + // δ(to_re(unit(c))) = ite(ele = c, ε, ∅) + expr* ch = nullptr; + if (u().str.is_unit(s, ch)) { + expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m); + expr_ref empty(re().mk_empty(re_sort), m); + expr_ref cond(m.mk_eq(m_ele, ch), m); + return mk_ite(cond, eps, empty); + } + + // δ(to_re(s1 ++ s2)) = ite(head matches, to_re(tail ++ s2), ∅) + expr* s1 = nullptr, * s2 = nullptr; + if (u().str.is_concat(s, s1, s2)) { + expr_ref hd(m), tl(m); + if (get_head_tail(s1, s2, hd, tl)) { + expr_ref cond(m.mk_eq(m_ele, hd), m); + expr_ref tail_re(re().mk_to_re(tl), m); + expr_ref empty(re().mk_empty(re_sort), m); + return mk_ite(cond, tail_re, empty); + } + } + // δ(to_re(itos(n))) - derivative of integer-to-string // itos(n) produces digits '0'-'9' when n >= 0, empty when n < 0 expr* n = nullptr; @@ -328,6 +359,34 @@ namespace seq { return mk_ite(cond, eps, empty); } + // Extract head character and remaining tail from a sequence + // s1 is the first part, s2 is the continuation (from str.concat(s1, s2)) + bool derive::get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl) { + expr* ch = nullptr; + expr* a = nullptr, * b = nullptr; + if (u().str.is_unit(s1, ch)) { + hd = ch; + tl = s2; + return true; + } + if (u().str.is_concat(s1, a, b)) { + expr_ref new_s2(u().str.mk_concat(b, s2), m); + return get_head_tail(a, new_s2, hd, tl); + } + zstring zs; + if (u().str.is_string(s1, zs) && zs.length() > 0) { + hd = m_util.mk_char(zs[0]); + if (zs.length() == 1) + tl = s2; + else { + expr_ref 
rest(u().str.mk_string(zs.extract(1, zs.length() - 1)), m); + tl = u().str.mk_concat(rest, s2); + } + return true; + } + return false; + } + // ------------------------------------------------------- // Normalize reverse by pushing it inward // ------------------------------------------------------- @@ -462,11 +521,21 @@ namespace seq { if (is_subset(a, b1) || is_subset(a, a1)) return true; } + // a1 ∪ a2 ⊆ b if a1 ⊆ b and a2 ⊆ b + if (re().is_union(a, a1, b1)) { + if (is_subset(a1, b) && is_subset(b1, b)) return true; + } + // a1 ∩ a2 ⊆ b if a1 ⊆ b or a2 ⊆ b if (re().is_intersection(a, a1, b1)) { if (is_subset(a1, b) || is_subset(b1, b)) return true; } + // a ⊆ b1 ∩ b2 if a ⊆ b1 and a ⊆ b2 + if (re().is_intersection(b, b1, a1)) { + if (is_subset(a, b1) && is_subset(a, a1)) return true; + } + // concat subsumption: a1·a2 ⊆ b1·b2 when a1 ⊆ b1 and a2 ⊆ b2 expr* a2 = nullptr, * b2 = nullptr; if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && @@ -479,6 +548,10 @@ namespace seq { a1 == b1 && lb <= la && ua <= ub) return true; + // complement: ~a ⊆ ~b if b ⊆ a + if (re().is_complement(a, a1) && re().is_complement(b, b1)) + return is_subset(b1, a1); + return false; } @@ -501,6 +574,13 @@ namespace seq { if (is_subset(a, b)) return expr_ref(b, m); if (is_subset(b, a)) return expr_ref(a, m); + // Prefix factoring: a·x ∪ a·y = a·(x ∪ y) + expr *a1, *a2, *b1, *b2; + if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) { + expr_ref tail = mk_union(a2, b2); + return mk_deriv_concat(expr_ref(a1, m), tail); + } + // ITE combination: if both are ITE with same condition, merge expr *c1, *t1, *e1, *c2, *t2, *e2; if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { @@ -509,6 +589,18 @@ namespace seq { return mk_ite(c1, then_br, else_br); } + // ITE hoisting: ite(c, t, e) ∪ r = ite(c, t ∪ r, e ∪ r) + if (m.is_ite(a, c1, t1, e1)) { + expr_ref then_br = mk_union(t1, b); + expr_ref else_br = mk_union(e1, b); + return mk_ite(c1, then_br, 
else_br); + } + if (m.is_ite(b, c2, t2, e2)) { + expr_ref then_br = mk_union(a, t2); + expr_ref else_br = mk_union(a, e2); + return mk_ite(c2, then_br, else_br); + } + // ACI: flatten, sort, deduplicate expr_ref_vector args(m); flatten_union(a, args); @@ -556,6 +648,13 @@ namespace seq { if (is_subset(a, b)) return expr_ref(a, m); if (is_subset(b, a)) return expr_ref(b, m); + // Prefix factoring: a·x ∩ a·y = a·(x ∩ y) + expr *a1, *b1, *a2, *b2; + if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) { + expr_ref tail = mk_inter(a2, b2); + return mk_deriv_concat(expr_ref(a1, m), tail); + } + // ITE combination: if both are ITE with same condition, merge expr *c1, *t1, *e1, *c2, *t2, *e2; if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { @@ -564,6 +663,18 @@ namespace seq { return mk_ite(c1, then_br, else_br); } + // ITE hoisting: ite(c, t, e) ∩ r = ite(c, t ∩ r, e ∩ r) + if (m.is_ite(a, c1, t1, e1)) { + expr_ref then_br = mk_inter(t1, b); + expr_ref else_br = mk_inter(e1, b); + return mk_ite(c1, then_br, else_br); + } + if (m.is_ite(b, c2, t2, e2)) { + expr_ref then_br = mk_inter(a, t2); + expr_ref else_br = mk_inter(a, e2); + return mk_ite(c2, then_br, else_br); + } + // ACI: flatten, sort, deduplicate expr_ref_vector args(m); flatten_inter(a, args); @@ -587,6 +698,25 @@ namespace seq { if (args.empty()) return expr_ref(re().mk_full_seq(a->get_sort()), m); + // Special: r* ∩ .+ = r+ + expr* star_body = nullptr; + int star_idx = -1, dotplus_idx = -1; + for (unsigned i = 0; i < args.size(); ++i) { + if (re().is_star(args.get(i), star_body)) + star_idx = i; + if (re().is_dot_plus(args.get(i))) + dotplus_idx = i; + } + if (star_idx >= 0 && dotplus_idx >= 0 && star_body) { + args.set(star_idx, re().mk_plus(star_body)); + // Remove .+ by shifting + for (unsigned i = dotplus_idx; i + 1 < args.size(); ++i) + args.set(i, args.get(i + 1)); + args.shrink(args.size() - 1); + if (args.size() == 1) + return expr_ref(args.get(0), m); + } + 
return mk_inter_from_sorted(args); } @@ -635,6 +765,21 @@ namespace seq { return mk_ite(c, ct, ce); } + // De Morgan: ~(r1 ∪ r2) → ~r1 ∩ ~r2 + expr* r1 = nullptr, * r2 = nullptr; + if (re().is_union(a, r1, r2)) { + expr_ref c1 = mk_complement(r1); + expr_ref c2 = mk_complement(r2); + return mk_inter(c1, c2); + } + + // ~ε → .+ + sort* s = nullptr; + if (re().is_to_re(a, r) && u().str.is_empty(r)) { + VERIFY(m_util.is_re(a, s)); + return expr_ref(re().mk_plus(re().mk_full_char(a->get_sort())), m); + } + return expr_ref(re().mk_complement(a), m); } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 6e955cfef..1c4655470 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -56,6 +56,7 @@ namespace seq { // Cache: maps (ele, regex) pair to its derivative obj_pair_map m_cache; + obj_pair_map m_top_cache; // post-simplify cache expr_ref_vector m_trail; // pin cached results // Depth limiting @@ -104,6 +105,9 @@ namespace seq { // Distribute concatenation through ITE/union in derivative expr_ref mk_deriv_concat(expr* d, expr* tail); + // Extract head character and tail from a sequence expression + bool get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl); + // Lightweight subsumption check: returns true if L(a) ⊆ L(b) bool is_subset(expr* a, expr* b); From ebdbf8331455ac0554449a4784443867bfcbc004 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 4 Jun 2026 12:16:56 -0700 Subject: [PATCH 12/32] Fix regression timeouts via range condition simplification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Simplify trivial range bounds in derive_range: when lo=0, omit the lo<=x condition; when hi=max_char, omit the x<=hi condition. Full charset ranges return epsilon directly. - Add char_le(0,x)=true and char_le(x,max)=true to eval_cond for always-valid bounds. 
- Add range implication logic to simplify_ite_rec: when path has negated/positive char_le constraints, detect implied or contradicted char_le conditions (e.g., ¬(x<=127) implies 128<=x). - Add is_subset(a, .+) check: non-nullable regexes are subsets of .+ - In update_state_graph, skip recursive exploration of nullable targets to avoid state explosion. These fixes resolve timeouts on 5724 (all problems), 5721 P1, and 5693. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 165 ++++++++++++++++++++++++++++++-- src/ast/rewriter/seq_derive.h | 1 + src/smt/seq_regex.cpp | 10 ++ 3 files changed, 170 insertions(+), 6 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 424462a4e..af1f257e4 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -336,10 +336,22 @@ namespace seq { // Extract character values from unit strings expr_ref c_lo(m), c_hi(m); if (u().str.is_unit_string(lo, c_lo) && u().str.is_unit_string(hi, c_hi)) { - // ite(lo <= ele && ele <= hi, ε, ∅) - expr_ref ge_lo(m_util.mk_le(c_lo, m_ele), m); - expr_ref le_hi(m_util.mk_le(m_ele, c_hi), m); - expr_ref in_range(m.mk_and(ge_lo, le_hi), m); + // Build range condition, simplifying trivial bounds + unsigned lo_val = 0, hi_val = 0; + bool lo_trivial = m_util.is_const_char(c_lo, lo_val) && lo_val == 0; + bool hi_trivial = m_util.is_const_char(c_hi, hi_val) && hi_val == u().max_char(); + + if (lo_trivial && hi_trivial) + return eps; // full charset range — always matches + + expr_ref in_range(m); + if (lo_trivial) + in_range = m_util.mk_le(m_ele, c_hi); + else if (hi_trivial) + in_range = m_util.mk_le(c_lo, m_ele); + else + in_range = m.mk_and(m_util.mk_le(c_lo, m_ele), m_util.mk_le(m_ele, c_hi)); + return mk_ite(in_range, eps, empty); } @@ -508,8 +520,13 @@ namespace seq { if (re().is_empty(a)) return true; if (re().is_full_seq(b)) return true; - // a ⊆ a* (since a* accepts 
everything a does and more) + // a ⊆ .+ iff a is non-nullable (non-nullable means ε ∉ L(a)) expr* b1 = nullptr; + if (re().is_plus(b, b1) && re().is_full_char(b1) && + re().get_info(a).nullable == l_false) + return true; + + // a ⊆ a* (since a* accepts everything a does and more) if (re().is_star(b, b1) && a == b1) return true; // a* ⊆ b* if a ⊆ b @@ -964,6 +981,12 @@ namespace seq { return vl <= vr ? l_true : l_false; if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr)) return vl <= vr ? l_true : l_false; + // char_le(0, x) is always true (chars are unsigned) + if (u().is_const_char(lhs, vl) && vl == 0) + return l_true; + // char_le(x, max_char) is always true + if (u().is_const_char(rhs, vr) && vr == u().max_char()) + return l_true; } // not(e1) @@ -996,6 +1019,66 @@ namespace seq { return l_undef; } + // Evaluate a single atomic condition (char_le or equality) against path constraints. + // Returns l_true if path implies cond, l_false if path contradicts cond, l_undef otherwise. 
+ lbool derive::eval_path_cond(path_t const& path, expr* c) { + expr* c_lhs = nullptr, * c_rhs = nullptr; + if (!m_util.is_char_le(c, c_lhs, c_rhs)) + return l_undef; + + unsigned c_lo = 0, c_hi = 0; + for (auto const& [cond, sign] : path) { + expr* p_lhs = nullptr, * p_rhs = nullptr; + if (!m_util.is_char_le(cond, p_lhs, p_rhs)) + continue; + unsigned p_lo = 0, p_hi = 0; + if (sign) { + // cond is negated: ¬cond is true + // ¬(x <= hi) means x > hi, i.e., x >= hi+1 + if (p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi)) { + // We know x > p_hi (i.e., x >= p_hi+1) + // c is (lo <= x): if lo <= p_hi+1 → c is true (since x >= p_hi+1 >= lo) + if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo <= p_hi + 1) + return l_true; + // c is (x <= hi2): if hi2 <= p_hi → c is false (since x > p_hi >= hi2) + if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi <= p_hi) + return l_false; + } + // ¬(lo <= x) means x < lo, i.e., x <= lo-1 + if (m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && p_lo > 0) { + // We know x < p_lo (i.e., x <= p_lo-1) + // c is (x <= hi): if hi >= p_lo-1 → c is true (since x <= p_lo-1 <= hi) + if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi >= p_lo - 1) + return l_true; + // c is (lo <= x): if lo >= p_lo → c is false (since x < p_lo <= lo) + if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo >= p_lo) + return l_false; + } + } else { + // cond is true (not negated) + // (x <= hi) is true: we know x <= p_hi + if (p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi)) { + // c is (lo <= x): if lo > p_hi → c is false (x <= p_hi < lo) + if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo > p_hi) + return l_false; + // c is (x <= hi2): if hi2 >= p_hi → c is true (x <= p_hi <= hi2) + if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi >= p_hi) + return l_true; + } + // (lo <= x) is true: we know x >= p_lo + if (m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele) { + // c is (x 
<= hi): if hi < p_lo → c is false (x >= p_lo > hi) + if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi < p_lo) + return l_false; + // c is (lo <= x): if lo <= p_lo → c is true (x >= p_lo >= lo) + if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo <= p_lo) + return l_true; + } + } + } + return l_undef; + } + void derive::push_path(path_t& path, expr* c, bool sign) { if (!sign && m.is_and(c)) { for (expr* arg : *to_app(c)) @@ -1043,7 +1126,31 @@ namespace seq { if (cond_val == l_true) return simplify_ite_rec(path, t); if (cond_val == l_false) return simplify_ite_rec(path, e); - // Check if c can be determined from the path + // When c is an AND (range condition), check each conjunct against the path. + // If any conjunct is contradicted by the path, c is false → take else. + // If all conjuncts are implied by the path, c is true → take then. + if (m.is_and(c)) { + lbool and_result = l_true; + for (expr* arg : *to_app(c)) { + lbool arg_val = eval_path_cond(path, arg); + if (arg_val == l_false) { + and_result = l_false; + break; + } + if (arg_val == l_undef) + and_result = l_undef; + } + if (and_result == l_true) return simplify_ite_rec(path, t); + if (and_result == l_false) return simplify_ite_rec(path, e); + } + // When c is a single char_le, also check against the path + else { + lbool c_val = eval_path_cond(path, c); + if (c_val == l_true) return simplify_ite_rec(path, t); + if (c_val == l_false) return simplify_ite_rec(path, e); + } + + // Check if c can be determined from the path (legacy checks for equality conditions) for (auto const& [cond, sign] : path) { // Direct match: c == cond if (c == cond) @@ -1074,6 +1181,52 @@ namespace seq { return simplify_ite_rec(path, e); } } + + // Range implication between char_le conditions: + // If c is char_le(lo, x) [lo <= x] and path has ¬(x <= hi) [x > hi]: + // ¬(x <= hi) means x >= hi+1. If lo <= hi+1, then lo <= x is implied → c is true. 
+ // If c is char_le(x, hi) [x <= hi] and path has ¬(lo <= x) [x < lo]: + // ¬(lo <= x) means x <= lo-1. If lo-1 <= hi, then x <= hi is implied → c is true. + expr* c_lhs = nullptr, * c_rhs = nullptr; + expr* p_lhs = nullptr, * p_rhs = nullptr; + if (m_util.is_char_le(c, c_lhs, c_rhs) && m_util.is_char_le(cond, p_lhs, p_rhs)) { + unsigned c_lo = 0, c_hi = 0, p_lo = 0, p_hi = 0; + if (sign) { + // cond is negated (¬cond is true) + // c is (lo <= x), cond is (x <= hi) with sign=true means ¬(x <= hi) i.e. x > hi i.e. x >= hi+1 + if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && + p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && + c_lo <= p_hi + 1) + return simplify_ite_rec(path, t); + // c is (x <= hi), cond is (lo <= x) with sign=true means ¬(lo <= x) i.e. x < lo i.e. x <= lo-1 + if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && + m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && + p_lo > 0 && p_lo - 1 <= c_hi) + return simplify_ite_rec(path, t); + } else { + // cond is true (not negated) + // c is (lo <= x), cond is (x <= hi) true: x <= hi. If lo > hi → c is false. + if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && + p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && + c_lo > p_hi) + return simplify_ite_rec(path, e); + // c is (x <= hi), cond is (lo <= x) true: lo <= x. If hi < lo → c is false. + if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && + m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && + c_hi < p_lo) + return simplify_ite_rec(path, e); + // c is (lo <= x), cond is (lo2 <= x) true: lo2 <= x. If lo <= lo2 → c is true. + if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && + m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && + c_lo <= p_lo) + return simplify_ite_rec(path, t); + // c is (x <= hi), cond is (x <= hi2) true: x <= hi2. If hi >= hi2 → c is true. 
+ if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && + p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && + c_hi >= p_hi) + return simplify_ite_rec(path, t); + } + } } // Check if both range bounds are in path and c is (x = v) within range diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 1c4655470..27dbeb4ea 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -123,6 +123,7 @@ namespace seq { std::pair simplify_ite_rec(path_t& path, expr* c, expr* t, expr* e); void push_path(path_t& path, expr* c, bool sign); lbool eval_cond(expr* cond); + lbool eval_path_cond(path_t const& path, expr* c); sort* re_sort(expr* r) { return r->get_sort(); } sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 64487a21e..62dbfd3aa 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -859,6 +859,16 @@ namespace smt { m_state_graph.add_edge(r_id, dr_id, maybecycle); } m_state_graph.mark_done(r_id); + // Recursively explore unexplored targets for dead state detection + // Skip targets that are nullable to avoid state explosion + for (auto const& dr: derivatives) { + unsigned dr_id = get_state_id(dr); + if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) + continue; + if (re().get_info(dr).nullable == l_true) + continue; + update_state_graph(dr); + } } STRACE(seq_regex, m_state_graph.display(tout);); From dc8179212ef25483c62ae3168fa2571328bf8843 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 4 Jun 2026 16:59:59 -0700 Subject: [PATCH 13/32] Add interval-based range simplification for ITE conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce exclusion intervals alongside the existing path-based condition tracking in simplify_ite_rec. 
The intervals track which character values are still possible at each point in the ITE tree, enabling simplification of nested range conditions that the per-entry path approach cannot handle. Key additions: - intervals_t type and push_intervals() to maintain live character ranges - eval_range_cond() checks AND-of-char_le conditions against intervals - intersect_intervals/exclude_interval utilities from seq_rewriter pattern - Negated AND handling: ¬(lo<=x ∧ x<=hi) excludes [lo,hi] from intervals The interval check runs before the existing eval_path_cond logic, catching cases like: if(0<=x<=10, t, if(1<=x<=8, t2, e2)) → if(0<=x<=10, t, e2) where the inner range [1,8] is fully contained in the excluded outer range. Fixes remaining regression timeouts on 5728 P2 and 5731 P4. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 197 ++++++++++++++++++++++++++------ src/ast/rewriter/seq_derive.h | 9 +- 2 files changed, 169 insertions(+), 37 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index af1f257e4..3b8fc5aa7 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -1091,14 +1091,146 @@ namespace seq { } } - std::pair derive::simplify_ite_rec(path_t& path, expr* c, expr* t, expr* e) { + void derive::push_intervals(intervals_t& intervals, expr* c, bool sign) { + expr* lhs = nullptr, * rhs = nullptr; + unsigned val = 0; + if (m_util.is_char_le(c, lhs, rhs)) { + if (!sign) { + if (lhs == m_ele && m_util.is_const_char(rhs, val)) + intersect_intervals(0, val, intervals); + else if (rhs == m_ele && m_util.is_const_char(lhs, val)) + intersect_intervals(val, u().max_char(), intervals); + } else { + if (lhs == m_ele && m_util.is_const_char(rhs, val)) + exclude_interval(0, val, intervals, u().max_char()); + else if (rhs == m_ele && m_util.is_const_char(lhs, val)) + exclude_interval(val, u().max_char(), intervals, u().max_char()); + } + } else if (!sign 
&& m.is_and(c)) { + for (expr* arg : *to_app(c)) + push_intervals(intervals, arg, false); + } else if (sign && m.is_or(c)) { + for (expr* arg : *to_app(c)) + push_intervals(intervals, arg, true); + } else if (sign && m.is_and(c)) { + // ¬(and(lo<=x, x<=hi)) → exclude [lo, hi] + unsigned lo = 0, hi = u().max_char(); + bool got_lo = false, got_hi = false; + for (expr* arg : *to_app(c)) { + expr* a_lhs = nullptr, * a_rhs = nullptr; + unsigned a_val = 0; + if (m_util.is_char_le(arg, a_lhs, a_rhs)) { + if (a_lhs == m_ele && m_util.is_const_char(a_rhs, a_val)) + { hi = std::min(hi, a_val); got_hi = true; } + else if (a_rhs == m_ele && m_util.is_const_char(a_lhs, a_val)) + { lo = std::max(lo, a_val); got_lo = true; } + } + } + if (got_lo || got_hi) + exclude_interval(lo, hi, intervals, u().max_char()); + } + } + + void derive::intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges) { + unsigned j = 0; + for (unsigned i = 0; i < ranges.size(); ++i) { + auto [lo1, hi1] = ranges[i]; + if (hi < lo1) + break; + if (hi1 >= lo) + ranges[j++] = std::make_pair(std::max(lo1, lo), std::min(hi1, hi)); + } + ranges.shrink(j); + } + + void derive::exclude_interval(unsigned lo, unsigned hi, intervals_t& ranges, unsigned max_char) { + if (lo == 0 && hi >= max_char) { ranges.reset(); return; } + if (lo == 0) { intersect_intervals(hi + 1, max_char, ranges); return; } + if (hi >= max_char) { intersect_intervals(0, lo - 1, ranges); return; } + intervals_t right(ranges); + intersect_intervals(0, lo - 1, ranges); + intersect_intervals(hi + 1, max_char, right); + ranges.append(right); + } + + lbool derive::eval_range_cond(intervals_t const& intervals, expr* c) { + if (intervals.empty()) + return l_false; + + expr* lhs = nullptr, * rhs = nullptr; + unsigned val = 0; + + // Handle AND of char_le as range [lo, hi] + if (m.is_and(c)) { + unsigned lo = 0, hi = u().max_char(); + bool got_lo = false, got_hi = false; + bool all_char_le = true; + for (expr* arg : *to_app(c)) { + expr* a_lhs = 
nullptr, * a_rhs = nullptr; + unsigned a_val = 0; + if (m_util.is_char_le(arg, a_lhs, a_rhs)) { + if (a_lhs == m_ele && m_util.is_const_char(a_rhs, a_val)) + { hi = std::min(hi, a_val); got_hi = true; } + else if (a_rhs == m_ele && m_util.is_const_char(a_lhs, a_val)) + { lo = std::max(lo, a_val); got_lo = true; } + else all_char_le = false; + } else all_char_le = false; + } + if (all_char_le && (got_lo || got_hi)) { + if (lo > hi) return l_false; + bool any_overlap = false; + bool all_contained = true; + for (auto const& [r_lo, r_hi] : intervals) { + if (std::max(r_lo, lo) <= std::min(r_hi, hi)) + any_overlap = true; + if (r_lo < lo || r_hi > hi) + all_contained = false; + } + if (!any_overlap) return l_false; + if (all_contained) return l_true; + } + return l_undef; + } + + // Handle single char_le + if (!m_util.is_char_le(c, lhs, rhs)) + return l_undef; + + if (lhs == m_ele && m_util.is_const_char(rhs, val)) { + // c is (x <= val): true if all hi <= val, false if all lo > val + bool all_le = true, any_le = false; + for (auto const& [r_lo, r_hi] : intervals) { + if (r_lo <= val) any_le = true; + if (r_hi > val) all_le = false; + } + if (all_le) return l_true; + if (!any_le) return l_false; + } else if (rhs == m_ele && m_util.is_const_char(lhs, val)) { + // c is (val <= x): true if all lo >= val, false if all hi < val + bool all_ge = true, any_ge = false; + for (auto const& [r_lo, r_hi] : intervals) { + if (r_hi >= val) any_ge = true; + if (r_lo < val) all_ge = false; + } + if (all_ge) return l_true; + if (!any_ge) return l_false; + } + return l_undef; + } + + std::pair derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e) { auto sz = path.size(); + auto saved_intervals = intervals; push_path(path, c, false); - expr_ref st = simplify_ite_rec(path, t); + push_intervals(intervals, c, false); + expr_ref st = simplify_ite_rec(path, intervals, t); path.shrink(sz); + intervals = saved_intervals; push_path(path, c, true); - expr_ref se 
= simplify_ite_rec(path, e); + push_intervals(intervals, c, true); + expr_ref se = simplify_ite_rec(path, intervals, e); path.shrink(sz); + intervals = saved_intervals; return { st, se }; } @@ -1112,19 +1244,26 @@ namespace seq { if (cond_val == l_false) return simplify_ite(e); path_t path; - auto [st, se] = simplify_ite_rec(path, c, t, e); + intervals_t intervals; + intervals.push_back(std::make_pair(0u, u().max_char())); + auto [st, se] = simplify_ite_rec(path, intervals, c, t, e); return mk_ite(c, st, se); } - expr_ref derive::simplify_ite_rec(path_t& path, expr* d) { + expr_ref derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d) { expr* c, * t, * e; if (!m.is_ite(d, c, t, e)) return expr_ref(d, m); // Try to evaluate c directly lbool cond_val = eval_cond(c); - if (cond_val == l_true) return simplify_ite_rec(path, t); - if (cond_val == l_false) return simplify_ite_rec(path, e); + if (cond_val == l_true) return simplify_ite_rec(path, intervals, t); + if (cond_val == l_false) return simplify_ite_rec(path, intervals, e); + + // Use interval-based range reasoning (catches AND range vs disjoint intervals) + lbool range_val = eval_range_cond(intervals, c); + if (range_val == l_true) return simplify_ite_rec(path, intervals, t); + if (range_val == l_false) return simplify_ite_rec(path, intervals, e); // When c is an AND (range condition), check each conjunct against the path. // If any conjunct is contradicted by the path, c is false → take else. 
@@ -1140,21 +1279,21 @@ namespace seq { if (arg_val == l_undef) and_result = l_undef; } - if (and_result == l_true) return simplify_ite_rec(path, t); - if (and_result == l_false) return simplify_ite_rec(path, e); + if (and_result == l_true) return simplify_ite_rec(path, intervals, t); + if (and_result == l_false) return simplify_ite_rec(path, intervals, e); } // When c is a single char_le, also check against the path else { lbool c_val = eval_path_cond(path, c); - if (c_val == l_true) return simplify_ite_rec(path, t); - if (c_val == l_false) return simplify_ite_rec(path, e); + if (c_val == l_true) return simplify_ite_rec(path, intervals, t); + if (c_val == l_false) return simplify_ite_rec(path, intervals, e); } // Check if c can be determined from the path (legacy checks for equality conditions) for (auto const& [cond, sign] : path) { // Direct match: c == cond if (c == cond) - return sign ? simplify_ite_rec(path, e) : simplify_ite_rec(path, t); + return sign ? simplify_ite_rec(path, intervals, e) : simplify_ite_rec(path, intervals, t); // c is (x = v), cond is (x = w) with sign=false (cond is true, so x=w) // If v != w, then c is false → take else branch @@ -1163,7 +1302,7 @@ namespace seq { if (m.is_value(lhs1)) std::swap(lhs1, rhs1); if (m.is_value(lhs2)) std::swap(lhs2, rhs2); if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) - return simplify_ite_rec(path, e); + return simplify_ite_rec(path, intervals, e); } // Range constraint: cond is (lo <= x) or (x <= hi) with sign=false @@ -1175,56 +1314,44 @@ namespace seq { expr* le_lhs = nullptr, * le_rhs = nullptr; if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_rhs == lhs2 && m_util.is_const_char(le_lhs, lo_val) && v_val < lo_val) - return simplify_ite_rec(path, e); + return simplify_ite_rec(path, intervals, e); if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_lhs == lhs2 && m_util.is_const_char(le_rhs, hi_val) && v_val > hi_val) - return simplify_ite_rec(path, e); + return simplify_ite_rec(path, intervals, e); } } 
// Range implication between char_le conditions: - // If c is char_le(lo, x) [lo <= x] and path has ¬(x <= hi) [x > hi]: - // ¬(x <= hi) means x >= hi+1. If lo <= hi+1, then lo <= x is implied → c is true. - // If c is char_le(x, hi) [x <= hi] and path has ¬(lo <= x) [x < lo]: - // ¬(lo <= x) means x <= lo-1. If lo-1 <= hi, then x <= hi is implied → c is true. expr* c_lhs = nullptr, * c_rhs = nullptr; expr* p_lhs = nullptr, * p_rhs = nullptr; if (m_util.is_char_le(c, c_lhs, c_rhs) && m_util.is_char_le(cond, p_lhs, p_rhs)) { unsigned c_lo = 0, c_hi = 0, p_lo = 0, p_hi = 0; if (sign) { - // cond is negated (¬cond is true) - // c is (lo <= x), cond is (x <= hi) with sign=true means ¬(x <= hi) i.e. x > hi i.e. x >= hi+1 if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && c_lo <= p_hi + 1) - return simplify_ite_rec(path, t); - // c is (x <= hi), cond is (lo <= x) with sign=true means ¬(lo <= x) i.e. x < lo i.e. x <= lo-1 + return simplify_ite_rec(path, intervals, t); if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && p_lo > 0 && p_lo - 1 <= c_hi) - return simplify_ite_rec(path, t); + return simplify_ite_rec(path, intervals, t); } else { - // cond is true (not negated) - // c is (lo <= x), cond is (x <= hi) true: x <= hi. If lo > hi → c is false. if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && c_lo > p_hi) - return simplify_ite_rec(path, e); - // c is (x <= hi), cond is (lo <= x) true: lo <= x. If hi < lo → c is false. + return simplify_ite_rec(path, intervals, e); if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && c_hi < p_lo) - return simplify_ite_rec(path, e); - // c is (lo <= x), cond is (lo2 <= x) true: lo2 <= x. If lo <= lo2 → c is true. 
+ return simplify_ite_rec(path, intervals, e); if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && c_lo <= p_lo) - return simplify_ite_rec(path, t); - // c is (x <= hi), cond is (x <= hi2) true: x <= hi2. If hi >= hi2 → c is true. + return simplify_ite_rec(path, intervals, t); if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && c_hi >= p_hi) - return simplify_ite_rec(path, t); + return simplify_ite_rec(path, intervals, t); } } } @@ -1251,14 +1378,14 @@ namespace seq { } } if (has_lo && has_hi && lo_bound <= v_val && v_val <= hi_bound) { - auto [st, se] = simplify_ite_rec(path, c, t, e); + auto [st, se] = simplify_ite_rec(path, intervals, c, t, e); return mk_ite(c, st, se); } } } // Cannot simplify c: recurse into branches with extended paths - auto [st, se] = simplify_ite_rec(path, c, t, e); + auto [st, se] = simplify_ite_rec(path, intervals, c, t, e); return mk_ite(c, st, se); } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 27dbeb4ea..41a3937c3 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -116,14 +116,19 @@ namespace seq { // Path of signed conditions for ITE simplification using path_t = svector>; + using intervals_t = svector>; // Simplify ITE conditions w.r.t. 
m_ele and path knowledge expr_ref simplify_ite(expr* d); - expr_ref simplify_ite_rec(path_t& path, expr* d); - std::pair simplify_ite_rec(path_t& path, expr* c, expr* t, expr* e); + expr_ref simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d); + std::pair simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e); void push_path(path_t& path, expr* c, bool sign); + void push_intervals(intervals_t& intervals, expr* c, bool sign); lbool eval_cond(expr* cond); lbool eval_path_cond(path_t const& path, expr* c); + lbool eval_range_cond(intervals_t const& intervals, expr* c); + static void intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges); + static void exclude_interval(unsigned lo, unsigned hi, intervals_t& ranges, unsigned max_char); sort* re_sort(expr* r) { return r->get_sort(); } sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } From ed2c64208d4284682fbee15a90cafc14bd5931d4 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 4 Jun 2026 18:21:26 -0700 Subject: [PATCH 14/32] intervals Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_derive.cpp | 291 +++++--------------------------- src/ast/rewriter/seq_derive.h | 1 - 2 files changed, 42 insertions(+), 250 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 3b8fc5aa7..97aadb850 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -1021,63 +1021,6 @@ namespace seq { // Evaluate a single atomic condition (char_le or equality) against path constraints. // Returns l_true if path implies cond, l_false if path contradicts cond, l_undef otherwise. 
- lbool derive::eval_path_cond(path_t const& path, expr* c) { - expr* c_lhs = nullptr, * c_rhs = nullptr; - if (!m_util.is_char_le(c, c_lhs, c_rhs)) - return l_undef; - - unsigned c_lo = 0, c_hi = 0; - for (auto const& [cond, sign] : path) { - expr* p_lhs = nullptr, * p_rhs = nullptr; - if (!m_util.is_char_le(cond, p_lhs, p_rhs)) - continue; - unsigned p_lo = 0, p_hi = 0; - if (sign) { - // cond is negated: ¬cond is true - // ¬(x <= hi) means x > hi, i.e., x >= hi+1 - if (p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi)) { - // We know x > p_hi (i.e., x >= p_hi+1) - // c is (lo <= x): if lo <= p_hi+1 → c is true (since x >= p_hi+1 >= lo) - if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo <= p_hi + 1) - return l_true; - // c is (x <= hi2): if hi2 <= p_hi → c is false (since x > p_hi >= hi2) - if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi <= p_hi) - return l_false; - } - // ¬(lo <= x) means x < lo, i.e., x <= lo-1 - if (m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && p_lo > 0) { - // We know x < p_lo (i.e., x <= p_lo-1) - // c is (x <= hi): if hi >= p_lo-1 → c is true (since x <= p_lo-1 <= hi) - if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi >= p_lo - 1) - return l_true; - // c is (lo <= x): if lo >= p_lo → c is false (since x < p_lo <= lo) - if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo >= p_lo) - return l_false; - } - } else { - // cond is true (not negated) - // (x <= hi) is true: we know x <= p_hi - if (p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi)) { - // c is (lo <= x): if lo > p_hi → c is false (x <= p_hi < lo) - if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo > p_hi) - return l_false; - // c is (x <= hi2): if hi2 >= p_hi → c is true (x <= p_hi <= hi2) - if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi >= p_hi) - return l_true; - } - // (lo <= x) is true: we know x >= p_lo - if (m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele) { - // c is (x 
<= hi): if hi < p_lo → c is false (x >= p_lo > hi) - if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && c_hi < p_lo) - return l_false; - // c is (lo <= x): if lo <= p_lo → c is true (x >= p_lo >= lo) - if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && c_lo <= p_lo) - return l_true; - } - } - } - return l_undef; - } void derive::push_path(path_t& path, expr* c, bool sign) { if (!sign && m.is_and(c)) { @@ -1092,19 +1035,23 @@ namespace seq { } void derive::push_intervals(intervals_t& intervals, expr* c, bool sign) { - expr* lhs = nullptr, * rhs = nullptr; - unsigned val = 0; - if (m_util.is_char_le(c, lhs, rhs)) { - if (!sign) { - if (lhs == m_ele && m_util.is_const_char(rhs, val)) - intersect_intervals(0, val, intervals); - else if (rhs == m_ele && m_util.is_const_char(lhs, val)) - intersect_intervals(val, u().max_char(), intervals); + unsigned lo = 0, hi = 0; + bool negated = false; + if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) { + // is_char_const_range returns the range [lo, hi] such that + // c ≡ (lo <= x <= hi) when negated=false, or c ≡ ¬(lo <= x <= hi) when negated=true + bool effective_neg = (negated != sign); + // effective_neg=false means condition is asserted true: intersect with [lo, hi] + // effective_neg=true means condition is negated: exclude [lo, hi] + if (!effective_neg) { + if (lo <= hi) + intersect_intervals(lo, hi, intervals); + else + intervals.reset(); // contradictory range } else { - if (lhs == m_ele && m_util.is_const_char(rhs, val)) - exclude_interval(0, val, intervals, u().max_char()); - else if (rhs == m_ele && m_util.is_const_char(lhs, val)) - exclude_interval(val, u().max_char(), intervals, u().max_char()); + if (lo <= hi) + exclude_interval(lo, hi, intervals, u().max_char()); + // else: excluding empty range is a no-op } } else if (!sign && m.is_and(c)) { for (expr* arg : *to_app(c)) @@ -1112,22 +1059,6 @@ namespace seq { } else if (sign && m.is_or(c)) { for (expr* arg : *to_app(c)) 
push_intervals(intervals, arg, true); - } else if (sign && m.is_and(c)) { - // ¬(and(lo<=x, x<=hi)) → exclude [lo, hi] - unsigned lo = 0, hi = u().max_char(); - bool got_lo = false, got_hi = false; - for (expr* arg : *to_app(c)) { - expr* a_lhs = nullptr, * a_rhs = nullptr; - unsigned a_val = 0; - if (m_util.is_char_le(arg, a_lhs, a_rhs)) { - if (a_lhs == m_ele && m_util.is_const_char(a_rhs, a_val)) - { hi = std::min(hi, a_val); got_hi = true; } - else if (a_rhs == m_ele && m_util.is_const_char(a_lhs, a_val)) - { lo = std::max(lo, a_val); got_lo = true; } - } - } - if (got_lo || got_hi) - exclude_interval(lo, hi, intervals, u().max_char()); } } @@ -1156,64 +1087,31 @@ namespace seq { lbool derive::eval_range_cond(intervals_t const& intervals, expr* c) { if (intervals.empty()) return l_false; - - expr* lhs = nullptr, * rhs = nullptr; - unsigned val = 0; - - // Handle AND of char_le as range [lo, hi] - if (m.is_and(c)) { - unsigned lo = 0, hi = u().max_char(); - bool got_lo = false, got_hi = false; - bool all_char_le = true; - for (expr* arg : *to_app(c)) { - expr* a_lhs = nullptr, * a_rhs = nullptr; - unsigned a_val = 0; - if (m_util.is_char_le(arg, a_lhs, a_rhs)) { - if (a_lhs == m_ele && m_util.is_const_char(a_rhs, a_val)) - { hi = std::min(hi, a_val); got_hi = true; } - else if (a_rhs == m_ele && m_util.is_const_char(a_lhs, a_val)) - { lo = std::max(lo, a_val); got_lo = true; } - else all_char_le = false; - } else all_char_le = false; - } - if (all_char_le && (got_lo || got_hi)) { - if (lo > hi) return l_false; - bool any_overlap = false; - bool all_contained = true; - for (auto const& [r_lo, r_hi] : intervals) { - if (std::max(r_lo, lo) <= std::min(r_hi, hi)) - any_overlap = true; - if (r_lo < lo || r_hi > hi) - all_contained = false; - } - if (!any_overlap) return l_false; - if (all_contained) return l_true; - } + unsigned lo = 0, hi = 0; + bool negated = false; + if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated)) return l_undef; + if (lo > hi) { + // 
c asserts x in empty range or c asserts x NOT in empty range + return negated ? l_true : l_false; } - - // Handle single char_le - if (!m_util.is_char_le(c, lhs, rhs)) - return l_undef; - - if (lhs == m_ele && m_util.is_const_char(rhs, val)) { - // c is (x <= val): true if all hi <= val, false if all lo > val - bool all_le = true, any_le = false; - for (auto const& [r_lo, r_hi] : intervals) { - if (r_lo <= val) any_le = true; - if (r_hi > val) all_le = false; - } - if (all_le) return l_true; - if (!any_le) return l_false; - } else if (rhs == m_ele && m_util.is_const_char(lhs, val)) { - // c is (val <= x): true if all lo >= val, false if all hi < val - bool all_ge = true, any_ge = false; - for (auto const& [r_lo, r_hi] : intervals) { - if (r_hi >= val) any_ge = true; - if (r_lo < val) all_ge = false; - } - if (all_ge) return l_true; - if (!any_ge) return l_false; + // Check if [lo, hi] overlaps with intervals and/or contains all intervals + bool any_overlap = false; + bool all_contained = true; // all intervals ⊆ [lo, hi] + for (auto const& [r_lo, r_hi] : intervals) { + if (std::max(r_lo, lo) <= std::min(r_hi, hi)) + any_overlap = true; + if (r_lo < lo || r_hi > hi) + all_contained = false; + } + if (!negated) { + // c asserts x ∈ [lo, hi] + if (!any_overlap) return l_false; + if (all_contained) return l_true; + } else { + // c asserts x ∉ [lo, hi] + if (all_contained) return l_false; // all values are in [lo,hi], so ¬(x∈[lo,hi]) is false + if (!any_overlap) return l_true; // no values are in [lo,hi], so ¬(x∈[lo,hi]) is true } return l_undef; } @@ -1255,48 +1153,21 @@ namespace seq { if (!m.is_ite(d, c, t, e)) return expr_ref(d, m); - // Try to evaluate c directly + // Try to evaluate c directly (handles trivially true/false conditions) lbool cond_val = eval_cond(c); if (cond_val == l_true) return simplify_ite_rec(path, intervals, t); if (cond_val == l_false) return simplify_ite_rec(path, intervals, e); - // Use interval-based range reasoning (catches AND range vs 
disjoint intervals) + // Use interval-based range reasoning lbool range_val = eval_range_cond(intervals, c); if (range_val == l_true) return simplify_ite_rec(path, intervals, t); if (range_val == l_false) return simplify_ite_rec(path, intervals, e); - // When c is an AND (range condition), check each conjunct against the path. - // If any conjunct is contradicted by the path, c is false → take else. - // If all conjuncts are implied by the path, c is true → take then. - if (m.is_and(c)) { - lbool and_result = l_true; - for (expr* arg : *to_app(c)) { - lbool arg_val = eval_path_cond(path, arg); - if (arg_val == l_false) { - and_result = l_false; - break; - } - if (arg_val == l_undef) - and_result = l_undef; - } - if (and_result == l_true) return simplify_ite_rec(path, intervals, t); - if (and_result == l_false) return simplify_ite_rec(path, intervals, e); - } - // When c is a single char_le, also check against the path - else { - lbool c_val = eval_path_cond(path, c); - if (c_val == l_true) return simplify_ite_rec(path, intervals, t); - if (c_val == l_false) return simplify_ite_rec(path, intervals, e); - } - - // Check if c can be determined from the path (legacy checks for equality conditions) + // Check direct structural matches in path (equality conditions) for (auto const& [cond, sign] : path) { - // Direct match: c == cond if (c == cond) return sign ? simplify_ite_rec(path, intervals, e) : simplify_ite_rec(path, intervals, t); - // c is (x = v), cond is (x = w) with sign=false (cond is true, so x=w) - // If v != w, then c is false → take else branch expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; if (!sign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { if (m.is_value(lhs1)) std::swap(lhs1, rhs1); @@ -1304,84 +1175,6 @@ namespace seq { if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) return simplify_ite_rec(path, intervals, e); } - - // Range constraint: cond is (lo <= x) or (x <= hi) with sign=false - // and c is (x = v). 
If v is outside the range, c is false. - unsigned v_val = 0, lo_val = 0, hi_val = 0; - if (!sign && m.is_eq(c, lhs2, rhs2)) { - if (m.is_value(lhs2)) std::swap(lhs2, rhs2); - if (m_util.is_const_char(rhs2, v_val)) { - expr* le_lhs = nullptr, * le_rhs = nullptr; - if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_rhs == lhs2 && - m_util.is_const_char(le_lhs, lo_val) && v_val < lo_val) - return simplify_ite_rec(path, intervals, e); - if (m_util.is_char_le(cond, le_lhs, le_rhs) && le_lhs == lhs2 && - m_util.is_const_char(le_rhs, hi_val) && v_val > hi_val) - return simplify_ite_rec(path, intervals, e); - } - } - - // Range implication between char_le conditions: - expr* c_lhs = nullptr, * c_rhs = nullptr; - expr* p_lhs = nullptr, * p_rhs = nullptr; - if (m_util.is_char_le(c, c_lhs, c_rhs) && m_util.is_char_le(cond, p_lhs, p_rhs)) { - unsigned c_lo = 0, c_hi = 0, p_lo = 0, p_hi = 0; - if (sign) { - if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && - p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && - c_lo <= p_hi + 1) - return simplify_ite_rec(path, intervals, t); - if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && - m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && - p_lo > 0 && p_lo - 1 <= c_hi) - return simplify_ite_rec(path, intervals, t); - } else { - if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && - p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && - c_lo > p_hi) - return simplify_ite_rec(path, intervals, e); - if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && - m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && - c_hi < p_lo) - return simplify_ite_rec(path, intervals, e); - if (m_util.is_const_char(c_lhs, c_lo) && c_rhs == m_ele && - m_util.is_const_char(p_lhs, p_lo) && p_rhs == m_ele && - c_lo <= p_lo) - return simplify_ite_rec(path, intervals, t); - if (c_lhs == m_ele && m_util.is_const_char(c_rhs, c_hi) && - p_lhs == m_ele && m_util.is_const_char(p_rhs, p_hi) && - c_hi >= p_hi) - return 
simplify_ite_rec(path, intervals, t); - } - } - } - - // Check if both range bounds are in path and c is (x = v) within range - expr* lhs_c = nullptr, * rhs_c = nullptr; - unsigned v_val = 0; - if (m.is_eq(c, lhs_c, rhs_c)) { - if (m.is_value(lhs_c)) std::swap(lhs_c, rhs_c); - if (m_util.is_const_char(rhs_c, v_val)) { - unsigned lo_bound = 0, hi_bound = UINT_MAX; - bool has_lo = false, has_hi = false; - for (auto const& [cond, sign] : path) { - if (sign) continue; - expr* le_lhs = nullptr, * le_rhs = nullptr; - if (m_util.is_char_le(cond, le_lhs, le_rhs)) { - unsigned bound = 0; - if (le_rhs == lhs_c && m_util.is_const_char(le_lhs, bound)) { - lo_bound = bound; has_lo = true; - } - if (le_lhs == lhs_c && m_util.is_const_char(le_rhs, bound)) { - hi_bound = bound; has_hi = true; - } - } - } - if (has_lo && has_hi && lo_bound <= v_val && v_val <= hi_bound) { - auto [st, se] = simplify_ite_rec(path, intervals, c, t, e); - return mk_ite(c, st, se); - } - } } // Cannot simplify c: recurse into branches with extended paths diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 41a3937c3..a524f39ee 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -125,7 +125,6 @@ namespace seq { void push_path(path_t& path, expr* c, bool sign); void push_intervals(intervals_t& intervals, expr* c, bool sign); lbool eval_cond(expr* cond); - lbool eval_path_cond(path_t const& path, expr* c); lbool eval_range_cond(intervals_t const& intervals, expr* c); static void intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges); static void exclude_interval(unsigned lo, unsigned hi, intervals_t& ranges, unsigned max_char); From 120b4e4712fc4456d3f776e03cc07c30b7984016 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 5 Jun 2026 01:37:10 -0700 Subject: [PATCH 15/32] cr updates Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_derive.cpp | 68 +++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 25 
deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 97aadb850..1b16c84b5 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -842,6 +842,17 @@ namespace seq { UNREACHABLE(); return expr_ref(m.mk_true(), m); } + // Remove subsumed elements: if a ⊆ b, drop a from union + for (unsigned i = 0; i < args.size(); ++i) { + for (unsigned j = 0; j < args.size(); ++j) { + if (i != j && args.get(i) && args.get(j) && is_subset(args.get(i), args.get(j))) { + args[i] = args.back(); + args.pop_back(); + --i; + break; + } + } + } if (args.size() == 1) return expr_ref(args.get(0), m); // Build right-associated union @@ -856,6 +867,17 @@ namespace seq { UNREACHABLE(); return expr_ref(m.mk_true(), m); } + // Remove subsuming elements: if a ⊆ b, drop b from intersection + for (unsigned i = 0; i < args.size(); ++i) { + for (unsigned j = 0; j < args.size(); ++j) { + if (i != j && args.get(i) && args.get(j) && is_subset(args.get(i), args.get(j))) { + args[j] = args.back(); + args.pop_back(); + if (j < i) --i; + --j; + } + } + } if (args.size() == 1) return expr_ref(args.get(0), m); // Build right-associated intersection @@ -958,62 +980,58 @@ namespace seq { // ------------------------------------------------------- lbool derive::eval_cond(expr* cond) { - expr* lhs = nullptr, * rhs = nullptr, * e1 = nullptr; - unsigned ch1 = 0, ch2 = 0; + expr* e1 = nullptr; if (m.is_true(cond)) return l_true; if (m.is_false(cond)) return l_false; - // elem = char or char = elem - if (m.is_eq(cond, lhs, rhs)) { - if (rhs == m_ele) std::swap(lhs, rhs); - if (lhs == m_ele && u().is_const_char(rhs, ch1) && u().is_const_char(m_ele, ch2)) - return ch1 == ch2 ? 
l_true : l_false; - if (lhs == rhs) return l_true; + // Use is_char_const_range to evaluate conditions involving m_ele + unsigned lo = 0, hi = 0, ele_val = 0; + bool negated = false; + if (m_util.is_char_const_range(m_ele, cond, lo, hi, negated) && u().is_const_char(m_ele, ele_val)) { + bool in_range = (lo <= ele_val && ele_val <= hi); + return (in_range != negated) ? l_true : l_false; } - // char_le(lhs, rhs) + // Handle self-equality and constant comparisons not involving m_ele + expr* lhs = nullptr, * rhs = nullptr; + if (m.is_eq(cond, lhs, rhs) && lhs == rhs) + return l_true; + + unsigned vl = 0, vr = 0; if (u().is_char_le(cond, lhs, rhs)) { - unsigned vl = 0, vr = 0; - if (lhs == m_ele && u().is_const_char(m_ele, vl) && u().is_const_char(rhs, vr)) - return vl <= vr ? l_true : l_false; - if (rhs == m_ele && u().is_const_char(lhs, vl) && u().is_const_char(m_ele, vr)) - return vl <= vr ? l_true : l_false; if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr)) return vl <= vr ? l_true : l_false; - // char_le(0, x) is always true (chars are unsigned) if (u().is_const_char(lhs, vl) && vl == 0) return l_true; - // char_le(x, max_char) is always true if (u().is_const_char(rhs, vr) && vr == u().max_char()) return l_true; } // not(e1) - if (m.is_not(cond, e1)) { - lbool inner = eval_cond(e1); - if (inner != l_undef) - return inner == l_true ? l_false : l_true; - } + if (m.is_not(cond, e1)) + return ~eval_cond(e1); // and(...) if (m.is_and(cond)) { + lbool r = l_true; for (expr* arg : *to_app(cond)) { lbool v = eval_cond(arg); if (v == l_false) return l_false; - if (v == l_undef) return l_undef; + if (v == l_undef) r = l_undef; } - return l_true; + return r; } // or(...) 
if (m.is_or(cond)) { + lbool r = l_false; for (expr* arg : *to_app(cond)) { lbool v = eval_cond(arg); if (v == l_true) return l_true; - if (v == l_undef) return l_undef; + if (v == l_undef) r = l_undef; } - return l_false; + return r; } return l_undef; From f40eb62e83c706aaf4ff2312c8815796cab667ec Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 5 Jun 2026 11:49:35 -0700 Subject: [PATCH 16/32] handle more cases with intervals --- src/ast/rewriter/seq_derive.cpp | 145 +++++++++++++++++++++----------- src/ast/rewriter/seq_derive.h | 4 +- 2 files changed, 100 insertions(+), 49 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 1b16c84b5..047c5f5f1 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -1038,46 +1038,85 @@ namespace seq { } // Evaluate a single atomic condition (char_le or equality) against path constraints. - // Returns l_true if path implies cond, l_false if path contradicts cond, l_undef otherwise. + // Returns l_true if path implies (c, !sign), l_false if path contradicts (c, !sign), l_undef otherwise. - void derive::push_path(path_t& path, expr* c, bool sign) { - if (!sign && m.is_and(c)) { - for (expr* arg : *to_app(c)) - push_path(path, arg, false); - } else if (sign && m.is_or(c)) { - for (expr* arg : *to_app(c)) - push_path(path, arg, true); - } else { - path.push_back({ c, sign }); + lbool derive::push_path(path_t& path, expr* c, bool sign) { + // Check if (c, sign) is already determined by the path + for (auto const& [cond, csign] : path) { + if (c == cond) + return csign == sign ? l_true : l_false; + + expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; + if (!csign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { + if (m.is_value(lhs1)) std::swap(lhs1, rhs1); + if (m.is_value(lhs2)) std::swap(lhs2, rhs2); + if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) + return sign ? 
l_true : l_false; + } } + + // Composite case: conjunction (sign=false) or disjunction (sign=true) + if (!sign && m.is_and(c)) { + auto sz = path.size(); + lbool r = l_true; + for (expr* arg : *to_app(c)) { + lbool v = push_path(path, arg, false); + if (v == l_false) { path.shrink(sz); return l_false; } + if (v == l_undef) r = l_undef; + } + if (r == l_true) path.shrink(sz); + return r; + } + if (sign && m.is_or(c)) { + auto sz = path.size(); + lbool r = l_true; + for (expr* arg : *to_app(c)) { + lbool v = push_path(path, arg, true); + if (v == l_false) { path.shrink(sz); return l_false; } + if (v == l_undef) r = l_undef; + } + if (r == l_true) path.shrink(sz); + return r; + } + + // Atomic case: not determined, push onto path + path.push_back({ c, sign }); + return l_undef; } - void derive::push_intervals(intervals_t& intervals, expr* c, bool sign) { + lbool derive::push_intervals(intervals_t& intervals, expr* c, bool sign) { + // First check if the condition is already determined by current intervals + lbool range_val = eval_range_cond(intervals, c); + if (range_val != l_undef) + return sign ? 
~range_val : range_val; + + // Not determined — modify intervals unsigned lo = 0, hi = 0; bool negated = false; if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) { - // is_char_const_range returns the range [lo, hi] such that - // c ≡ (lo <= x <= hi) when negated=false, or c ≡ ¬(lo <= x <= hi) when negated=true bool effective_neg = (negated != sign); - // effective_neg=false means condition is asserted true: intersect with [lo, hi] - // effective_neg=true means condition is negated: exclude [lo, hi] if (!effective_neg) { - if (lo <= hi) - intersect_intervals(lo, hi, intervals); - else - intervals.reset(); // contradictory range + if (lo > hi) + return l_false; + intersect_intervals(lo, hi, intervals); } else { if (lo <= hi) exclude_interval(lo, hi, intervals, u().max_char()); - // else: excluding empty range is a no-op } } else if (!sign && m.is_and(c)) { - for (expr* arg : *to_app(c)) - push_intervals(intervals, arg, false); + auto saved = intervals; + for (expr* arg : *to_app(c)) { + lbool v = push_intervals(intervals, arg, false); + if (v == l_false) { intervals = saved; return l_false; } + } } else if (sign && m.is_or(c)) { - for (expr* arg : *to_app(c)) - push_intervals(intervals, arg, true); + auto saved = intervals; + for (expr* arg : *to_app(c)) { + lbool v = push_intervals(intervals, arg, true); + if (v == l_false) { intervals = saved; return l_false; } + } } + return l_undef; } void derive::intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges) { @@ -1137,13 +1176,43 @@ namespace seq { std::pair derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e) { auto sz = path.size(); auto saved_intervals = intervals; - push_path(path, c, false); - push_intervals(intervals, c, false); + + // Push c with sign=false (then-branch: c is true) + lbool path_val = push_path(path, c, false); + if (path_val != l_undef) { + path.shrink(sz); + expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? 
t : e); + return { r, r }; + } + + lbool intv_val = push_intervals(intervals, c, false); + if (intv_val != l_undef) { + path.shrink(sz); + intervals = saved_intervals; + expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? t : e); + return { r, r }; + } + expr_ref st = simplify_ite_rec(path, intervals, t); path.shrink(sz); intervals = saved_intervals; - push_path(path, c, true); - push_intervals(intervals, c, true); + + // Push c with sign=true (else-branch: c is false) + path_val = push_path(path, c, true); + if (path_val != l_undef) { + path.shrink(sz); + expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? e : t); + return { r, r }; + } + + intv_val = push_intervals(intervals, c, true); + if (intv_val != l_undef) { + path.shrink(sz); + intervals = saved_intervals; + expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? e : t); + return { r, r }; + } + expr_ref se = simplify_ite_rec(path, intervals, e); path.shrink(sz); intervals = saved_intervals; @@ -1176,26 +1245,8 @@ namespace seq { if (cond_val == l_true) return simplify_ite_rec(path, intervals, t); if (cond_val == l_false) return simplify_ite_rec(path, intervals, e); - // Use interval-based range reasoning - lbool range_val = eval_range_cond(intervals, c); - if (range_val == l_true) return simplify_ite_rec(path, intervals, t); - if (range_val == l_false) return simplify_ite_rec(path, intervals, e); - - // Check direct structural matches in path (equality conditions) - for (auto const& [cond, sign] : path) { - if (c == cond) - return sign ? 
simplify_ite_rec(path, intervals, e) : simplify_ite_rec(path, intervals, t); - - expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; - if (!sign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { - if (m.is_value(lhs1)) std::swap(lhs1, rhs1); - if (m.is_value(lhs2)) std::swap(lhs2, rhs2); - if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) - return simplify_ite_rec(path, intervals, e); - } - } - // Cannot simplify c: recurse into branches with extended paths + // push_path and push_intervals will check subsumption/contradiction auto [st, se] = simplify_ite_rec(path, intervals, c, t, e); return mk_ite(c, st, se); } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index a524f39ee..0c7961ff1 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -122,8 +122,8 @@ namespace seq { expr_ref simplify_ite(expr* d); expr_ref simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d); std::pair simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e); - void push_path(path_t& path, expr* c, bool sign); - void push_intervals(intervals_t& intervals, expr* c, bool sign); + lbool push_path(path_t& path, expr* c, bool sign); + lbool push_intervals(intervals_t& intervals, expr* c, bool sign); lbool eval_cond(expr* cond); lbool eval_range_cond(intervals_t const& intervals, expr* c); static void intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges); From 583775129fdef11714f7c7e29e0466074de18973 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sat, 6 Jun 2026 11:34:26 -0700 Subject: [PATCH 17/32] conservative expansions --- benchmarks/instance08175.smt2 | 23 +++++ benchmarks/instance08315.smt2 | 22 +++++ benchmarks/instance08965.smt2 | 23 +++++ benchmarks/instance09159.smt2 | 23 +++++ benchmarks/instance11213.smt2 | 22 +++++ benchmarks/instance11705.smt2 | 23 +++++ benchmarks/instance11745.smt2 | 21 +++++ benchmarks/instance11856.smt2 | 21 +++++ 
benchmarks/instance12204.smt2 | 22 +++++ benchmarks/instance12488.smt2 | 22 +++++ benchmarks/instance12671.smt2 | 21 +++++ benchmarks/instance12768.smt2 | 22 +++++ benchmarks/instance12833.smt2 | 22 +++++ benchmarks/instance13062.smt2 | 23 +++++ benchmarks/instance13106.smt2 | 23 +++++ benchmarks/instance13975.smt2 | 22 +++++ benchmarks/instance14260.smt2 | 23 +++++ benchmarks/instance14326.smt2 | 21 +++++ benchmarks/instance14382.smt2 | 21 +++++ src/ast/rewriter/seq_derive.cpp | 157 +++++++++++++++++++++++++------- src/ast/rewriter/seq_derive.h | 22 ++++- 21 files changed, 564 insertions(+), 35 deletions(-) create mode 100644 benchmarks/instance08175.smt2 create mode 100644 benchmarks/instance08315.smt2 create mode 100644 benchmarks/instance08965.smt2 create mode 100644 benchmarks/instance09159.smt2 create mode 100644 benchmarks/instance11213.smt2 create mode 100644 benchmarks/instance11705.smt2 create mode 100644 benchmarks/instance11745.smt2 create mode 100644 benchmarks/instance11856.smt2 create mode 100644 benchmarks/instance12204.smt2 create mode 100644 benchmarks/instance12488.smt2 create mode 100644 benchmarks/instance12671.smt2 create mode 100644 benchmarks/instance12768.smt2 create mode 100644 benchmarks/instance12833.smt2 create mode 100644 benchmarks/instance13062.smt2 create mode 100644 benchmarks/instance13106.smt2 create mode 100644 benchmarks/instance13975.smt2 create mode 100644 benchmarks/instance14260.smt2 create mode 100644 benchmarks/instance14326.smt2 create mode 100644 benchmarks/instance14382.smt2 diff --git a/benchmarks/instance08175.smt2 b/benchmarks/instance08175.smt2 new file mode 100644 index 000000000..fe34ad2d5 --- /dev/null +++ b/benchmarks/instance08175.smt2 @@ -0,0 +1,23 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a 
collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (re.* (str.to_re " ")) (str.to_re "=") (re.* (str.to_re " ")) (re.* (str.to_re "\u{22}")) (str.to_re "cid") (re.* (str.to_re " ")) (str.to_re ":") (re.* (str.to_re " ")) (re.+ (re.union (str.to_re "\u{22}") (str.to_re "<") (str.to_re ">") (str.to_re " "))) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.++ ((_ re.loop 1 11) (re.union (re.range "a" "z") (re.range "0" "9"))) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.++ ((_ re.loop 3 3) (re.range "0" "9")) ((_ re.loop 1 1) (re.union (str.to_re "-") (str.to_re "|") (str.to_re "/"))) ((_ re.loop 6 6) (re.range "0" "9")) ((_ re.loop 1 1) (re.union (str.to_re "-") (str.to_re "|") (str.to_re "/"))) ((_ re.loop 6 6) (re.range "0" "9")) (str.to_re "\u{a}")))) +(assert (not (str.in_re X (str.to_re "http://tv.seekmo.com/showme.aspx?keyword=\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "@") (re.union (re.++ ((_ re.loop 2 255) (re.union (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re "-"))) (str.to_re ".") (re.union (str.to_re "ad") (str.to_re "ae") (str.to_re "af") (str.to_re "ag") (str.to_re "ai") (str.to_re "al") (str.to_re "am") (str.to_re "an") (str.to_re "ao") (str.to_re "aq") (str.to_re "ar") (str.to_re "as") (str.to_re "at") (str.to_re "au") (str.to_re "aw") (str.to_re "az") (str.to_re "ba") (str.to_re "bb") (str.to_re "bd") (str.to_re "be") 
(str.to_re "bf") (str.to_re "bg") (str.to_re "bh") (str.to_re "bi") (str.to_re "bj") (str.to_re "bm") (str.to_re "bn") (str.to_re "bo") (str.to_re "br") (str.to_re "bs") (str.to_re "bt") (str.to_re "bv") (str.to_re "bw") (str.to_re "by") (str.to_re "bz") (str.to_re "ca") (str.to_re "cc") (str.to_re "cf") (str.to_re "cg") (str.to_re "ch") (str.to_re "ci") (str.to_re "ck") (str.to_re "cl") (str.to_re "cm") (str.to_re "cn") (str.to_re "co") (str.to_re "cr") (str.to_re "cu") (str.to_re "cv") (str.to_re "cx") (str.to_re "cy") (str.to_re "cz") (str.to_re "de") (str.to_re "di") (str.to_re "dk") (str.to_re "dm") (str.to_re "do") (str.to_re "dz") (str.to_re "ec") (str.to_re "ee") (str.to_re "eg") (str.to_re "eh") (str.to_re "er") (str.to_re "es") (str.to_re "et") (str.to_re "fi") (str.to_re "fj") (str.to_re "fk") (str.to_re "fm") (str.to_re "fo") (str.to_re "fr") (str.to_re "ga") (str.to_re "gb") (str.to_re "gd") (str.to_re "ge") (str.to_re "gf") (str.to_re "gh") (str.to_re "gi") (str.to_re "gl") (str.to_re "gm") (str.to_re "gn") (str.to_re "gp") (str.to_re "gq") (str.to_re "gr") (str.to_re "gs") (str.to_re "gt") (str.to_re "gu") (str.to_re "gw") (str.to_re "gy") (str.to_re "hk") (str.to_re "hm") (str.to_re "hn") (str.to_re "hr") (str.to_re "ht") (str.to_re "hu") (str.to_re "id") (str.to_re "ie") (str.to_re "il") (str.to_re "in") (str.to_re "io") (str.to_re "iq") (str.to_re "ir") (str.to_re "is") (str.to_re "it") (str.to_re "jo") (str.to_re "jm") (str.to_re "jp") (str.to_re "ke") (str.to_re "kg") (str.to_re "kh") (str.to_re "ki") (str.to_re "km") (str.to_re "kn") (str.to_re "kp") (str.to_re "kr") (str.to_re "kw") (str.to_re "ky") (str.to_re "kz") (str.to_re "la") (str.to_re "lb") (str.to_re "lc") (str.to_re "li") (str.to_re "lk") (str.to_re "lr") (str.to_re "ls") (str.to_re "lt") (str.to_re "lu") (str.to_re "lv") (str.to_re "ly") (str.to_re "ma") (str.to_re "mc") (str.to_re "md") (str.to_re "mg") (str.to_re "mh") (str.to_re "mk") (str.to_re "ml") (str.to_re "mm") (str.to_re 
"mn") (str.to_re "mo") (str.to_re "mp") (str.to_re "mq") (str.to_re "mr") (str.to_re "ms") (str.to_re "mt") (str.to_re "mu") (str.to_re "mv") (str.to_re "mw") (str.to_re "mx") (str.to_re "my") (str.to_re "mz") (str.to_re "an") (str.to_re "nc") (str.to_re "ne") (str.to_re "nf") (str.to_re "ng") (str.to_re "ni") (str.to_re "nl") (str.to_re "no") (str.to_re "np") (str.to_re "nr") (str.to_re "nt") (str.to_re "nu") (str.to_re "nz") (str.to_re "om") (str.to_re "pa") (str.to_re "pe") (str.to_re "pf") (str.to_re "pg") (str.to_re "ph") (str.to_re "pk") (str.to_re "pl") (str.to_re "pm") (str.to_re "pn") (str.to_re "pr") (str.to_re "pt") (str.to_re "pw") (str.to_re "py") (str.to_re "qa") (str.to_re "re") (str.to_re "ro") (str.to_re "ru") (str.to_re "rw") (str.to_re "sa") (str.to_re "sb") (str.to_re "sc") (str.to_re "sd") (str.to_re "se") (str.to_re "sq") (str.to_re "sh") (str.to_re "si") (str.to_re "sj") (str.to_re "sk") (str.to_re "sl") (str.to_re "sm") (str.to_re "sn") (str.to_re "so") (str.to_re "sr") (str.to_re "st") (str.to_re "su") (str.to_re "sv") (str.to_re "sy") (str.to_re "sz") (str.to_re "tc") (str.to_re "td") (str.to_re "tf") (str.to_re "tg") (str.to_re "th") (str.to_re "tj") (str.to_re "tk") (str.to_re "tm") (str.to_re "tn") (str.to_re "to") (str.to_re "tp") (str.to_re "tr") (str.to_re "tt") (str.to_re "tv") (str.to_re "tw") (str.to_re "tz") (str.to_re "ua") (str.to_re "ug") (str.to_re "uk") (str.to_re "um") (str.to_re "us") (str.to_re "uy") (str.to_re "uz") (str.to_re "va") (str.to_re "vc") (str.to_re "ve") (str.to_re "vg") (str.to_re "vi") (str.to_re "vn") (str.to_re "vu") (str.to_re "wf") (str.to_re "ws") (str.to_re "ye") (str.to_re "yt") (str.to_re "yu") (str.to_re "za") (str.to_re "zm") (str.to_re "zr") (str.to_re "zw") (str.to_re "arpa") (str.to_re "arts") (str.to_re "biz") (str.to_re "com") (str.to_re "edu") (str.to_re "firm") (str.to_re "gov") (str.to_re "info") (str.to_re "int") (str.to_re "mil") (str.to_re "nato") (str.to_re "net") (str.to_re "nom") 
(str.to_re "org") (str.to_re "rec") (str.to_re "store") (str.to_re "web"))) (re.++ (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) (re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "1" "9")) (str.to_re ".") (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) (re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "1" "9") (str.to_re "0")) (str.to_re ".") (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) (re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "1" "9") (str.to_re "0")) (str.to_re ".") (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) (re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "0" "9")))) (str.to_re "\u{a}") (re.+ (re.union (str.to_re "_") (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re "-"))) (str.to_re ".") (re.* (re.union (str.to_re "_") (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re "-")))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance08315.smt2 b/benchmarks/instance08315.smt2 new file mode 100644 index 000000000..45eecdd57 --- /dev/null +++ b/benchmarks/instance08315.smt2 @@ -0,0 +1,22 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk 
Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status sat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (str.to_re "\u{a}") (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "3"))) (str.to_re ":") (re.range "0" "5") (re.range "0" "9"))))) +(assert (not (str.in_re X (re.++ (re.union (str.to_re "0") (re.++ (re.range "1" "9") ((_ re.loop 0 3) (re.range "0" "9"))) (re.++ (re.range "1" "5") ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (str.to_re "6") (re.range "0" "5") (re.range "0" "5") (re.union (re.++ (re.range "0" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "5"))))) (str.to_re "\u{a}"))))) +(assert (not (str.in_re X (re.++ (re.union (str.to_re "ac") (str.to_re "AC") (str.to_re "al") (str.to_re "AL") (str.to_re "am") (str.to_re "AM") (str.to_re "ap") (str.to_re "AP") (str.to_re "ba") (str.to_re "BA") (str.to_re "ce") (str.to_re "CE") (str.to_re "df") (str.to_re "DF") (str.to_re "es") (str.to_re "ES") (str.to_re "go") (str.to_re "GO") (str.to_re "ma") (str.to_re "MA") (str.to_re "mg") (str.to_re "MG") (str.to_re "ms") (str.to_re "MS") (str.to_re "mt") (str.to_re "MT") (str.to_re "pa") (str.to_re "PA") (str.to_re "pb") (str.to_re "PB") (str.to_re "pe") (str.to_re "PE") (str.to_re "pi") (str.to_re "PI") (str.to_re "pr") (str.to_re "PR") (str.to_re "rj") 
(str.to_re "RJ") (str.to_re "rn") (str.to_re "RN") (str.to_re "ro") (str.to_re "RO") (str.to_re "rr") (str.to_re "RR") (str.to_re "rs") (str.to_re "RS") (str.to_re "sc") (str.to_re "SC") (str.to_re "se") (str.to_re "SE") (str.to_re "sp") (str.to_re "SP") (str.to_re "to") (str.to_re "TO")) (str.to_re "\u{a}"))))) +(assert (not (str.in_re X (re.++ (re.union (re.++ (re.union (re.++ (re.* (str.to_re "0")) (re.range "1" "9")) (re.++ (re.union (str.to_re "1") (str.to_re "2")) (re.range "0" "9")) (re.++ (str.to_re "3") (re.union (str.to_re "0") (str.to_re "1")))) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (re.++ (re.* (str.to_re "0")) (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "8"))) (str.to_re "10") (str.to_re "12")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (re.++ (re.* (str.to_re "0")) (re.range "1" "9")) (re.++ (re.union (str.to_re "1") (str.to_re "2")) (re.range "0" "9")) (str.to_re "30")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (re.++ (re.* (str.to_re "0")) (re.union (str.to_re "4") (str.to_re "6") (str.to_re "9"))) (str.to_re "11")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (re.++ (re.* (str.to_re "0")) (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "8"))) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (str.to_re "29") (re.union (str.to_re ".") (str.to_re "-") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "0") (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "0") (str.to_re "4") (str.to_re 
"8")) (str.to_re "00")) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6")) (str.to_re "00")) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.range "0" "9") (re.range "0" "9") (str.to_re "0") (re.union (str.to_re "4") (str.to_re "8"))) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.range "0" "9") (re.range "0" "9") (re.union (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "0") (str.to_re "4") (str.to_re "8"))) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.range "0" "9") (re.range "0" "9") (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6")))) (str.to_re "\u{a}"))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance08965.smt2 b/benchmarks/instance08965.smt2 new file mode 100644 index 000000000..4449c9d4e --- /dev/null +++ b/benchmarks/instance08965.smt2 @@ -0,0 +1,23 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark 
+Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (str.to_re "qr/") (re.union (str.to_re "Alabama") (str.to_re "Alaska") (str.to_re "Arizona") (str.to_re "Arkansas") (str.to_re "California") (str.to_re "Colorado") (str.to_re "Connecticut") (str.to_re "Delaware") (str.to_re "Florida") (str.to_re "Georgia") (str.to_re "Hawaii") (str.to_re "Idaho") (str.to_re "Illinois") (str.to_re "Indiana") (str.to_re "Iowa") (str.to_re "Kansas") (str.to_re "Kentucky") (str.to_re "Louisiana") (str.to_re "Maine") (str.to_re "Maryland") (str.to_re "Massachusetts") (str.to_re "Michigan") (str.to_re "Minnesota") (str.to_re "Mississippi") (str.to_re "Missouri") (str.to_re "Montana") (str.to_re "Nebraska") (str.to_re "Nevada") (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Hampshire")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Jersey")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Mexico")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "York")) (re.++ (str.to_re "North") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "North") (re.union 
(str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Dakota")) (str.to_re "Ohio") (str.to_re "Oklahoma") (str.to_re "Oregon") (str.to_re "Pennsylvania") (re.++ (str.to_re "Rhode") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Island")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Dakota")) (str.to_re "Tennessee") (str.to_re "Texas") (str.to_re "Utah") (str.to_re "Vermont") (str.to_re "Virginia") (str.to_re "Washington") (re.++ (str.to_re "West") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Virginia")) (str.to_re "Wisconsin") (str.to_re "Wyoming")) (str.to_re "/\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "6") ((_ re.loop 7 7) (re.range "0" "9")) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "/Referer:") (re.+ (re.union (str.to_re "\u{d}") (str.to_re "\u{a}"))) (str.to_re "/.html\u{d}/Hsm\u{a}") ((_ re.loop 32 32) (re.union (str.to_re "_") (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (re.* (re.union (str.to_re "_") (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_")))))) +(assert (str.in_re X (re.++ (str.to_re "Wareztv.seekmo.com") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Keylogging\u{13}TRUSTYFILES.COM\u{a}")))) +(assert (not (str.in_re X (re.++ (str.to_re "Flooded") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Host:") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re 
"\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "AppName/GRSI|Server|\u{13}Host:origin=sidefindHost:User-Agent:\u{a}"))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance09159.smt2 b/benchmarks/instance09159.smt2 new file mode 100644 index 000000000..2d5e138d9 --- /dev/null +++ b/benchmarks/instance09159.smt2 @@ -0,0 +1,23 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status sat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (str.to_re "__") (re.+ re.allchar) (str.to_re "__\u{a}"))))) +(assert (not (str.in_re X (re.++ (str.to_re "/.avi") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}"))))) +(assert (not (str.in_re X (re.++ (str.to_re "User") (re.* re.allchar) (str.to_re "User-Agent:") (re.* re.allchar) (str.to_re "ResultATTENTION:riggiymd/wdhi.vhi\u{a}"))))) +(assert (str.in_re X (re.++ (re.union (re.++ (str.to_re "A") (re.union (str.to_re "BW") (str.to_re "FG") (str.to_re "GO") (str.to_re "IA") (re.++ (str.to_re "L") (re.union (str.to_re "A") (str.to_re "B"))) (re.++ (str.to_re "N") (re.union (str.to_re "D") (str.to_re "T"))) (re.++ (str.to_re "R") (re.union (str.to_re "E") (str.to_re "G") (str.to_re "M"))) (str.to_re "SM") (re.++ (str.to_re "T") (re.union (str.to_re "A") (str.to_re "F") (str.to_re "G"))) (re.++ (str.to_re "U") (re.union (str.to_re "S") (str.to_re "T"))) (str.to_re "ZE"))) (re.++ (str.to_re "B") (re.union (str.to_re "DI") (re.++ (str.to_re "E") (re.union (str.to_re "L") (str.to_re "N"))) (str.to_re "FA") (re.++ (str.to_re "G") (re.union (str.to_re "D") (str.to_re "R"))) (re.++ (str.to_re "H") (re.union (str.to_re "R") (str.to_re "S"))) (str.to_re "IH") (re.++ (str.to_re "L") (re.union (str.to_re "M") (str.to_re "R") (str.to_re "Z"))) (str.to_re "MU") (str.to_re "OL") (re.++ (str.to_re "R") (re.union (str.to_re "A") (str.to_re "B") (str.to_re "N"))) (str.to_re "TN") (str.to_re "VT") (str.to_re "WA"))) (re.++ (str.to_re "C") (re.union (re.++ (str.to_re "A") (re.union (str.to_re "F") (str.to_re "N"))) (str.to_re "CK") (re.++ (str.to_re "H") (re.union (str.to_re "E") (str.to_re "L") (str.to_re "N"))) (str.to_re "IV") (str.to_re "MR") (re.++ (str.to_re "O") (re.union (str.to_re "D") (str.to_re "G") (str.to_re "K") (str.to_re "L") (str.to_re 
"M"))) (str.to_re "PV") (str.to_re "RI") (str.to_re "UB") (str.to_re "XR") (re.++ (str.to_re "Y") (re.union (str.to_re "M") (str.to_re "P"))) (str.to_re "ZE"))) (re.++ (str.to_re "D") (re.union (str.to_re "EU") (str.to_re "JI") (str.to_re "MA") (str.to_re "NK") (str.to_re "OM") (str.to_re "ZA"))) (re.++ (str.to_re "E") (re.union (str.to_re "CU") (str.to_re "GY") (str.to_re "RI") (re.++ (str.to_re "S") (re.union (str.to_re "H") (str.to_re "P") (str.to_re "T"))) (str.to_re "TH"))) (re.++ (str.to_re "F") (re.union (str.to_re "IN") (str.to_re "JI") (str.to_re "LK") (re.++ (str.to_re "R") (re.union (str.to_re "A") (str.to_re "O"))) (str.to_re "SM"))) (re.++ (str.to_re "G") (re.union (str.to_re "AB") (str.to_re "BR") (str.to_re "EO") (str.to_re "GY") (str.to_re "HA") (re.++ (str.to_re "I") (re.union (str.to_re "B") (str.to_re "N"))) (str.to_re "LP") (str.to_re "MB") (str.to_re "NQ") (str.to_re "NB") (re.++ (str.to_re "R") (re.union (str.to_re "C") (str.to_re "D") (str.to_re "L"))) (str.to_re "TM") (re.++ (str.to_re "U") (re.union (str.to_re "F") (str.to_re "M") (str.to_re "Y"))))) (re.++ (str.to_re "H") (re.union (str.to_re "KG") (str.to_re "MD") (str.to_re "ND") (str.to_re "RV") (str.to_re "TI") (str.to_re "UN"))) (re.++ (str.to_re "I") (re.union (str.to_re "DN") (str.to_re "MN") (str.to_re "ND") (str.to_re "OT") (re.++ (str.to_re "R") (re.union (str.to_re "L") (str.to_re "N") (str.to_re "Q"))) (re.++ (str.to_re "S") (re.union (str.to_re "L") (str.to_re "R"))) (str.to_re "TA"))) (re.++ (str.to_re "J") (re.union (str.to_re "AM") (str.to_re "EY") (str.to_re "OR") (str.to_re "PN"))) (re.++ (str.to_re "K") (re.union (str.to_re "AZ") (str.to_re "EN") (str.to_re "GZ") (str.to_re "HM") (str.to_re "IR") (str.to_re "NA") (str.to_re "OR") (str.to_re "WT"))) (re.++ (str.to_re "L") (re.union (str.to_re "AO") (re.++ (str.to_re "B") (re.union (str.to_re "N") (str.to_re "R") (str.to_re "Y"))) (str.to_re "CA") (str.to_re "IE") (str.to_re "KA") (str.to_re "SO") (str.to_re "TU") 
(str.to_re "UX") (str.to_re "VA"))) (re.++ (str.to_re "M") (re.union (re.++ (str.to_re "A") (re.union (str.to_re "C") (str.to_re "F") (str.to_re "R"))) (str.to_re "CO") (re.++ (str.to_re "D") (re.union (str.to_re "A") (str.to_re "G") (str.to_re "V"))) (str.to_re "EX") (str.to_re "HL") (str.to_re "KD") (re.++ (str.to_re "L") (re.union (str.to_re "I") (str.to_re "T"))) (str.to_re "MR") (re.++ (str.to_re "N") (re.union (str.to_re "E") (str.to_re "G") (str.to_re "P"))) (str.to_re "OZ") (str.to_re "RT") (str.to_re "SR") (str.to_re "TQ") (str.to_re "US") (str.to_re "WI") (re.++ (str.to_re "Y") (re.union (str.to_re "S") (str.to_re "T"))))) (re.++ (str.to_re "N") (re.union (str.to_re "AM") (str.to_re "CL") (str.to_re "ER") (str.to_re "FK") (str.to_re "GA") (re.++ (str.to_re "I") (re.union (str.to_re "C") (str.to_re "U"))) (str.to_re "LD") (str.to_re "OR") (str.to_re "PL") (str.to_re "RU") (str.to_re "ZL"))) (str.to_re "OMN") (re.++ (str.to_re "P") (re.union (re.++ (str.to_re "A") (re.union (str.to_re "K") (str.to_re "N"))) (str.to_re "CN") (str.to_re "ER") (str.to_re "HL") (str.to_re "LW") (str.to_re "NG") (str.to_re "OL") (re.++ (str.to_re "R") (re.union (str.to_re "I") (str.to_re "K") (str.to_re "T") (str.to_re "Y"))) (str.to_re "SE") (str.to_re "YF"))) (str.to_re "QAT") (re.++ (str.to_re "R") (re.union (str.to_re "EU") (str.to_re "OU") (str.to_re "US") (str.to_re "WA"))) (re.++ (str.to_re "S") (re.union (str.to_re "AU") (str.to_re "DN") (str.to_re "EN") (re.++ (str.to_re "G") (re.union (str.to_re "P") (str.to_re "S"))) (str.to_re "HN") (str.to_re "JM") (re.++ (str.to_re "L") (re.union (str.to_re "B") (str.to_re "E") (str.to_re "V"))) (str.to_re "MR") (str.to_re "OM") (str.to_re "PM") (str.to_re "RB") (str.to_re "TP") (str.to_re "UR") (re.++ (str.to_re "V") (re.union (str.to_re "K") (str.to_re "N"))) (re.++ (str.to_re "W") (re.union (str.to_re "E") (str.to_re "Z"))) (re.++ (str.to_re "Y") (re.union (str.to_re "C") (str.to_re "R"))))) (re.++ (str.to_re "T") (re.union 
(re.++ (str.to_re "C") (re.union (str.to_re "A") (str.to_re "D"))) (str.to_re "GO") (str.to_re "HA") (str.to_re "JK") (re.++ (str.to_re "K") (re.union (str.to_re "L") (str.to_re "M"))) (str.to_re "LS") (str.to_re "ON") (str.to_re "TO") (re.++ (str.to_re "U") (re.union (str.to_re "N") (str.to_re "R") (str.to_re "V"))) (str.to_re "WN") (str.to_re "ZA"))) (re.++ (str.to_re "U") (re.union (str.to_re "EN") (str.to_re "GA") (str.to_re "KR") (str.to_re "MI") (str.to_re "RY") (str.to_re "SA") (str.to_re "ZB"))) (re.++ (str.to_re "V") (re.union (str.to_re "AT") (str.to_re "CT") (str.to_re "GB") (str.to_re "IR") (str.to_re "NM") (str.to_re "UT"))) (re.++ (str.to_re "W") (re.union (str.to_re "LF") (str.to_re "SM"))) (str.to_re "YEM") (re.++ (str.to_re "Z") (re.union (str.to_re "AF") (str.to_re "MB") (str.to_re "WE")))) (str.to_re "\u{a}")))) +(assert (not (str.in_re X (re.++ (re.* (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re "-") ((_ re.loop 2 2) (re.range "0" "9")))) (str.to_re "\u{a}"))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance11213.smt2 b/benchmarks/instance11213.smt2 new file mode 100644 index 000000000..0bdec4a17 --- /dev/null +++ b/benchmarks/instance11213.smt2 @@ -0,0 +1,22 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (str.to_re "//") ((_ re.loop 48 48) (re.union (str.to_re "-") (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "/U\u{a}"))))) +(assert (str.in_re X (re.++ (re.opt (re.++ (re.opt (str.to_re " ")) (re.opt (str.to_re "+")) (str.to_re "34"))) (re.union (re.++ (str.to_re "6") (re.union ((_ re.loop 8 8) (re.range "0" "9")) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 6 6) (re.range "0" "9"))) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9"))))) (re.++ (str.to_re "9") (re.union ((_ re.loop 8 8) (re.range "0" "9")) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 6 6) (re.range "0" "9"))) (re.++ (re.range "1" "9") (str.to_re " ") ((_ re.loop 7 7) (re.range "0" "9"))) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9"))) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 2 2) (re.range "0" "9")))))) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.++ (re.+ (re.union (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re ".") (str.to_re "-") (str.to_re "_") (str.to_re "'") (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.union (re.++ (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general") (str.to_re "unicode") (str.to_re "roman") (str.to_re "slovak") (str.to_re "czech") (str.to_re "icelandic") (re.++ (re.union (str.to_re "latv") (str.to_re "pers")) (str.to_re 
"ian")) (re.++ (re.union (str.to_re "dan") (str.to_re "pol") (str.to_re "span") (str.to_re "swed") (str.to_re "turk")) (str.to_re "ish")) (str.to_re "spanish2") (re.++ (re.union (str.to_re "esto") (str.to_re "lithua") (str.to_re "roma") (str.to_re "slove")) (str.to_re "nian_ci"))) (str.to_re "u") (re.union (str.to_re "cs2") (str.to_re "tf8"))) (re.++ (re.union (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re "roman"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re "1256"))) (str.to_re "armscii8") (str.to_re "geostd8") (str.to_re "ascii") (str.to_re "keybcs2") (str.to_re "greek") (str.to_re "hebrew") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general_ci"))) (re.++ (re.union (str.to_re "dec8") (str.to_re "swe7")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "swedish_ci"))) (re.++ (re.union (str.to_re "hp8") (str.to_re "latin5")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "english_ci"))) (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "gb") (re.union (str.to_re "2312") (str.to_re "k")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "chinese_ci"))) (re.++ (re.union (str.to_re "cp932") (str.to_re "eucjpms") (re.++ (re.union (str.to_re "s") (str.to_re "u")) (str.to_re "jis"))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "japanese_ci"))) (re.++ (str.to_re "euckr_") (re.union (str.to_re "bin") (str.to_re "korean_ci"))) (re.++ (str.to_re "tis620_") (re.union (str.to_re "bin") (str.to_re "thai_ci"))) (re.++ (str.to_re "latin1_") (re.union (str.to_re "bin") (re.++ (re.union (re.++ (re.union (str.to_re "dan") (str.to_re "span") (str.to_re "swed")) (str.to_re "ish")) (re.++ (str.to_re "german") (re.union (str.to_re "1") (str.to_re "2")))) (str.to_re "_ci")) (re.++ (str.to_re "general_c") (re.union (str.to_re "i") 
(str.to_re "s"))))) (re.++ (str.to_re "cp1250_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (str.to_re "general_ci"))) (re.++ (str.to_re "latin2_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (re.++ (re.union (str.to_re "general") (str.to_re "hungarian") (str.to_re "croatian")) (str.to_re "_ci")))) (re.++ (str.to_re "cp1257_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "lithuanian")) (str.to_re "_ci")))) (re.++ (str.to_re "latin7_") (re.union (str.to_re "bin") (re.++ (str.to_re "general_c") (re.union (str.to_re "i") (str.to_re "s"))) (str.to_re "estonian_cs"))) (re.++ (str.to_re "\u{a}cp1251_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "bulgarian") (str.to_re "ukrainian")) (str.to_re "_ci")) (str.to_re "general_cs")))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance11705.smt2 b/benchmarks/instance11705.smt2 new file mode 100644 index 000000000..2c344ae0e --- /dev/null +++ b/benchmarks/instance11705.smt2 @@ -0,0 +1,23 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (str.to_re "pjpoptwql/rlnj") (re.+ (re.range "0" "9")) (str.to_re "waiting") (re.+ (re.range "0" "9")) (str.to_re "ocllceclbhs/gth") (re.+ (re.union (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "gdvsotuqwsg/dxt.hdUser-Agent:\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "Host:") (re.range "0" "9") (str.to_re "Keylogger") (re.* re.allchar) (str.to_re "Onetrustyfiles.com\u{a}")))) +(assert (str.in_re X (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re "-") (re.union (str.to_re "Jan") (str.to_re "Feb") (str.to_re "Mar") (str.to_re "Apr") (str.to_re "May") (str.to_re "Jun") (str.to_re "Jul") (str.to_re "Aug") (str.to_re "Sep") (str.to_re "Oct") (str.to_re "Nov") (str.to_re "Dec") (str.to_re "jan") (str.to_re "feb") (str.to_re "mar") (str.to_re "apr") (str.to_re "may") (str.to_re "jun") (str.to_re "jul") (str.to_re "aug") (str.to_re "sep") (str.to_re "oct") (str.to_re "nov") (str.to_re "dec") (str.to_re "JAN") (str.to_re "FEB") (str.to_re "MAR") (str.to_re "APR") (str.to_re "MAY") (str.to_re "JUN") (str.to_re "JUL") (str.to_re "AUG") (str.to_re "SEP") (str.to_re "OCT") (str.to_re "NOV") (str.to_re "DEC")) (str.to_re "-") ((_ re.loop 4 4) (re.range "0" "9")) (str.to_re "\u{a}")))) +(assert (not (str.in_re X (re.++ (re.range "\u{80}" "\u{ff}") (str.to_re "\u{a}"))))) +(assert (str.in_re X (re.++ (str.to_re "/.xlw") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance11745.smt2 b/benchmarks/instance11745.smt2 new file mode 100644 index 000000000..246a3f777 --- /dev/null +++ b/benchmarks/instance11745.smt2 @@ -0,0 +1,21 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, 
Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.union (str.to_re "EMA") (str.to_re "QCY") (str.to_re "SQZ") (str.to_re "ORM") (str.to_re "NQT") (str.to_re "WTN") (str.to_re "CBG") (str.to_re "QFO") (str.to_re "BEQ") (str.to_re "LKZ") (str.to_re "LTN") (str.to_re "KNF") (str.to_re "MHZ") (str.to_re "NWI") (str.to_re "CLF") (str.to_re "QUY") (str.to_re "SEN") (str.to_re "STN") (str.to_re "BEQ") (str.to_re "BQH") (str.to_re "LHR") (str.to_re "NHT") (str.to_re "LCY") (str.to_re "MME") (str.to_re "NCL") (str.to_re "BWF") (str.to_re "BLK") (str.to_re "CAX") (str.to_re "LPL") (str.to_re "MAN") (str.to_re "BBP") (str.to_re "BEX") (str.to_re "BZZ") (str.to_re "LGW") (str.to_re "SOU") (str.to_re "FAB") (str.to_re "OXF") (str.to_re "ESH") (str.to_re "QLA") (str.to_re "LYX") (str.to_re "KRH") (str.to_re "ODH") (str.to_re "RCS") (str.to_re "QUC") (str.to_re "BBS") (str.to_re "GLO") (str.to_re "EXT") (str.to_re "FFD") (str.to_re "BOH") (str.to_re "LYE") (str.to_re "NQY") (str.to_re "LEQ") (str.to_re "ISC") (str.to_re "UPV") (str.to_re "BRS") (str.to_re "YEO") (str.to_re "CVT") (str.to_re "BHX") (str.to_re "DSA") (str.to_re "HUY") (str.to_re "LBA") (str.to_re "HRT") (str.to_re "BFS") (str.to_re "BHD") (str.to_re "LDY") (str.to_re "ENK") (str.to_re 
"ABZ") (str.to_re "OBN") (str.to_re "BEB") (str.to_re "BRR") (str.to_re "CAL") (str.to_re "COL") (str.to_re "CSA") (str.to_re "NRL") (str.to_re "INV") (str.to_re "SCS") (str.to_re "DND") (str.to_re "LSI") (str.to_re "EOI") (str.to_re "EDI") (str.to_re "FIE") (str.to_re "FOA") (str.to_re "ILY") (str.to_re "FSS") (str.to_re "NDY") (str.to_re "ADX") (str.to_re "LMO") (str.to_re "OUK") (str.to_re "PSV") (str.to_re "PPW") (str.to_re "PIK") (str.to_re "GLA") (str.to_re "KOI") (str.to_re "PSL") (str.to_re "SYY") (str.to_re "SKL") (str.to_re "SOY") (str.to_re "LWK") (str.to_re "TRE") (str.to_re "WRY") (str.to_re "WHS") (str.to_re "WIC") (str.to_re "HAW") (str.to_re "CEG") (str.to_re "VLY") (str.to_re "SWS") (str.to_re "CWL") (str.to_re "DGX") (str.to_re "ACI") (str.to_re "GCI") (str.to_re "IOM") (str.to_re "JER\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "/User-Agent: ") ((_ re.loop 9 9) (re.range "A" "Z")) (str.to_re "\u{d}\u{a}/Hm\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "Admin") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "daosearch.comMyPostwww.raxsearch.comref=%user_id\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance11856.smt2 b/benchmarks/instance11856.smt2 new file mode 100644 index 000000000..0c9a4d020 --- /dev/null +++ b/benchmarks/instance11856.smt2 @@ -0,0 +1,21 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. 
D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status sat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (str.to_re "/filename=") (re.* (re.comp (str.to_re "\u{a}"))) (str.to_re ".addin/i\u{a}"))))) +(assert (not (str.in_re X (re.++ (str.to_re "//") ((_ re.loop 5 5) (re.range "0" "9")) (str.to_re ".jar/U\u{a}"))))) +(assert (str.in_re X (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "euc") (re.union (str.to_re "kr") (str.to_re "jpms"))) (str.to_re "binary") (str.to_re "greek") (str.to_re "tis620") (str.to_re "hebrew") (str.to_re "ascii") (str.to_re "swe7") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u"))) (re.++ (re.union (str.to_re "u") (str.to_re "keyb")) (str.to_re "cs2")) (re.++ (re.union (str.to_re "dec") (str.to_re "hp") (str.to_re "utf") (str.to_re "geostd") (str.to_re "armscii")) (str.to_re "8")) (re.++ (str.to_re "gb") (re.union (str.to_re "k") (str.to_re "2312"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re "932") (re.++ (str.to_re "125") (re.union (str.to_re "0") (str.to_re "1") (str.to_re "6") (str.to_re "7"))))) (re.++ (str.to_re "latin") (re.union (str.to_re "1") (str.to_re "2") (str.to_re "5") (str.to_re "7"))) (re.++ (re.union (str.to_re "u") (str.to_re "s")) (str.to_re "jis")) (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re "roman")))) (str.to_re "\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance12204.smt2 b/benchmarks/instance12204.smt2 new file mode 100644 index 000000000..4a192538c --- /dev/null +++ b/benchmarks/instance12204.smt2 @@ -0,0 +1,22 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, 
Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (re.* (re.range "0" "9")) (re.+ (re.range "1" "9")) (re.* (re.range "0" "9")) (str.to_re "\u{a}"))))) +(assert (not (str.in_re X (re.++ (str.to_re "/.svg") (re.opt (str.to_re "z")) (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}"))))) +(assert (str.in_re X (re.++ (str.to_re "<") (re.opt (str.to_re "/")) (re.union (str.to_re "a") (str.to_re "abbr") (str.to_re "acronym") (str.to_re "address") (str.to_re "applet") (str.to_re "area") (str.to_re "b") (str.to_re "base") (str.to_re "basefont") (str.to_re "bdo") (str.to_re "big") (str.to_re "blockquote") (str.to_re "body") (str.to_re "br") (str.to_re "button") (str.to_re "caption") (str.to_re "center") (str.to_re "cite") (str.to_re "code") (str.to_re "col") (str.to_re "colgroup") (str.to_re "dd") (str.to_re "del") (str.to_re "dir") (str.to_re "div") (str.to_re "dfn") (str.to_re "dl") (str.to_re "dt") (str.to_re "em") (str.to_re "fieldset") (str.to_re "font") (str.to_re "form") (str.to_re "frame") (str.to_re "frameset") (re.++ (str.to_re "h") (re.range "1" "6")) (str.to_re "head") (str.to_re "hr") (str.to_re "html") (str.to_re "i") (str.to_re "iframe") (str.to_re "img") (str.to_re 
"input") (str.to_re "ins") (str.to_re "isindex") (str.to_re "kbd") (str.to_re "label") (str.to_re "legend") (str.to_re "li") (str.to_re "link") (str.to_re "map") (str.to_re "menu") (str.to_re "meta") (str.to_re "noframes") (str.to_re "noscript") (str.to_re "object") (str.to_re "ol") (str.to_re "optgroup") (str.to_re "option") (str.to_re "p") (str.to_re "param") (str.to_re "pre") (str.to_re "q") (str.to_re "s") (str.to_re "samp") (str.to_re "script") (str.to_re "select") (str.to_re "small") (str.to_re "span") (str.to_re "strike") (str.to_re "strong") (str.to_re "style") (str.to_re "sub") (str.to_re "sup") (str.to_re "table") (str.to_re "tbody") (str.to_re "td") (str.to_re "textarea") (str.to_re "tfoot") (str.to_re "th") (str.to_re "thead") (str.to_re "title") (str.to_re "tr") (str.to_re "tt") (str.to_re "u") (str.to_re "ul") (str.to_re "var") (str.to_re "xmp")) (re.* (re.union (re.* (re.union (re.++ (str.to_re "\u{22}") (re.* (re.comp (str.to_re "\u{22}"))) (str.to_re "\u{22}")) (re.++ (str.to_re "'") (re.* (re.comp (str.to_re "'"))) (str.to_re "'")))) (str.to_re "\u{22}") (str.to_re "'") (str.to_re ">"))) (str.to_re ">\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "/SOAPAction:") (re.* (re.union (str.to_re "\u{d}") (str.to_re "\u{a}"))) (str.to_re "Get") (re.union (str.to_re "ServerTime") (str.to_re "FileList") (str.to_re "File")) (str.to_re "\u{22}/i\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance12488.smt2 b/benchmarks/instance12488.smt2 new file mode 100644 index 000000000..b62061cb5 --- /dev/null +++ b/benchmarks/instance12488.smt2 @@ -0,0 +1,22 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark 
+Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (re.range "1" "9") (re.range "0" "9") (re.range "0" "9") (re.range "0" "9") (re.range "0" "9") (re.range "0" "9") (str.to_re "\u{a}"))))) +(assert (str.in_re X (re.++ (str.to_re "/.s3m") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) +(assert (str.in_re X (re.++ ((_ re.loop 7 8) (re.range "0" "9")) (str.to_re "\u{a}0") (re.union (str.to_re "2") (str.to_re "|") (str.to_re "3") (str.to_re "4") (str.to_re "5") (str.to_re "6") (str.to_re "8") (str.to_re "9") (str.to_re "7"))))) +(assert (str.in_re X (re.++ (re.+ (re.union (str.to_re "_") (re.range "a" "z") (re.range "0" "9") (str.to_re "-"))) (re.* (re.++ (str.to_re ".") (re.+ (re.union (str.to_re "_") (re.range "a" "z") (re.range "0" "9") (str.to_re "-"))))) (str.to_re "@") (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))) (re.opt (re.++ (re.opt (str.to_re "-")) (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))))) (re.* (re.++ (str.to_re ".") (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))) (re.opt (re.++ (re.opt (str.to_re "-")) (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))))))) (str.to_re ".") (re.union ((_ re.loop 2 2) (re.range "a" "z")) (re.++ (str.to_re "xn") ((_ re.loop 2 2) (str.to_re "-")) ((_ re.loop 4 18) (re.union (re.range "a" "z") (re.range "0" "9")))) (str.to_re "arpa") (str.to_re "aero") (str.to_re "asia") (str.to_re "biz") (str.to_re "cat") (str.to_re "com") (str.to_re "coop") (str.to_re "edu") (str.to_re "gov") 
(str.to_re "info") (str.to_re "int") (str.to_re "jobs") (str.to_re "mil") (str.to_re "mobi") (str.to_re "museum") (str.to_re "name") (str.to_re "net") (str.to_re "org") (str.to_re "pro") (str.to_re "tel") (str.to_re "travel") (str.to_re "xxx")) (str.to_re "\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance12671.smt2 b/benchmarks/instance12671.smt2 new file mode 100644 index 000000000..164fbb7ed --- /dev/null +++ b/benchmarks/instance12671.smt2 @@ -0,0 +1,21 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.union (re.++ (re.union (re.++ (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (str.to_re "30")) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "P") (str.to_re ",") (str.to_re "p")) (re.union (str.to_re "R") (str.to_re ",") (str.to_re "r"))) (re.++ (re.union (str.to_re "J") (str.to_re ",") (str.to_re "j")) (re.union (str.to_re "U") (str.to_re ",") (str.to_re "u")) (re.union (str.to_re "N") (str.to_re ",") (str.to_re "n"))) (re.++ (re.union (str.to_re "S") (str.to_re ",") (str.to_re "s")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "P") (str.to_re ",") (str.to_re "p"))) (re.++ (re.union (str.to_re "N") (str.to_re ",") (str.to_re "n")) (re.union (str.to_re "O") (str.to_re ",") (str.to_re "o")) (re.union (str.to_re "V") (str.to_re ",") (str.to_re "v"))))) (re.++ (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "1"))) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "J") (str.to_re ",") (str.to_re "j")) (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "N") (str.to_re ",") (str.to_re "n"))) (re.++ (re.union (str.to_re "M") (str.to_re ",") (str.to_re "m")) (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "R") (str.to_re ",") (str.to_re "r"))) (re.++ (re.union (str.to_re "M") (str.to_re ",") (str.to_re "m")) (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "Y") (str.to_re ",") (str.to_re "y"))) (re.++ (re.union (str.to_re "J") (str.to_re ",") (str.to_re "j")) (re.union (str.to_re "U") (str.to_re ",") (str.to_re "u")) 
(re.union (str.to_re "L") (str.to_re ",") (str.to_re "l"))) (re.++ (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "U") (str.to_re ",") (str.to_re "u")) (re.union (str.to_re "G") (str.to_re ",") (str.to_re "g"))) (re.++ (re.union (str.to_re "O") (str.to_re ",") (str.to_re "o")) (re.union (str.to_re "C") (str.to_re ",") (str.to_re "c")) (re.union (str.to_re "T") (str.to_re ",") (str.to_re "t"))) (re.++ (re.union (str.to_re "D") (str.to_re ",") (str.to_re "d")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "C") (str.to_re ",") (str.to_re "c")))))) (str.to_re "-") ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "8"))) (str.to_re "--") ((_ re.loop 2 2) (re.range "0" "9")) (re.union (re.++ (re.union (str.to_re "0") (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "1") (str.to_re "2") (str.to_re "3") (str.to_re "5") (str.to_re "6") (str.to_re "7") (str.to_re "9"))) (re.++ (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "0") (str.to_re "1") (str.to_re "3") (str.to_re "4") (str.to_re "5") (str.to_re "7") (str.to_re "8") (str.to_re "9")))) (re.union (str.to_re "F") (str.to_re ",") (str.to_re "f")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "B") (str.to_re ",") (str.to_re "b"))) (re.++ (str.to_re "\u{a}") (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "9"))) (str.to_re "--") ((_ re.loop 2 2) (re.range "0" "9")) (re.union (re.++ (re.union (str.to_re "0") (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "0") (str.to_re "4") (str.to_re "8"))) (re.++ (re.union (str.to_re "1") (str.to_re "3") 
(str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6")))) (re.union (str.to_re "F") (str.to_re ",") (str.to_re "f")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "B") (str.to_re ",") (str.to_re "b")))))) +(assert (str.in_re X (re.union (re.++ (re.union (re.range "0" "9") (re.++ (re.range "0" "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "3"))) (str.to_re ":") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (re.union (str.to_re "AM") (str.to_re "PM") (str.to_re "am") (str.to_re "pm") (str.to_re "aM") (str.to_re "Am") (str.to_re "pM") (re.++ (str.to_re "P") ((_ re.loop 2 2) (str.to_re "m")))) (re.range "0" "5") (re.range "0" "9")) (re.++ (str.to_re "\u{a}") (re.union (re.range "0" "9") (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "3"))) (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (re.union (str.to_re "AM") (str.to_re "PM") (str.to_re "am") (str.to_re "pm") (str.to_re "aM") (str.to_re "Am") (str.to_re "pM") (re.++ (str.to_re "P") ((_ re.loop 2 2) (str.to_re "m")))))))) +(assert (str.in_re X (re.++ (str.to_re "Points") (re.+ (re.range "0" "9")) (str.to_re "Host:") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "toBasicwww.webcruiser.cc\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance12768.smt2 b/benchmarks/instance12768.smt2 new file mode 100644 index 000000000..361639b63 --- /dev/null +++ b/benchmarks/instance12768.smt2 @@ -0,0 +1,22 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex 
queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (str.to_re "/User-Agent:") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Ryeol") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "HTTP") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Client") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Class/smiH\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "Spyware") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "ToolBar") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "User-Agent:MM_RECO.EXEToClientonAlert\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "ver") (re.+ (re.range "0" "9")) (str.to_re "sports") (re.+ (re.union (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "whenu.com\u{13}wp-includes/feed.php?\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "qr/") (re.union (str.to_re "Alabama") (str.to_re "Alaska") (str.to_re "Arizona") (str.to_re "Arkansas") (str.to_re "California") (str.to_re 
"Colorado") (str.to_re "Connecticut") (str.to_re "Delaware") (str.to_re "Florida") (str.to_re "Georgia") (str.to_re "Hawaii") (str.to_re "Idaho") (str.to_re "Illinois") (str.to_re "Indiana") (str.to_re "Iowa") (str.to_re "Kansas") (str.to_re "Kentucky") (str.to_re "Louisiana") (str.to_re "Maine") (str.to_re "Maryland") (str.to_re "Massachusetts") (str.to_re "Michigan") (str.to_re "Minnesota") (str.to_re "Mississippi") (str.to_re "Missouri") (str.to_re "Montana") (str.to_re "Nebraska") (str.to_re "Nevada") (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Hampshire")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Jersey")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Mexico")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "York")) (re.++ (str.to_re "North") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "North") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Dakota")) (str.to_re "Ohio") (str.to_re "Oklahoma") (str.to_re "Oregon") (str.to_re "Pennsylvania") (re.++ (str.to_re "Rhode") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Island")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re 
"Dakota")) (str.to_re "Tennessee") (str.to_re "Texas") (str.to_re "Utah") (str.to_re "Vermont") (str.to_re "Virginia") (str.to_re "Washington") (re.++ (str.to_re "West") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Virginia")) (str.to_re "Wisconsin") (str.to_re "Wyoming")) (str.to_re "/\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance12833.smt2 b/benchmarks/instance12833.smt2 new file mode 100644 index 000000000..1cbbd4099 --- /dev/null +++ b/benchmarks/instance12833.smt2 @@ -0,0 +1,22 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (str.to_re "|Connected") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "adblock.linkz.com\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "//u\u{a}") (re.range "\u{0}" "\u{ff}") (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))))) +(assert (str.in_re X (re.++ (re.* (re.++ (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.union (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") 
(str.to_re "4") (str.to_re "7"))) (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8"))) (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8"))) (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") 
(str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7"))))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))))) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "/.pui") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance13062.smt2 b/benchmarks/instance13062.smt2 new file mode 100644 index 000000000..06bb16d4e --- /dev/null +++ b/benchmarks/instance13062.smt2 @@ -0,0 +1,23 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (str.to_re "/filename") (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "=") (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (re.* (re.union (str.to_re "\u{d}") (str.to_re "\u{a}"))) (str.to_re ".swf") (re.union (str.to_re "\u{22}") (str.to_re "'") (str.to_re ";") (str.to_re "\u{d}") (str.to_re "\u{a}") (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "/i\u{a}"))))) +(assert (not (str.in_re X (re.++ (re.union (re.++ (str.to_re "A") (re.union (str.to_re "L") (str.to_re "K") (str.to_re "S") (str.to_re "Z") (str.to_re "R") (str.to_re "A") (str.to_re "E") (str.to_re "P"))) (re.++ (str.to_re "C") (re.union (str.to_re "A") (str.to_re "O") (str.to_re "T"))) (re.++ (str.to_re "D") (re.union (str.to_re "E") (str.to_re "C"))) (re.++ (str.to_re "F") (re.union (str.to_re "L") (str.to_re "M"))) (re.++ (str.to_re "G") (re.union (str.to_re "A") (str.to_re "U"))) (str.to_re "HI") (re.++ (str.to_re "I") (re.union (str.to_re "A") (str.to_re "D") (str.to_re "L") (str.to_re "N"))) (re.++ (str.to_re "K") (re.union (str.to_re "S") (str.to_re "Y"))) (str.to_re "LA") (re.++ (str.to_re "M") (re.union (str.to_re "A") (str.to_re "D") (str.to_re "E") (str.to_re "H") (str.to_re "I") (str.to_re "N") (str.to_re "O") (str.to_re "P") (str.to_re "S") (str.to_re "T"))) (re.++ (str.to_re "N") (re.union (str.to_re "C") (str.to_re "D") (str.to_re "E") (str.to_re "H") (str.to_re "J") (str.to_re "M") (str.to_re "V") (str.to_re "Y"))) (re.++ (str.to_re "O") (re.union (str.to_re "H") (str.to_re "K") (str.to_re "R"))) (re.++ (str.to_re "P") (re.union (str.to_re "A") (str.to_re "R") (str.to_re "W"))) (str.to_re "RI") 
(re.++ (str.to_re "S") (re.union (str.to_re "C") (str.to_re "D"))) (re.++ (str.to_re "T") (re.union (str.to_re "N") (str.to_re "X"))) (str.to_re "UT") (re.++ (str.to_re "V") (re.union (str.to_re "A") (str.to_re "I") (str.to_re "T"))) (re.++ (str.to_re "W") (re.union (str.to_re "A") (str.to_re "I") (str.to_re "V") (str.to_re "Y")))) (str.to_re "\u{a}"))))) +(assert (str.in_re X (re.++ (str.to_re "/.wmv") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) +(assert (not (str.in_re X (re.++ (str.to_re "User-Agent:") (re.* re.allchar) (str.to_re "Host:") (re.* re.allchar) (str.to_re "w3whowww.sogou.com\u{a}"))))) +(assert (str.in_re X (re.++ (str.to_re "Guarded") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "ready") (re.+ (re.union (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "PARSERHost:A-311ServerUser-Agent:scn.mystoretoolbar.com\u{13}\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance13106.smt2 b/benchmarks/instance13106.smt2 new file mode 100644 index 000000000..f0a3462e3 --- /dev/null +++ b/benchmarks/instance13106.smt2 @@ -0,0 +1,23 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (str.to_re "security") (re.+ (re.range "0" "9")) (str.to_re "Redirector\u{22}ServerHost:X-Mailer:\u{13}\u{a}")))) +(assert (str.in_re X (re.++ (re.opt (re.++ (str.to_re "1") (re.union (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "-") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re ".") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "/") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.opt (re.++ (re.opt (str.to_re "(")) ((_ re.loop 1 1) (re.range "2" "9")) ((_ re.loop 2 2) (re.range "0" "9")) (re.opt (str.to_re ")")) (re.opt (re.union (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "-") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re ".") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") 
(str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "/") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))))) ((_ re.loop 3 3) (re.range "0" "9")) (re.opt (re.union (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "-") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re ".") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "/") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) ((_ re.loop 4 4) (re.range "0" "9")) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.++ (str.to_re "/filename=") (re.* (re.comp (str.to_re "\u{a}"))) (str.to_re ".svg/i\u{a}")))) +(assert (not (str.in_re X (str.to_re "MyHost:toHost:WinSessionwww.urlblaze.netResultHost:\u{a}")))) +(assert (not (str.in_re X (re.++ (str.to_re "/filename=") (re.opt (re.union (str.to_re "\u{22}") (str.to_re "'"))) (re.* (re.comp (str.to_re "\u{a}"))) (str.to_re ".pif") (re.union (str.to_re "\u{22}") (str.to_re "'") (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "/si\u{a}"))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance13975.smt2 b/benchmarks/instance13975.smt2 new file mode 100644 index 000000000..af0baede7 --- /dev/null +++ 
b/benchmarks/instance13975.smt2 @@ -0,0 +1,22 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. +|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (re.opt (str.to_re "D")) (re.opt (re.union (str.to_re "-") (str.to_re "D"))) ((_ re.loop 5 5) (re.range "0" "9")) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.union (re.++ (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general") (str.to_re "unicode") (str.to_re "roman") (str.to_re "slovak") (str.to_re "czech") (str.to_re "icelandic") (re.++ (re.union (str.to_re "latv") (str.to_re "pers")) (str.to_re "ian")) (re.++ (re.union (str.to_re "dan") (str.to_re "pol") (str.to_re "span") (str.to_re "swed") (str.to_re "turk")) (str.to_re "ish")) (str.to_re "spanish2") (re.++ (re.union (str.to_re "esto") (str.to_re "lithua") (str.to_re "roma") (str.to_re "slove")) (str.to_re "nian_ci"))) (str.to_re "u") (re.union (str.to_re "cs2") (str.to_re "tf8"))) (re.++ (re.union (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re "roman"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re "1256"))) 
(str.to_re "armscii8") (str.to_re "geostd8") (str.to_re "ascii") (str.to_re "keybcs2") (str.to_re "greek") (str.to_re "hebrew") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general_ci"))) (re.++ (re.union (str.to_re "dec8") (str.to_re "swe7")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "swedish_ci"))) (re.++ (re.union (str.to_re "hp8") (str.to_re "latin5")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "english_ci"))) (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "gb") (re.union (str.to_re "2312") (str.to_re "k")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "chinese_ci"))) (re.++ (re.union (str.to_re "cp932") (str.to_re "eucjpms") (re.++ (re.union (str.to_re "s") (str.to_re "u")) (str.to_re "jis"))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "japanese_ci"))) (re.++ (str.to_re "euckr_") (re.union (str.to_re "bin") (str.to_re "korean_ci"))) (re.++ (str.to_re "tis620_") (re.union (str.to_re "bin") (str.to_re "thai_ci"))) (re.++ (str.to_re "latin1_") (re.union (str.to_re "bin") (re.++ (re.union (re.++ (re.union (str.to_re "dan") (str.to_re "span") (str.to_re "swed")) (str.to_re "ish")) (re.++ (str.to_re "german") (re.union (str.to_re "1") (str.to_re "2")))) (str.to_re "_ci")) (re.++ (str.to_re "general_c") (re.union (str.to_re "i") (str.to_re "s"))))) (re.++ (str.to_re "cp1250_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (str.to_re "general_ci"))) (re.++ (str.to_re "latin2_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (re.++ (re.union (str.to_re "general") (str.to_re "hungarian") (str.to_re "croatian")) (str.to_re "_ci")))) (re.++ (str.to_re "cp1257_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "lithuanian")) (str.to_re "_ci")))) (re.++ (str.to_re "latin7_") (re.union (str.to_re "bin") (re.++ (str.to_re "general_c") (re.union (str.to_re "i") (str.to_re "s"))) (str.to_re 
"estonian_cs"))) (re.++ (str.to_re "\u{a}cp1251_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "bulgarian") (str.to_re "ukrainian")) (str.to_re "_ci")) (str.to_re "general_cs")))))) +(assert (not (str.in_re X (re.++ (str.to_re "/.cgm") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}"))))) +(assert (str.in_re X (re.++ (re.union (str.to_re "\u{5c}") (str.to_re "s") (str.to_re "+") (str.to_re ",")) (str.to_re "\u{a}")))) +(check-sat) + +(exit) diff --git a/benchmarks/instance14260.smt2 b/benchmarks/instance14260.smt2 new file mode 100644 index 000000000..bf36054fc --- /dev/null +++ b/benchmarks/instance14260.smt2 @@ -0,0 +1,23 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (not (str.in_re X (re.++ (str.to_re "")) (re.* re.allchar) (str.to_re "\u{a}"))))) +(assert (str.in_re X (re.++ (re.union (re.++ (re.range "0" "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "3"))) (str.to_re ":\u{a}") (re.range "0" "5") (re.range "0" "9")))) +(assert (str.in_re X (re.++ (str.to_re "Password=\u{22}") (re.union (re.++ (str.to_re "{") (re.+ re.allchar) (str.to_re "}") (re.+ (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z"))) (re.* (str.to_re "="))) (re.+ (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z")))) (str.to_re "\u{22}\u{a}")))) +(assert (str.in_re X (re.++ (re.union (re.++ (re.union (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "1"))) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "y") (str.to_re "Y"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "l") (str.to_re "L"))) (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "g") (str.to_re "G"))) (re.++ (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "c") (str.to_re "C")) (re.union (str.to_re "t") (str.to_re "T"))) (re.++ (re.union (str.to_re "d") (str.to_re "D")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "c") (str.to_re "C"))))) (re.++ (re.union (re.++ 
(re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (str.to_re "30")) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "p") (str.to_re "P")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "s") (str.to_re "S")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "p") (str.to_re "P"))) (re.++ (re.union (str.to_re "n") (str.to_re "N")) (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "v") (str.to_re "V"))))) (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "8"))) (str.to_re "-") (re.union (str.to_re "f") (str.to_re "F")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "b") (str.to_re "B")))) (str.to_re "-20") (re.union (re.++ (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "0") (str.to_re "1") (str.to_re "3") (str.to_re "4") (str.to_re "5") (str.to_re "7") (str.to_re "8") (str.to_re "9"))) (re.++ (re.union (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "1") (str.to_re "2") (str.to_re "3") (str.to_re "5") (str.to_re "6") (str.to_re "7") (str.to_re "9"))))) (re.++ (re.union (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "1"))) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union 
(str.to_re "a") (str.to_re "A")) (re.union (str.to_re "y") (str.to_re "Y"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "l") (str.to_re "L"))) (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "g") (str.to_re "G"))) (re.++ (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "c") (str.to_re "C")) (re.union (str.to_re "t") (str.to_re "T"))) (re.++ (re.union (str.to_re "d") (str.to_re "D")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "c") (str.to_re "C"))))) (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (str.to_re "30")) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "p") (str.to_re "P")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "s") (str.to_re "S")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "p") (str.to_re "P"))) (re.++ (re.union (str.to_re "n") (str.to_re "N")) (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "v") (str.to_re "V"))))) (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "9"))) (str.to_re "-") (re.union (str.to_re "f") (str.to_re "F")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "b") (str.to_re "B")))) (str.to_re "-20") (re.union (re.++ (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6"))) (re.++ (re.union (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "0") (str.to_re "4") (str.to_re "8")))))) (str.to_re "\u{a}")))) +(assert (not 
(str.in_re X (re.union (str.to_re "100") ((_ re.loop 1 2) (re.range "0" "9")) (re.++ ((_ re.loop 1 2) (re.range "0" "9")) (str.to_re ",") ((_ re.loop 1 3) (re.range "0" "9")) (str.to_re "\u{a}")))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance14326.smt2 b/benchmarks/instance14326.smt2 new file mode 100644 index 000000000..35b8ab5c2 --- /dev/null +++ b/benchmarks/instance14326.smt2 @@ -0,0 +1,21 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status sat) + +(declare-const X String) +(assert (not (str.in_re X (re.union (re.++ (re.union (str.to_re "Jan") (str.to_re "Feb") (str.to_re "Mar") (str.to_re "Apr") (str.to_re "May") (str.to_re "Jun") (str.to_re "Jul") (str.to_re "Aug") (str.to_re "Sep") (str.to_re "Oct") (str.to_re "Nov") (str.to_re "Dec")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (str.to_re "jan") (str.to_re "feb") (str.to_re "mar") (str.to_re "apr") (str.to_re "may") (str.to_re "jun") (str.to_re "jul") (str.to_re "aug") (str.to_re "sep") (str.to_re "oct") (str.to_re "nov") (str.to_re "dec")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (str.to_re "January") (str.to_re "February") (str.to_re "March") (str.to_re "April") (str.to_re "May") (str.to_re "June") (str.to_re "July") (str.to_re "August") (str.to_re "September") (str.to_re "October") (str.to_re "November") (str.to_re "December")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (str.to_re "january") (str.to_re "february") (str.to_re "march") (str.to_re "april") (str.to_re "may") (str.to_re "june") (str.to_re "july") (str.to_re "august") (str.to_re "september") (str.to_re "october") (str.to_re "november") (str.to_re "december")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) ((_ re.loop 4 4) (re.range "0" "9")) 
(str.to_re "\u{a}")))))) +(assert (not (str.in_re X (re.++ (str.to_re "//jdb/inf.php?id=") ((_ re.loop 32 32) (re.union (re.range "a" "f") (re.range "0" "9"))) (str.to_re "/Ui\u{a}"))))) +(assert (not (str.in_re X (re.++ (re.* (re.opt (re.range "0" "9"))) (re.opt (re.++ (str.to_re ".") (re.union ((_ re.loop 1 1) (re.range "0" "9")) ((_ re.loop 2 2) (re.range "0" "9"))))) (str.to_re "\u{a}"))))) +(check-sat) + +(exit) diff --git a/benchmarks/instance14382.smt2 b/benchmarks/instance14382.smt2 new file mode 100644 index 000000000..72c67b77c --- /dev/null +++ b/benchmarks/instance14382.smt2 @@ -0,0 +1,21 @@ +(set-info :smt-lib-version 2.6) +(set-logic QF_S) +(set-info :source | +Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu +Generated on: 2020-10-01 +Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark +Application: Evaluate solvers on real-world regex queries +Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau +Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
+|) +(set-info :license "https://creativecommons.org/licenses/by/4.0/") +(set-info :category "industrial") +(set-info :status unsat) + +(declare-const X String) +(assert (str.in_re X (re.++ (str.to_re "~/") (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z") (str.to_re "_")) (re.* (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z") (str.to_re "/") (str.to_re "_") (str.to_re "-"))) (str.to_re ".") (re.+ (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z") (str.to_re "_") (str.to_re "-"))) (str.to_re "\u{a}")))) +(assert (str.in_re X (re.++ (re.opt (str.to_re "-")) (re.opt (str.to_re ",")) (re.union (re.++ ((_ re.loop 1 3) (re.range "0" "9")) (re.* (re.++ (str.to_re ".") ((_ re.loop 3 3) (re.range "0" "9"))))) (re.+ (re.range "0" "9"))) (re.opt (re.++ (str.to_re ",") ((_ re.loop 2 2) (re.range "0" "9")))) (str.to_re "\u{a}")))) +(assert (not (str.in_re X (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "euc") (re.union (str.to_re "kr") (str.to_re "jpms"))) (str.to_re "binary") (str.to_re "greek") (str.to_re "tis620") (str.to_re "hebrew") (str.to_re "ascii") (str.to_re "swe7") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u"))) (re.++ (re.union (str.to_re "u") (str.to_re "keyb")) (str.to_re "cs2")) (re.++ (re.union (str.to_re "dec") (str.to_re "hp") (str.to_re "utf") (str.to_re "geostd") (str.to_re "armscii")) (str.to_re "8")) (re.++ (str.to_re "gb") (re.union (str.to_re "k") (str.to_re "2312"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re "932") (re.++ (str.to_re "125") (re.union (str.to_re "0") (str.to_re "1") (str.to_re "6") (str.to_re "7"))))) (re.++ (str.to_re "latin") (re.union (str.to_re "1") (str.to_re "2") (str.to_re "5") (str.to_re "7"))) (re.++ (re.union (str.to_re "u") (str.to_re "s")) (str.to_re "jis")) (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re 
"roman")))) (str.to_re "\u{a}"))))) +(check-sat) + +(exit) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 047c5f5f1..9f60d5be7 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -43,6 +43,10 @@ namespace seq { void derive::reset() { m_cache.reset(); m_top_cache.reset(); + m_union_cache.reset(); + m_inter_cache.reset(); + m_concat_cache.reset(); + m_complement_cache.reset(); m_trail.reset(); } @@ -573,6 +577,22 @@ namespace seq { } expr_ref derive::mk_union(expr* a, expr* b) { + // Check op cache + expr* cached = nullptr; + if (m_union_cache.find(a, b, cached)) + return expr_ref(cached, m); + + expr_ref result = mk_union_core(a, b); + + // Store in cache + m_union_cache.insert(a, b, result); + m_trail.push_back(a); + m_trail.push_back(b); + m_trail.push_back(result); + return result; + } + + expr_ref derive::mk_union_core(expr* a, expr* b) { // Identity / annihilator if (a == b) return expr_ref(a, m); if (re().is_empty(a)) return expr_ref(b, m); @@ -606,16 +626,29 @@ namespace seq { return mk_ite(c1, then_br, else_br); } - // ITE hoisting: ite(c, t, e) ∪ r = ite(c, t ∪ r, e ∪ r) + // Conservative ITE hoisting via subsumption: + // Only hoist when at least one branch simplifies by is_subset. if (m.is_ite(a, c1, t1, e1)) { - expr_ref then_br = mk_union(t1, b); - expr_ref else_br = mk_union(e1, b); - return mk_ite(c1, then_br, else_br); + bool t1_sub_b = is_subset(t1, b); // t1 ∪ b = b + bool b_sub_t1 = is_subset(b, t1); // t1 ∪ b = t1 + bool e1_sub_b = is_subset(e1, b); // e1 ∪ b = b + bool b_sub_e1 = is_subset(b, e1); // e1 ∪ b = e1 + if (t1_sub_b || b_sub_t1 || e1_sub_b || b_sub_e1) { + expr_ref then_br = t1_sub_b ? expr_ref(b, m) : b_sub_t1 ? expr_ref(t1, m) : mk_union(t1, b); + expr_ref else_br = e1_sub_b ? expr_ref(b, m) : b_sub_e1 ? 
expr_ref(e1, m) : mk_union(e1, b); + return mk_ite(c1, then_br, else_br); + } } if (m.is_ite(b, c2, t2, e2)) { - expr_ref then_br = mk_union(a, t2); - expr_ref else_br = mk_union(a, e2); - return mk_ite(c2, then_br, else_br); + bool t2_sub_a = is_subset(t2, a); // a ∪ t2 = a + bool a_sub_t2 = is_subset(a, t2); // a ∪ t2 = t2 + bool e2_sub_a = is_subset(e2, a); // a ∪ e2 = a + bool a_sub_e2 = is_subset(a, e2); // a ∪ e2 = e2 + if (t2_sub_a || a_sub_t2 || e2_sub_a || a_sub_e2) { + expr_ref then_br = t2_sub_a ? expr_ref(a, m) : a_sub_t2 ? expr_ref(t2, m) : mk_union(a, t2); + expr_ref else_br = e2_sub_a ? expr_ref(a, m) : a_sub_e2 ? expr_ref(e2, m) : mk_union(a, e2); + return mk_ite(c2, then_br, else_br); + } } // ACI: flatten, sort, deduplicate @@ -647,6 +680,22 @@ namespace seq { } expr_ref derive::mk_inter(expr* a, expr* b) { + // Check op cache + expr* cached = nullptr; + if (m_inter_cache.find(a, b, cached)) + return expr_ref(cached, m); + + expr_ref result = mk_inter_core(a, b); + + // Store in cache + m_inter_cache.insert(a, b, result); + m_trail.push_back(a); + m_trail.push_back(b); + m_trail.push_back(result); + return result; + } + + expr_ref derive::mk_inter_core(expr* a, expr* b) { // Identity / annihilator if (a == b) return expr_ref(a, m); if (re().is_empty(a)) return expr_ref(a, m); @@ -680,16 +729,24 @@ namespace seq { return mk_ite(c1, then_br, else_br); } - // ITE hoisting: ite(c, t, e) ∩ r = ite(c, t ∩ r, e ∩ r) - if (m.is_ite(a, c1, t1, e1)) { - expr_ref then_br = mk_inter(t1, b); - expr_ref else_br = mk_inter(e1, b); - return mk_ite(c1, then_br, else_br); - } - if (m.is_ite(b, c2, t2, e2)) { - expr_ref then_br = mk_inter(a, t2); - expr_ref else_br = mk_inter(a, e2); - return mk_ite(c2, then_br, else_br); + // ITE hoisting for intersection with depth bound. + // Unconditional hoisting needed for re.inter/re.diff/re.comp patterns, + // but bounded to prevent exponential blowup on union-heavy patterns. 
+ if (m_inter_hoist_depth < m_max_inter_hoist_depth) { + if (m.is_ite(a, c1, t1, e1)) { + m_inter_hoist_depth++; + expr_ref then_br = mk_inter(t1, b); + expr_ref else_br = mk_inter(e1, b); + m_inter_hoist_depth--; + return mk_ite(c1, then_br, else_br); + } + if (m.is_ite(b, c2, t2, e2)) { + m_inter_hoist_depth++; + expr_ref then_br = mk_inter(a, t2); + expr_ref else_br = mk_inter(a, e2); + m_inter_hoist_depth--; + return mk_ite(c2, then_br, else_br); + } } // ACI: flatten, sort, deduplicate @@ -763,6 +820,21 @@ namespace seq { } expr_ref derive::mk_complement(expr* a) { + // Check op cache + expr* cached = nullptr; + if (m_complement_cache.find(a, cached)) + return expr_ref(cached, m); + + expr_ref result = mk_complement_core(a); + + // Store in cache + m_complement_cache.insert(a, result); + m_trail.push_back(a); + m_trail.push_back(result); + return result; + } + + expr_ref derive::mk_complement_core(expr* a) { // ~~r → r expr* r = nullptr; if (re().is_complement(a, r)) @@ -952,6 +1024,22 @@ namespace seq { // ------------------------------------------------------- expr_ref derive::mk_deriv_concat(expr* d, expr* tail) { + // Check op cache + expr* cached = nullptr; + if (m_concat_cache.find(d, tail, cached)) + return expr_ref(cached, m); + + expr_ref result = mk_deriv_concat_core(d, tail); + + // Store in cache + m_concat_cache.insert(d, tail, result); + m_trail.push_back(d); + m_trail.push_back(tail); + m_trail.push_back(result); + return result; + } + + expr_ref derive::mk_deriv_concat_core(expr* d, expr* tail) { expr_ref _d(d, m), _tail(tail, m); expr* c, * t, * e; @@ -1173,7 +1261,7 @@ namespace seq { return l_undef; } - std::pair derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e) { + std::pair derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e, unsigned depth) { auto sz = path.size(); auto saved_intervals = intervals; @@ -1181,7 +1269,7 @@ namespace seq { lbool path_val = 
push_path(path, c, false); if (path_val != l_undef) { path.shrink(sz); - expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? t : e); + expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? t : e, depth); return { r, r }; } @@ -1189,11 +1277,12 @@ namespace seq { if (intv_val != l_undef) { path.shrink(sz); intervals = saved_intervals; - expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? t : e); + expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? t : e, depth); return { r, r }; } - expr_ref st = simplify_ite_rec(path, intervals, t); + // Then-branch increases depth + expr_ref st = simplify_ite_rec(path, intervals, t, depth + 1); path.shrink(sz); intervals = saved_intervals; @@ -1201,7 +1290,7 @@ namespace seq { path_val = push_path(path, c, true); if (path_val != l_undef) { path.shrink(sz); - expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? e : t); + expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? e : t, depth); return { r, r }; } @@ -1209,11 +1298,12 @@ namespace seq { if (intv_val != l_undef) { path.shrink(sz); intervals = saved_intervals; - expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? e : t); + expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? 
e : t, depth); return { r, r }; } - expr_ref se = simplify_ite_rec(path, intervals, e); + // Else-branch does NOT increase depth (covers disjoint cases) + expr_ref se = simplify_ite_rec(path, intervals, e, depth); path.shrink(sz); intervals = saved_intervals; return { st, se }; @@ -1231,27 +1321,28 @@ namespace seq { path_t path; intervals_t intervals; intervals.push_back(std::make_pair(0u, u().max_char())); - auto [st, se] = simplify_ite_rec(path, intervals, c, t, e); + auto [st, se] = simplify_ite_rec(path, intervals, c, t, e, 0); return mk_ite(c, st, se); } - expr_ref derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d) { + expr_ref derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d, unsigned depth) { expr* c, * t, * e; if (!m.is_ite(d, c, t, e)) return expr_ref(d, m); + // Depth limit reached — return without further simplification + if (depth >= m_max_simp_depth) + return expr_ref(d, m); + // Try to evaluate c directly (handles trivially true/false conditions) lbool cond_val = eval_cond(c); - if (cond_val == l_true) return simplify_ite_rec(path, intervals, t); - if (cond_val == l_false) return simplify_ite_rec(path, intervals, e); + if (cond_val == l_true) return simplify_ite_rec(path, intervals, t, depth); + if (cond_val == l_false) return simplify_ite_rec(path, intervals, e, depth); // Cannot simplify c: recurse into branches with extended paths // push_path and push_intervals will check subsumption/contradiction - auto [st, se] = simplify_ite_rec(path, intervals, c, t, e); + auto [st, se] = simplify_ite_rec(path, intervals, c, t, e, depth); return mk_ite(c, st, se); } -} - - - +} \ No newline at end of file diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 0c7961ff1..122619809 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -59,10 +59,24 @@ namespace seq { obj_pair_map m_top_cache; // post-simplify cache expr_ref_vector m_trail; // pin cached results + // 
Op cache for ITE-hoisting operations (union, inter, concat, complement) + obj_pair_map m_union_cache; + obj_pair_map m_inter_cache; + obj_pair_map m_concat_cache; + obj_map m_complement_cache; + // Depth limiting unsigned m_depth { 0 }; static const unsigned m_max_depth = 512; + // Simplify ITE recursion depth limit + unsigned m_simp_depth { 0 }; + static const unsigned m_max_simp_depth = 8; + + // Intersection ITE hoisting depth limit + unsigned m_inter_hoist_depth { 0 }; + static const unsigned m_max_inter_hoist_depth = 4; + seq_util::rex& re() { return m_util.re; } seq_util& u() { return m_util; } @@ -83,9 +97,12 @@ namespace seq { // Smart constructors with simplification and ACI canonicalization expr_ref mk_union(expr* a, expr* b); + expr_ref mk_union_core(expr* a, expr* b); expr_ref mk_inter(expr* a, expr* b); + expr_ref mk_inter_core(expr* a, expr* b); expr_ref mk_concat(expr* a, expr* b); expr_ref mk_complement(expr* a); + expr_ref mk_complement_core(expr* a); expr_ref mk_ite(expr* c, expr* t, expr* e); // Flatten and sort for ACI normal form @@ -104,6 +121,7 @@ namespace seq { // Distribute concatenation through ITE/union in derivative expr_ref mk_deriv_concat(expr* d, expr* tail); + expr_ref mk_deriv_concat_core(expr* d, expr* tail); // Extract head character and tail from a sequence expression bool get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl); @@ -120,8 +138,8 @@ namespace seq { // Simplify ITE conditions w.r.t. 
m_ele and path knowledge expr_ref simplify_ite(expr* d); - expr_ref simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d); - std::pair simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e); + expr_ref simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d, unsigned depth); + std::pair simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e, unsigned depth); lbool push_path(path_t& path, expr* c, bool sign); lbool push_intervals(intervals_t& intervals, expr* c, bool sign); lbool eval_cond(expr* cond); From ee67a94a9ccb5135bf9d6b168f64f263ef21690d Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sat, 6 Jun 2026 15:26:12 -0700 Subject: [PATCH 18/32] tuning simplification processing Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_derive.cpp | 257 +++++++++++++++++++++++++++----- src/ast/rewriter/seq_derive.h | 7 +- src/smt/seq_regex.cpp | 77 ++++++++-- src/smt/seq_regex.h | 2 +- 4 files changed, 293 insertions(+), 50 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 9f60d5be7..408b1e922 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -52,7 +52,7 @@ namespace seq { expr_ref derive::operator()(expr* ele, expr* r) { SASSERT(m_util.is_re(r)); - if (m_trail.size() > 1000) + if (m_trail.size() > 100000) reset(); // Check top-level cache (post-simplify result) expr* cached = nullptr; @@ -576,6 +576,109 @@ namespace seq { return false; } + // Extract character range [lo, hi] from a derivative condition. + // Conditions are of the form: + // char_le(lo_expr, ele) && char_le(ele, hi_expr) → range [lo, hi] + // char_le(lo_expr, ele) → range [lo, max_char] + // char_le(ele, hi_expr) → range [0, hi] + // Returns false if not a recognizable range condition. 
+ bool derive::extract_char_range(expr* cond, unsigned& lo, unsigned& hi) { + expr* e1 = nullptr, *e2 = nullptr, *lhs = nullptr, *rhs = nullptr; + lo = 0; + hi = u().max_char(); + + // Negation: ~(range [a,b]) = [0,a-1] ∪ [b+1,max] + // We don't handle negation here — it's handled via pred_implies logic + if (m.is_not(cond, e1)) + return false; + + // Conjunction: and(char_le(lo, x), char_le(x, hi)) + if (m.is_and(cond, e1, e2)) { + expr *a1, *a2, *b1, *b2; + unsigned v; + if (u().is_char_le(e1, a1, a2) && u().is_char_le(e2, b1, b2)) { + // e1: a1 <= a2, e2: b1 <= b2 + // Expect: lo <= ele (a1=const, a2=var) and ele <= hi (b1=var, b2=const) + // OR: ele <= hi (a1=var, a2=const) and lo <= ele (b1=const, b2=var) + if (u().is_const_char(a1, v) && u().is_const_char(b2, lo)) { + // e1: const <= a2, e2: b1 <= const + // This is: v <= ele and ele <= lo — wrong naming, let me fix + lo = v; + hi = 0; + if (u().is_const_char(b2, hi)) + return true; + } + } + // Try more carefully: extract from each conjunct + lo = 0; + hi = u().max_char(); + if (u().is_char_le(e1, a1, a2)) { + if (u().is_const_char(a1, v) && !u().is_const_char(a2, v)) + lo = std::max(lo, v); // v <= ele + else if (!u().is_const_char(a1, v) && u().is_const_char(a2, v)) + hi = std::min(hi, v); // ele <= v + } + if (u().is_char_le(e2, b1, b2)) { + unsigned v2; + if (u().is_const_char(b1, v2) && !u().is_const_char(b2, v2)) + lo = std::max(lo, v2); // v2 <= ele + else if (!u().is_const_char(b1, v2) && u().is_const_char(b2, v2)) + hi = std::min(hi, v2); // ele <= v2 + } + return lo <= hi; + } + + // Single char_le + if (u().is_char_le(cond, lhs, rhs)) { + unsigned v; + if (u().is_const_char(lhs, v) && !u().is_const_char(rhs, v)) { + lo = v; // v <= ele + return true; + } + if (!u().is_const_char(lhs, v) && u().is_const_char(rhs, v)) { + hi = v; // ele <= v + return true; + } + } + + return false; + } + + // Predicate implication for character range conditions. 
+ // Returns true if: whenever cond_a is true, cond_b must also be true. + // Used for BDD-merge of derivative ITE trees. + bool derive::pred_implies(expr* a, expr* b) { + if (a == b) return true; + + expr *nota = nullptr, *notb = nullptr; + + // ~a implies ~b iff b implies a + if (m.is_not(a, nota) && m.is_not(b, notb)) + return pred_implies(notb, nota); + + unsigned lo_a, hi_a, lo_b, hi_b; + + // a implies b: range_a ⊆ range_b + if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) + return lo_b <= lo_a && hi_a <= hi_b; + + // a implies ~b: range_a ∩ range_b = ∅ + if (m.is_not(b, notb)) { + if (extract_char_range(a, lo_a, hi_a) && extract_char_range(notb, lo_b, hi_b)) + return hi_a < lo_b || hi_b < lo_a; + } + + // ~a implies b: complement of range_a ⊆ range_b + // This is true when range_b covers everything outside range_a + // i.e., lo_b == 0 and hi_b >= max_char, minus range_a... complex, skip for now + if (m.is_not(a, nota)) { + if (extract_char_range(nota, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) + return lo_b <= 0 && hi_b >= u().max_char(); // only if b is universal + } + + return false; + } + expr_ref derive::mk_union(expr* a, expr* b) { // Check op cache expr* cached = nullptr; @@ -618,8 +721,10 @@ namespace seq { return mk_deriv_concat(expr_ref(a1, m), tail); } - // ITE combination: if both are ITE with same condition, merge + // ITE handling for union expr *c1, *t1, *e1, *c2, *t2, *e2; + + // Same condition merge (cheap, always correct) if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { expr_ref then_br = mk_union(t1, t2); expr_ref else_br = mk_union(e1, e2); @@ -629,10 +734,10 @@ namespace seq { // Conservative ITE hoisting via subsumption: // Only hoist when at least one branch simplifies by is_subset. 
if (m.is_ite(a, c1, t1, e1)) { - bool t1_sub_b = is_subset(t1, b); // t1 ∪ b = b - bool b_sub_t1 = is_subset(b, t1); // t1 ∪ b = t1 - bool e1_sub_b = is_subset(e1, b); // e1 ∪ b = b - bool b_sub_e1 = is_subset(b, e1); // e1 ∪ b = e1 + bool t1_sub_b = is_subset(t1, b); + bool b_sub_t1 = is_subset(b, t1); + bool e1_sub_b = is_subset(e1, b); + bool b_sub_e1 = is_subset(b, e1); if (t1_sub_b || b_sub_t1 || e1_sub_b || b_sub_e1) { expr_ref then_br = t1_sub_b ? expr_ref(b, m) : b_sub_t1 ? expr_ref(t1, m) : mk_union(t1, b); expr_ref else_br = e1_sub_b ? expr_ref(b, m) : b_sub_e1 ? expr_ref(e1, m) : mk_union(e1, b); @@ -640,10 +745,10 @@ namespace seq { } } if (m.is_ite(b, c2, t2, e2)) { - bool t2_sub_a = is_subset(t2, a); // a ∪ t2 = a - bool a_sub_t2 = is_subset(a, t2); // a ∪ t2 = t2 - bool e2_sub_a = is_subset(e2, a); // a ∪ e2 = a - bool a_sub_e2 = is_subset(a, e2); // a ∪ e2 = e2 + bool t2_sub_a = is_subset(t2, a); + bool a_sub_t2 = is_subset(a, t2); + bool e2_sub_a = is_subset(e2, a); + bool a_sub_e2 = is_subset(a, e2); if (t2_sub_a || a_sub_t2 || e2_sub_a || a_sub_e2) { expr_ref then_br = t2_sub_a ? expr_ref(a, m) : a_sub_t2 ? expr_ref(t2, m) : mk_union(a, t2); expr_ref else_br = e2_sub_a ? expr_ref(a, m) : a_sub_e2 ? 
expr_ref(e2, m) : mk_union(a, e2); @@ -651,6 +756,36 @@ namespace seq { } } + // BDD merge for union: only when both are ITE and pred_implies fires + // (avoids exponential blowup when conditions are unrelated) + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { + // Only merge if we can determine the relationship between conditions + bool c1_imp_c2 = pred_implies(c1, c2); + bool c1_imp_nc2 = !c1_imp_c2 && pred_implies(c1, m.mk_not(c2)); + expr_ref notc1(m.mk_not(c1), m); + bool nc1_imp_c2 = pred_implies(notc1, c2); + bool nc1_imp_nc2 = !nc1_imp_c2 && pred_implies(notc1, m.mk_not(c2)); + if (c1_imp_c2 || c1_imp_nc2 || nc1_imp_c2 || nc1_imp_nc2) { + // pred_implies fires — safe to merge without exponential blowup + expr_ref r1(m), r2(m); + // Under c1: + if (c1_imp_c2) + r1 = mk_union(t1, t2); + else if (c1_imp_nc2) + r1 = mk_union(t1, e2); + else + r1 = mk_union(t1, b); + // Under ~c1: + if (nc1_imp_c2) + r2 = mk_union(e1, t2); + else if (nc1_imp_nc2) + r2 = mk_union(e1, e2); + else + r2 = mk_union(e1, b); + return mk_ite(c1, r1, r2); + } + } + // ACI: flatten, sort, deduplicate expr_ref_vector args(m); flatten_union(a, args); @@ -721,17 +856,48 @@ namespace seq { return mk_deriv_concat(expr_ref(a1, m), tail); } - // ITE combination: if both are ITE with same condition, merge + // ITE handling for intersection expr *c1, *t1, *e1, *c2, *t2, *e2; + + // Same condition merge if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { expr_ref then_br = mk_inter(t1, t2); expr_ref else_br = mk_inter(e1, e2); return mk_ite(c1, then_br, else_br); } - // ITE hoisting for intersection with depth bound. - // Unconditional hoisting needed for re.inter/re.diff/re.comp patterns, - // but bounded to prevent exponential blowup on union-heavy patterns. 
+ // Both-ITE with pred_implies: exploit condition relationships (no depth cost) + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { + // Order conditions: larger id on outside + if (c1->get_id() < c2->get_id()) { + std::swap(a, b); + std::swap(c1, c2); + std::swap(t1, t2); + std::swap(e1, e2); + } + expr_ref r1(m), r2(m); + bool have_r1 = false, have_r2 = false; + // Under c1: what do we know about c2? + if (pred_implies(c1, c2)) { + r1 = mk_inter(t1, t2); have_r1 = true; + } else if (pred_implies(c1, m.mk_not(c2))) { + r1 = mk_inter(t1, e2); have_r1 = true; + } + // Under ~c1: what do we know about c2? + expr_ref notc1(m.mk_not(c1), m); + if (pred_implies(notc1, c2)) { + r2 = mk_inter(e1, t2); have_r2 = true; + } else if (pred_implies(notc1, m.mk_not(c2))) { + r2 = mk_inter(e1, e2); have_r2 = true; + } + if (have_r1 || have_r2) { + if (!have_r1) r1 = mk_inter(t1, b); + if (!have_r2) r2 = mk_inter(e1, b); + return mk_ite(c1, r1, r2); + } + } + + // ITE hoisting with depth bound (fallback when pred_implies doesn't fire) if (m_inter_hoist_depth < m_max_inter_hoist_depth) { if (m.is_ite(a, c1, t1, e1)) { m_inter_hoist_depth++; @@ -854,17 +1020,10 @@ namespace seq { return mk_ite(c, ct, ce); } - // De Morgan: ~(r1 ∪ r2) → ~r1 ∩ ~r2 - expr* r1 = nullptr, * r2 = nullptr; - if (re().is_union(a, r1, r2)) { - expr_ref c1 = mk_complement(r1); - expr_ref c2 = mk_complement(r2); - return mk_inter(c1, c2); - } - // ~ε → .+ sort* s = nullptr; - if (re().is_to_re(a, r) && u().str.is_empty(r)) { + expr* r1 = nullptr; + if (re().is_to_re(a, r1) && u().str.is_empty(r1)) { VERIFY(m_util.is_re(a, s)); return expr_ref(re().mk_plus(re().mk_full_char(a->get_sort())), m); } @@ -973,39 +1132,61 @@ namespace seq { if (!is_ite1 && !is_ite2) return op(d1, d2); - // d1 is ITE, d2 is not + // d1 is ITE, d2 is not — linear distribution (no depth cost) if (is_ite1 && !is_ite2) { expr_ref then_r = ite_combine_binary(t1, d2, op); expr_ref else_r = ite_combine_binary(e1, d2, op); return 
mk_ite(c1, then_r, else_r); } - // d2 is ITE, d1 is not + // d2 is ITE, d1 is not — linear distribution (no depth cost) if (!is_ite1 && is_ite2) { expr_ref then_r = ite_combine_binary(d1, t2, op); expr_ref else_r = ite_combine_binary(d1, e2, op); return mk_ite(c2, then_r, else_r); } - // Both are ITE + // Both are ITE — this is the cross-product case, consume depth budget + m_inter_hoist_depth++; + expr_ref result(m); + if (c1 == c2) { - // Same condition: combine pairwise + // Same condition: combine pairwise (no cross-product) expr_ref then_r = ite_combine_binary(t1, t2, op); expr_ref else_r = ite_combine_binary(e1, e2, op); - return mk_ite(c1, then_r, else_r); - } - - // Order by condition id (larger id on outside for canonical form) - if (c1->get_id() > c2->get_id()) { - expr_ref then_r = ite_combine_binary(t1, d2, op); - expr_ref else_r = ite_combine_binary(e1, d2, op); - return mk_ite(c1, then_r, else_r); + result = mk_ite(c1, then_r, else_r); } else { - expr_ref then_r = ite_combine_binary(d1, t2, op); - expr_ref else_r = ite_combine_binary(d1, e2, op); - return mk_ite(c2, then_r, else_r); + // Different conditions. Order by id for canonical form. + if (c1->get_id() < c2->get_id()) { + std::swap(d1, d2); + std::swap(c1, c2); + std::swap(t1, t2); + std::swap(e1, e2); + } + + // Now c1->get_id() >= c2->get_id(). Hoist c1. 
+ expr_ref r1(m), r2(m); + if (pred_implies(c1, c2)) + r1 = ite_combine_binary(t1, t2, op); + else if (pred_implies(c1, m.mk_not(c2))) + r1 = ite_combine_binary(t1, e2, op); + else + r1 = ite_combine_binary(t1, d2, op); + + expr_ref notc1(m.mk_not(c1), m); + if (pred_implies(notc1, c2)) + r2 = ite_combine_binary(e1, t2, op); + else if (pred_implies(notc1, m.mk_not(c2))) + r2 = ite_combine_binary(e1, e2, op); + else + r2 = ite_combine_binary(e1, d2, op); + + result = mk_ite(c1, r1, r2); } + + m_inter_hoist_depth--; + return result; } expr_ref derive::ite_combine_unary(expr* d, diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 122619809..1c6623bb6 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -73,7 +73,7 @@ namespace seq { unsigned m_simp_depth { 0 }; static const unsigned m_max_simp_depth = 8; - // Intersection ITE hoisting depth limit + // ITE combine depth limit (bounds exponential blowup in BDD merge) unsigned m_inter_hoist_depth { 0 }; static const unsigned m_max_inter_hoist_depth = 4; @@ -129,6 +129,11 @@ namespace seq { // Lightweight subsumption check: returns true if L(a) ⊆ L(b) bool is_subset(expr* a, expr* b); + // Predicate implication for character range conditions. + // Returns true if condition a implies condition b. 
+ bool pred_implies(expr* a, expr* b); + bool extract_char_range(expr* cond, unsigned& lo, unsigned& hi); + // Normalize reverse(r) by pushing reverse inward expr_ref normalize_reverse(expr* r); diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 62dbfd3aa..a3c56159d 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -22,6 +22,7 @@ Author: #include "ast/ast_util.h" #include "ast/for_each_expr.h" #include +#include namespace smt { @@ -223,6 +224,40 @@ namespace smt { th.add_axiom(~lit); return true; } + // Second pass: deeper exploration for intersection/complement/diff regexes + // These are candidates for dead state detection (the result may be empty) + // For these, do unlimited depth exploration with a time budget + unsigned r_id = get_state_id(r); + expr* r1 = nullptr, *r2 = nullptr; + if (!m_state_graph.is_dead(r_id) && !m_state_graph.is_live(r_id) && + (re().is_intersection(r, r1, r2) || re().is_complement(r, r1) || re().is_diff(r, r1, r2))) { + // Collect all unexplored states and explore them iteratively + // with a time budget + auto pass2_start = std::chrono::steady_clock::now(); + bool changed = true; + while (changed && !m_state_graph.is_dead(r_id)) { + auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - pass2_start).count(); + if (elapsed > 100) break; + changed = false; + for (unsigned i = 0; i < m_state_to_expr.size() && !m_state_graph.is_dead(r_id); ++i) { + unsigned st_id = i + 1; + if (m_state_graph.is_done(st_id) || m_state_graph.is_live(st_id) || m_state_graph.is_dead(st_id)) + continue; + // This is an unexplored state — explore it + expr* st = m_state_to_expr.get(i); + if (re().get_info(st).nullable == l_true) + continue; + if (update_state_graph(st, 1)) + changed = true; + } + } + if (m_state_graph.is_dead(r_id)) { + STRACE(seq_regex_brief, tout << "(dead2) ";); + th.add_axiom(~lit); + return true; + } + } } return false; } @@ -816,7 +851,7 @@ namespace smt { /* Update the state graph 
with expression r and all its derivatives. */ - bool seq_regex::update_state_graph(expr* r) { + bool seq_regex::update_state_graph(expr* r, unsigned depth) { unsigned r_id = get_state_id(r); if (m_state_graph.is_done(r_id)) return false; if (m_state_graph.get_size() >= m_max_state_graph_size) { @@ -859,15 +894,37 @@ namespace smt { m_state_graph.add_edge(r_id, dr_id, maybecycle); } m_state_graph.mark_done(r_id); - // Recursively explore unexplored targets for dead state detection - // Skip targets that are nullable to avoid state explosion - for (auto const& dr: derivatives) { - unsigned dr_id = get_state_id(dr); - if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) - continue; - if (re().get_info(dr).nullable == l_true) - continue; - update_state_graph(dr); + // Explore direct targets for dead state detection (depth 1 only) + // This compensates for less-canonical derivative representations + if (depth < 1) { + for (auto const& dr: derivatives) { + unsigned dr_id = get_state_id(dr); + if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) + continue; + if (re().get_info(dr).nullable == l_true) + continue; + update_state_graph(dr, depth + 1); + } + } + else if (depth == 1) { + // At depth 1, do lightweight exploration: compute derivatives + // of this state's targets but only to check if they're all dead. + // Don't add complex states to the graph — just mark them dead if + // their get_info says min_length == UINT_MAX or is_empty. 
+ for (auto const& dr: derivatives) { + unsigned dr_id = get_state_id(dr); + if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) + continue; + auto dr_info = re().get_info(dr); + if (dr_info.nullable == l_true) { + m_state_graph.add_state(dr_id); + m_state_graph.mark_live(dr_id); + } + else if (re().is_empty(dr) || dr_info.min_length == UINT_MAX) { + m_state_graph.add_state(dr_id); + m_state_graph.mark_done(dr_id); + } + } } } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 5c3fddd25..af03b3c50 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -124,7 +124,7 @@ namespace smt { // Note: Doesn't need to be sound or complete (doesn't affect soundness) bool can_be_in_cycle(expr* r1, expr* r2); // Update the graph - bool update_state_graph(expr* r); + bool update_state_graph(expr* r, unsigned depth = 0); // Printing expressions for seq_regex_brief std::string state_str(expr* e); From 357a15cd2593396fb29c2affd82d11f25df5b818 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sat, 6 Jun 2026 15:29:58 -0700 Subject: [PATCH 19/32] Delete benchmarks/instance08175.smt2 --- benchmarks/instance08175.smt2 | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 benchmarks/instance08175.smt2 diff --git a/benchmarks/instance08175.smt2 b/benchmarks/instance08175.smt2 deleted file mode 100644 index fe34ad2d5..000000000 --- a/benchmarks/instance08175.smt2 +++ /dev/null @@ -1,23 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., 
Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (re.* (str.to_re " ")) (str.to_re "=") (re.* (str.to_re " ")) (re.* (str.to_re "\u{22}")) (str.to_re "cid") (re.* (str.to_re " ")) (str.to_re ":") (re.* (str.to_re " ")) (re.+ (re.union (str.to_re "\u{22}") (str.to_re "<") (str.to_re ">") (str.to_re " "))) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.++ ((_ re.loop 1 11) (re.union (re.range "a" "z") (re.range "0" "9"))) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.++ ((_ re.loop 3 3) (re.range "0" "9")) ((_ re.loop 1 1) (re.union (str.to_re "-") (str.to_re "|") (str.to_re "/"))) ((_ re.loop 6 6) (re.range "0" "9")) ((_ re.loop 1 1) (re.union (str.to_re "-") (str.to_re "|") (str.to_re "/"))) ((_ re.loop 6 6) (re.range "0" "9")) (str.to_re "\u{a}")))) -(assert (not (str.in_re X (str.to_re "http://tv.seekmo.com/showme.aspx?keyword=\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "@") (re.union (re.++ ((_ re.loop 2 255) (re.union (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re "-"))) (str.to_re ".") (re.union (str.to_re "ad") (str.to_re "ae") (str.to_re "af") (str.to_re "ag") (str.to_re "ai") (str.to_re "al") (str.to_re "am") (str.to_re "an") (str.to_re "ao") (str.to_re "aq") (str.to_re "ar") (str.to_re "as") (str.to_re "at") (str.to_re "au") (str.to_re "aw") (str.to_re "az") (str.to_re "ba") (str.to_re "bb") (str.to_re "bd") (str.to_re "be") (str.to_re "bf") (str.to_re "bg") (str.to_re "bh") (str.to_re "bi") (str.to_re "bj") (str.to_re "bm") (str.to_re "bn") (str.to_re "bo") (str.to_re "br") (str.to_re "bs") (str.to_re "bt") (str.to_re "bv") (str.to_re "bw") (str.to_re "by") (str.to_re "bz") (str.to_re "ca") (str.to_re 
"cc") (str.to_re "cf") (str.to_re "cg") (str.to_re "ch") (str.to_re "ci") (str.to_re "ck") (str.to_re "cl") (str.to_re "cm") (str.to_re "cn") (str.to_re "co") (str.to_re "cr") (str.to_re "cu") (str.to_re "cv") (str.to_re "cx") (str.to_re "cy") (str.to_re "cz") (str.to_re "de") (str.to_re "di") (str.to_re "dk") (str.to_re "dm") (str.to_re "do") (str.to_re "dz") (str.to_re "ec") (str.to_re "ee") (str.to_re "eg") (str.to_re "eh") (str.to_re "er") (str.to_re "es") (str.to_re "et") (str.to_re "fi") (str.to_re "fj") (str.to_re "fk") (str.to_re "fm") (str.to_re "fo") (str.to_re "fr") (str.to_re "ga") (str.to_re "gb") (str.to_re "gd") (str.to_re "ge") (str.to_re "gf") (str.to_re "gh") (str.to_re "gi") (str.to_re "gl") (str.to_re "gm") (str.to_re "gn") (str.to_re "gp") (str.to_re "gq") (str.to_re "gr") (str.to_re "gs") (str.to_re "gt") (str.to_re "gu") (str.to_re "gw") (str.to_re "gy") (str.to_re "hk") (str.to_re "hm") (str.to_re "hn") (str.to_re "hr") (str.to_re "ht") (str.to_re "hu") (str.to_re "id") (str.to_re "ie") (str.to_re "il") (str.to_re "in") (str.to_re "io") (str.to_re "iq") (str.to_re "ir") (str.to_re "is") (str.to_re "it") (str.to_re "jo") (str.to_re "jm") (str.to_re "jp") (str.to_re "ke") (str.to_re "kg") (str.to_re "kh") (str.to_re "ki") (str.to_re "km") (str.to_re "kn") (str.to_re "kp") (str.to_re "kr") (str.to_re "kw") (str.to_re "ky") (str.to_re "kz") (str.to_re "la") (str.to_re "lb") (str.to_re "lc") (str.to_re "li") (str.to_re "lk") (str.to_re "lr") (str.to_re "ls") (str.to_re "lt") (str.to_re "lu") (str.to_re "lv") (str.to_re "ly") (str.to_re "ma") (str.to_re "mc") (str.to_re "md") (str.to_re "mg") (str.to_re "mh") (str.to_re "mk") (str.to_re "ml") (str.to_re "mm") (str.to_re "mn") (str.to_re "mo") (str.to_re "mp") (str.to_re "mq") (str.to_re "mr") (str.to_re "ms") (str.to_re "mt") (str.to_re "mu") (str.to_re "mv") (str.to_re "mw") (str.to_re "mx") (str.to_re "my") (str.to_re "mz") (str.to_re "an") (str.to_re "nc") (str.to_re "ne") (str.to_re "nf") 
(str.to_re "ng") (str.to_re "ni") (str.to_re "nl") (str.to_re "no") (str.to_re "np") (str.to_re "nr") (str.to_re "nt") (str.to_re "nu") (str.to_re "nz") (str.to_re "om") (str.to_re "pa") (str.to_re "pe") (str.to_re "pf") (str.to_re "pg") (str.to_re "ph") (str.to_re "pk") (str.to_re "pl") (str.to_re "pm") (str.to_re "pn") (str.to_re "pr") (str.to_re "pt") (str.to_re "pw") (str.to_re "py") (str.to_re "qa") (str.to_re "re") (str.to_re "ro") (str.to_re "ru") (str.to_re "rw") (str.to_re "sa") (str.to_re "sb") (str.to_re "sc") (str.to_re "sd") (str.to_re "se") (str.to_re "sq") (str.to_re "sh") (str.to_re "si") (str.to_re "sj") (str.to_re "sk") (str.to_re "sl") (str.to_re "sm") (str.to_re "sn") (str.to_re "so") (str.to_re "sr") (str.to_re "st") (str.to_re "su") (str.to_re "sv") (str.to_re "sy") (str.to_re "sz") (str.to_re "tc") (str.to_re "td") (str.to_re "tf") (str.to_re "tg") (str.to_re "th") (str.to_re "tj") (str.to_re "tk") (str.to_re "tm") (str.to_re "tn") (str.to_re "to") (str.to_re "tp") (str.to_re "tr") (str.to_re "tt") (str.to_re "tv") (str.to_re "tw") (str.to_re "tz") (str.to_re "ua") (str.to_re "ug") (str.to_re "uk") (str.to_re "um") (str.to_re "us") (str.to_re "uy") (str.to_re "uz") (str.to_re "va") (str.to_re "vc") (str.to_re "ve") (str.to_re "vg") (str.to_re "vi") (str.to_re "vn") (str.to_re "vu") (str.to_re "wf") (str.to_re "ws") (str.to_re "ye") (str.to_re "yt") (str.to_re "yu") (str.to_re "za") (str.to_re "zm") (str.to_re "zr") (str.to_re "zw") (str.to_re "arpa") (str.to_re "arts") (str.to_re "biz") (str.to_re "com") (str.to_re "edu") (str.to_re "firm") (str.to_re "gov") (str.to_re "info") (str.to_re "int") (str.to_re "mil") (str.to_re "nato") (str.to_re "net") (str.to_re "nom") (str.to_re "org") (str.to_re "rec") (str.to_re "store") (str.to_re "web"))) (re.++ (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) 
(re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "1" "9")) (str.to_re ".") (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) (re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "1" "9") (str.to_re "0")) (str.to_re ".") (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) (re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "1" "9") (str.to_re "0")) (str.to_re ".") (re.union (re.++ (str.to_re "25") (re.range "0" "5")) (re.++ (str.to_re "2") (re.range "0" "4") (re.range "0" "9")) (re.++ ((_ re.loop 1 1) (re.range "0" "1")) ((_ re.loop 2 2) (re.range "0" "9"))) (re.++ ((_ re.loop 1 1) (re.range "1" "9")) ((_ re.loop 1 1) (re.range "0" "9"))) (re.range "0" "9")))) (str.to_re "\u{a}") (re.+ (re.union (str.to_re "_") (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re "-"))) (str.to_re ".") (re.* (re.union (str.to_re "_") (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re "-")))))) -(check-sat) - -(exit) From 3b9dee1a7f303cd27fc50ec40958b38d772679b7 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sat, 6 Jun 2026 15:30:20 -0700 Subject: [PATCH 20/32] Delete benchmarks/instance08315.smt2 --- benchmarks/instance08315.smt2 | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 benchmarks/instance08315.smt2 diff --git a/benchmarks/instance08315.smt2 b/benchmarks/instance08315.smt2 deleted file mode 100644 index 45eecdd57..000000000 --- a/benchmarks/instance08315.smt2 +++ /dev/null @@ -1,22 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja 
Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status sat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (str.to_re "\u{a}") (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "3"))) (str.to_re ":") (re.range "0" "5") (re.range "0" "9"))))) -(assert (not (str.in_re X (re.++ (re.union (str.to_re "0") (re.++ (re.range "1" "9") ((_ re.loop 0 3) (re.range "0" "9"))) (re.++ (re.range "1" "5") ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (str.to_re "6") (re.range "0" "5") (re.range "0" "5") (re.union (re.++ (re.range "0" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "5"))))) (str.to_re "\u{a}"))))) -(assert (not (str.in_re X (re.++ (re.union (str.to_re "ac") (str.to_re "AC") (str.to_re "al") (str.to_re "AL") (str.to_re "am") (str.to_re "AM") (str.to_re "ap") (str.to_re "AP") (str.to_re "ba") (str.to_re "BA") (str.to_re "ce") (str.to_re "CE") (str.to_re "df") (str.to_re "DF") (str.to_re "es") (str.to_re "ES") (str.to_re "go") (str.to_re "GO") (str.to_re "ma") (str.to_re "MA") (str.to_re "mg") (str.to_re "MG") (str.to_re "ms") (str.to_re "MS") (str.to_re "mt") (str.to_re "MT") (str.to_re "pa") (str.to_re "PA") (str.to_re "pb") (str.to_re "PB") (str.to_re "pe") (str.to_re "PE") (str.to_re "pi") (str.to_re "PI") 
(str.to_re "pr") (str.to_re "PR") (str.to_re "rj") (str.to_re "RJ") (str.to_re "rn") (str.to_re "RN") (str.to_re "ro") (str.to_re "RO") (str.to_re "rr") (str.to_re "RR") (str.to_re "rs") (str.to_re "RS") (str.to_re "sc") (str.to_re "SC") (str.to_re "se") (str.to_re "SE") (str.to_re "sp") (str.to_re "SP") (str.to_re "to") (str.to_re "TO")) (str.to_re "\u{a}"))))) -(assert (not (str.in_re X (re.++ (re.union (re.++ (re.union (re.++ (re.* (str.to_re "0")) (re.range "1" "9")) (re.++ (re.union (str.to_re "1") (str.to_re "2")) (re.range "0" "9")) (re.++ (str.to_re "3") (re.union (str.to_re "0") (str.to_re "1")))) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (re.++ (re.* (str.to_re "0")) (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "8"))) (str.to_re "10") (str.to_re "12")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (re.++ (re.* (str.to_re "0")) (re.range "1" "9")) (re.++ (re.union (str.to_re "1") (str.to_re "2")) (re.range "0" "9")) (str.to_re "30")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (re.++ (re.* (str.to_re "0")) (re.union (str.to_re "4") (str.to_re "6") (str.to_re "9"))) (str.to_re "11")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (re.++ (re.* (str.to_re "0")) (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "8"))) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (str.to_re "29") (re.union (str.to_re ".") (str.to_re "-") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "0") (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) 
(re.union (str.to_re "0") (str.to_re "4") (str.to_re "8")) (str.to_re "00")) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6")) (str.to_re "00")) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.range "0" "9") (re.range "0" "9") (str.to_re "0") (re.union (str.to_re "4") (str.to_re "8"))) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.range "0" "9") (re.range "0" "9") (re.union (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "0") (str.to_re "4") (str.to_re "8"))) (re.++ (str.to_re "29") (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.union (str.to_re "02") (str.to_re "2")) (re.union (str.to_re "-") (str.to_re ".") (str.to_re "/")) (re.range "0" "9") (re.range "0" "9") (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6")))) (str.to_re "\u{a}"))))) -(check-sat) - -(exit) From 37ce61ddc2d774d61df911be267e09d44585e999 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sat, 6 Jun 2026 15:31:19 -0700 Subject: [PATCH 21/32] remove local copies of benchmarks Signed-off-by: Nikolaj Bjorner --- benchmarks/instance08965.smt2 | 23 ----------------------- benchmarks/instance09159.smt2 | 23 ----------------------- benchmarks/instance11213.smt2 | 22 ---------------------- benchmarks/instance11705.smt2 | 23 ----------------------- benchmarks/instance11745.smt2 | 21 --------------------- 
benchmarks/instance11856.smt2 | 21 --------------------- benchmarks/instance12204.smt2 | 22 ---------------------- benchmarks/instance12488.smt2 | 22 ---------------------- benchmarks/instance12671.smt2 | 21 --------------------- benchmarks/instance12768.smt2 | 22 ---------------------- benchmarks/instance12833.smt2 | 22 ---------------------- benchmarks/instance13062.smt2 | 23 ----------------------- benchmarks/instance13106.smt2 | 23 ----------------------- benchmarks/instance13975.smt2 | 22 ---------------------- benchmarks/instance14260.smt2 | 23 ----------------------- benchmarks/instance14326.smt2 | 21 --------------------- benchmarks/instance14382.smt2 | 21 --------------------- 17 files changed, 375 deletions(-) delete mode 100644 benchmarks/instance08965.smt2 delete mode 100644 benchmarks/instance09159.smt2 delete mode 100644 benchmarks/instance11213.smt2 delete mode 100644 benchmarks/instance11705.smt2 delete mode 100644 benchmarks/instance11745.smt2 delete mode 100644 benchmarks/instance11856.smt2 delete mode 100644 benchmarks/instance12204.smt2 delete mode 100644 benchmarks/instance12488.smt2 delete mode 100644 benchmarks/instance12671.smt2 delete mode 100644 benchmarks/instance12768.smt2 delete mode 100644 benchmarks/instance12833.smt2 delete mode 100644 benchmarks/instance13062.smt2 delete mode 100644 benchmarks/instance13106.smt2 delete mode 100644 benchmarks/instance13975.smt2 delete mode 100644 benchmarks/instance14260.smt2 delete mode 100644 benchmarks/instance14326.smt2 delete mode 100644 benchmarks/instance14382.smt2 diff --git a/benchmarks/instance08965.smt2 b/benchmarks/instance08965.smt2 deleted file mode 100644 index 4449c9d4e..000000000 --- a/benchmarks/instance08965.smt2 +++ /dev/null @@ -1,23 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: 
Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (str.to_re "qr/") (re.union (str.to_re "Alabama") (str.to_re "Alaska") (str.to_re "Arizona") (str.to_re "Arkansas") (str.to_re "California") (str.to_re "Colorado") (str.to_re "Connecticut") (str.to_re "Delaware") (str.to_re "Florida") (str.to_re "Georgia") (str.to_re "Hawaii") (str.to_re "Idaho") (str.to_re "Illinois") (str.to_re "Indiana") (str.to_re "Iowa") (str.to_re "Kansas") (str.to_re "Kentucky") (str.to_re "Louisiana") (str.to_re "Maine") (str.to_re "Maryland") (str.to_re "Massachusetts") (str.to_re "Michigan") (str.to_re "Minnesota") (str.to_re "Mississippi") (str.to_re "Missouri") (str.to_re "Montana") (str.to_re "Nebraska") (str.to_re "Nevada") (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Hampshire")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Jersey")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Mexico")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "York")) (re.++ (str.to_re "North") (re.union 
(str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "North") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Dakota")) (str.to_re "Ohio") (str.to_re "Oklahoma") (str.to_re "Oregon") (str.to_re "Pennsylvania") (re.++ (str.to_re "Rhode") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Island")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Dakota")) (str.to_re "Tennessee") (str.to_re "Texas") (str.to_re "Utah") (str.to_re "Vermont") (str.to_re "Virginia") (str.to_re "Washington") (re.++ (str.to_re "West") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Virginia")) (str.to_re "Wisconsin") (str.to_re "Wyoming")) (str.to_re "/\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "6") ((_ re.loop 7 7) (re.range "0" "9")) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "/Referer:") (re.+ (re.union (str.to_re "\u{d}") (str.to_re "\u{a}"))) (str.to_re "/.html\u{d}/Hsm\u{a}") ((_ re.loop 32 32) (re.union (str.to_re "_") (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (re.* (re.union (str.to_re "_") (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_")))))) -(assert (str.in_re X (re.++ (str.to_re "Wareztv.seekmo.com") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Keylogging\u{13}TRUSTYFILES.COM\u{a}")))) -(assert (not (str.in_re X (re.++ (str.to_re "Flooded") (re.+ (re.union (str.to_re " ") 
(str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Host:") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "AppName/GRSI|Server|\u{13}Host:origin=sidefindHost:User-Agent:\u{a}"))))) -(check-sat) - -(exit) diff --git a/benchmarks/instance09159.smt2 b/benchmarks/instance09159.smt2 deleted file mode 100644 index 2d5e138d9..000000000 --- a/benchmarks/instance09159.smt2 +++ /dev/null @@ -1,23 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status sat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (str.to_re "__") (re.+ re.allchar) (str.to_re "__\u{a}"))))) -(assert (not (str.in_re X (re.++ (str.to_re "/.avi") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}"))))) -(assert (not (str.in_re X (re.++ (str.to_re "User") (re.* re.allchar) (str.to_re "User-Agent:") (re.* re.allchar) (str.to_re "ResultATTENTION:riggiymd/wdhi.vhi\u{a}"))))) -(assert (str.in_re X (re.++ (re.union (re.++ (str.to_re "A") (re.union (str.to_re "BW") (str.to_re "FG") (str.to_re "GO") (str.to_re "IA") (re.++ (str.to_re "L") (re.union (str.to_re "A") (str.to_re "B"))) (re.++ (str.to_re "N") (re.union (str.to_re "D") (str.to_re "T"))) (re.++ (str.to_re "R") (re.union (str.to_re "E") (str.to_re "G") (str.to_re "M"))) (str.to_re "SM") (re.++ (str.to_re "T") (re.union (str.to_re "A") (str.to_re "F") (str.to_re "G"))) (re.++ (str.to_re "U") (re.union (str.to_re "S") (str.to_re "T"))) (str.to_re "ZE"))) (re.++ (str.to_re "B") (re.union (str.to_re "DI") (re.++ (str.to_re "E") (re.union (str.to_re "L") (str.to_re "N"))) (str.to_re "FA") (re.++ (str.to_re "G") (re.union (str.to_re "D") (str.to_re "R"))) (re.++ (str.to_re "H") (re.union (str.to_re "R") (str.to_re "S"))) (str.to_re "IH") (re.++ (str.to_re "L") (re.union (str.to_re "M") (str.to_re "R") (str.to_re "Z"))) (str.to_re "MU") (str.to_re "OL") (re.++ (str.to_re "R") (re.union (str.to_re "A") (str.to_re "B") (str.to_re "N"))) (str.to_re "TN") (str.to_re "VT") (str.to_re "WA"))) (re.++ (str.to_re "C") (re.union (re.++ (str.to_re "A") (re.union (str.to_re "F") (str.to_re "N"))) (str.to_re "CK") (re.++ (str.to_re "H") (re.union (str.to_re "E") (str.to_re "L") (str.to_re "N"))) (str.to_re "IV") (str.to_re "MR") (re.++ (str.to_re "O") (re.union (str.to_re "D") (str.to_re "G") (str.to_re "K") (str.to_re "L") (str.to_re 
"M"))) (str.to_re "PV") (str.to_re "RI") (str.to_re "UB") (str.to_re "XR") (re.++ (str.to_re "Y") (re.union (str.to_re "M") (str.to_re "P"))) (str.to_re "ZE"))) (re.++ (str.to_re "D") (re.union (str.to_re "EU") (str.to_re "JI") (str.to_re "MA") (str.to_re "NK") (str.to_re "OM") (str.to_re "ZA"))) (re.++ (str.to_re "E") (re.union (str.to_re "CU") (str.to_re "GY") (str.to_re "RI") (re.++ (str.to_re "S") (re.union (str.to_re "H") (str.to_re "P") (str.to_re "T"))) (str.to_re "TH"))) (re.++ (str.to_re "F") (re.union (str.to_re "IN") (str.to_re "JI") (str.to_re "LK") (re.++ (str.to_re "R") (re.union (str.to_re "A") (str.to_re "O"))) (str.to_re "SM"))) (re.++ (str.to_re "G") (re.union (str.to_re "AB") (str.to_re "BR") (str.to_re "EO") (str.to_re "GY") (str.to_re "HA") (re.++ (str.to_re "I") (re.union (str.to_re "B") (str.to_re "N"))) (str.to_re "LP") (str.to_re "MB") (str.to_re "NQ") (str.to_re "NB") (re.++ (str.to_re "R") (re.union (str.to_re "C") (str.to_re "D") (str.to_re "L"))) (str.to_re "TM") (re.++ (str.to_re "U") (re.union (str.to_re "F") (str.to_re "M") (str.to_re "Y"))))) (re.++ (str.to_re "H") (re.union (str.to_re "KG") (str.to_re "MD") (str.to_re "ND") (str.to_re "RV") (str.to_re "TI") (str.to_re "UN"))) (re.++ (str.to_re "I") (re.union (str.to_re "DN") (str.to_re "MN") (str.to_re "ND") (str.to_re "OT") (re.++ (str.to_re "R") (re.union (str.to_re "L") (str.to_re "N") (str.to_re "Q"))) (re.++ (str.to_re "S") (re.union (str.to_re "L") (str.to_re "R"))) (str.to_re "TA"))) (re.++ (str.to_re "J") (re.union (str.to_re "AM") (str.to_re "EY") (str.to_re "OR") (str.to_re "PN"))) (re.++ (str.to_re "K") (re.union (str.to_re "AZ") (str.to_re "EN") (str.to_re "GZ") (str.to_re "HM") (str.to_re "IR") (str.to_re "NA") (str.to_re "OR") (str.to_re "WT"))) (re.++ (str.to_re "L") (re.union (str.to_re "AO") (re.++ (str.to_re "B") (re.union (str.to_re "N") (str.to_re "R") (str.to_re "Y"))) (str.to_re "CA") (str.to_re "IE") (str.to_re "KA") (str.to_re "SO") (str.to_re "TU") 
(str.to_re "UX") (str.to_re "VA"))) (re.++ (str.to_re "M") (re.union (re.++ (str.to_re "A") (re.union (str.to_re "C") (str.to_re "F") (str.to_re "R"))) (str.to_re "CO") (re.++ (str.to_re "D") (re.union (str.to_re "A") (str.to_re "G") (str.to_re "V"))) (str.to_re "EX") (str.to_re "HL") (str.to_re "KD") (re.++ (str.to_re "L") (re.union (str.to_re "I") (str.to_re "T"))) (str.to_re "MR") (re.++ (str.to_re "N") (re.union (str.to_re "E") (str.to_re "G") (str.to_re "P"))) (str.to_re "OZ") (str.to_re "RT") (str.to_re "SR") (str.to_re "TQ") (str.to_re "US") (str.to_re "WI") (re.++ (str.to_re "Y") (re.union (str.to_re "S") (str.to_re "T"))))) (re.++ (str.to_re "N") (re.union (str.to_re "AM") (str.to_re "CL") (str.to_re "ER") (str.to_re "FK") (str.to_re "GA") (re.++ (str.to_re "I") (re.union (str.to_re "C") (str.to_re "U"))) (str.to_re "LD") (str.to_re "OR") (str.to_re "PL") (str.to_re "RU") (str.to_re "ZL"))) (str.to_re "OMN") (re.++ (str.to_re "P") (re.union (re.++ (str.to_re "A") (re.union (str.to_re "K") (str.to_re "N"))) (str.to_re "CN") (str.to_re "ER") (str.to_re "HL") (str.to_re "LW") (str.to_re "NG") (str.to_re "OL") (re.++ (str.to_re "R") (re.union (str.to_re "I") (str.to_re "K") (str.to_re "T") (str.to_re "Y"))) (str.to_re "SE") (str.to_re "YF"))) (str.to_re "QAT") (re.++ (str.to_re "R") (re.union (str.to_re "EU") (str.to_re "OU") (str.to_re "US") (str.to_re "WA"))) (re.++ (str.to_re "S") (re.union (str.to_re "AU") (str.to_re "DN") (str.to_re "EN") (re.++ (str.to_re "G") (re.union (str.to_re "P") (str.to_re "S"))) (str.to_re "HN") (str.to_re "JM") (re.++ (str.to_re "L") (re.union (str.to_re "B") (str.to_re "E") (str.to_re "V"))) (str.to_re "MR") (str.to_re "OM") (str.to_re "PM") (str.to_re "RB") (str.to_re "TP") (str.to_re "UR") (re.++ (str.to_re "V") (re.union (str.to_re "K") (str.to_re "N"))) (re.++ (str.to_re "W") (re.union (str.to_re "E") (str.to_re "Z"))) (re.++ (str.to_re "Y") (re.union (str.to_re "C") (str.to_re "R"))))) (re.++ (str.to_re "T") (re.union 
(re.++ (str.to_re "C") (re.union (str.to_re "A") (str.to_re "D"))) (str.to_re "GO") (str.to_re "HA") (str.to_re "JK") (re.++ (str.to_re "K") (re.union (str.to_re "L") (str.to_re "M"))) (str.to_re "LS") (str.to_re "ON") (str.to_re "TO") (re.++ (str.to_re "U") (re.union (str.to_re "N") (str.to_re "R") (str.to_re "V"))) (str.to_re "WN") (str.to_re "ZA"))) (re.++ (str.to_re "U") (re.union (str.to_re "EN") (str.to_re "GA") (str.to_re "KR") (str.to_re "MI") (str.to_re "RY") (str.to_re "SA") (str.to_re "ZB"))) (re.++ (str.to_re "V") (re.union (str.to_re "AT") (str.to_re "CT") (str.to_re "GB") (str.to_re "IR") (str.to_re "NM") (str.to_re "UT"))) (re.++ (str.to_re "W") (re.union (str.to_re "LF") (str.to_re "SM"))) (str.to_re "YEM") (re.++ (str.to_re "Z") (re.union (str.to_re "AF") (str.to_re "MB") (str.to_re "WE")))) (str.to_re "\u{a}")))) -(assert (not (str.in_re X (re.++ (re.* (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re "-") ((_ re.loop 2 2) (re.range "0" "9")))) (str.to_re "\u{a}"))))) -(check-sat) - -(exit) diff --git a/benchmarks/instance11213.smt2 b/benchmarks/instance11213.smt2 deleted file mode 100644 index 0bdec4a17..000000000 --- a/benchmarks/instance11213.smt2 +++ /dev/null @@ -1,22 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (str.to_re "//") ((_ re.loop 48 48) (re.union (str.to_re "-") (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "/U\u{a}"))))) -(assert (str.in_re X (re.++ (re.opt (re.++ (re.opt (str.to_re " ")) (re.opt (str.to_re "+")) (str.to_re "34"))) (re.union (re.++ (str.to_re "6") (re.union ((_ re.loop 8 8) (re.range "0" "9")) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 6 6) (re.range "0" "9"))) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9"))))) (re.++ (str.to_re "9") (re.union ((_ re.loop 8 8) (re.range "0" "9")) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 6 6) (re.range "0" "9"))) (re.++ (re.range "1" "9") (str.to_re " ") ((_ re.loop 7 7) (re.range "0" "9"))) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 3 3) (re.range "0" "9"))) (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re " ") ((_ re.loop 2 2) (re.range "0" "9")))))) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.++ (re.+ (re.union (re.range "a" "z") (re.range "A" "Z") (re.range "0" "9") (str.to_re ".") (str.to_re "-") (str.to_re "_") (str.to_re "'") (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.union (re.++ (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general") (str.to_re "unicode") (str.to_re "roman") (str.to_re "slovak") (str.to_re "czech") (str.to_re "icelandic") (re.++ (re.union (str.to_re "latv") (str.to_re "pers")) (str.to_re 
"ian")) (re.++ (re.union (str.to_re "dan") (str.to_re "pol") (str.to_re "span") (str.to_re "swed") (str.to_re "turk")) (str.to_re "ish")) (str.to_re "spanish2") (re.++ (re.union (str.to_re "esto") (str.to_re "lithua") (str.to_re "roma") (str.to_re "slove")) (str.to_re "nian_ci"))) (str.to_re "u") (re.union (str.to_re "cs2") (str.to_re "tf8"))) (re.++ (re.union (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re "roman"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re "1256"))) (str.to_re "armscii8") (str.to_re "geostd8") (str.to_re "ascii") (str.to_re "keybcs2") (str.to_re "greek") (str.to_re "hebrew") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general_ci"))) (re.++ (re.union (str.to_re "dec8") (str.to_re "swe7")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "swedish_ci"))) (re.++ (re.union (str.to_re "hp8") (str.to_re "latin5")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "english_ci"))) (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "gb") (re.union (str.to_re "2312") (str.to_re "k")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "chinese_ci"))) (re.++ (re.union (str.to_re "cp932") (str.to_re "eucjpms") (re.++ (re.union (str.to_re "s") (str.to_re "u")) (str.to_re "jis"))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "japanese_ci"))) (re.++ (str.to_re "euckr_") (re.union (str.to_re "bin") (str.to_re "korean_ci"))) (re.++ (str.to_re "tis620_") (re.union (str.to_re "bin") (str.to_re "thai_ci"))) (re.++ (str.to_re "latin1_") (re.union (str.to_re "bin") (re.++ (re.union (re.++ (re.union (str.to_re "dan") (str.to_re "span") (str.to_re "swed")) (str.to_re "ish")) (re.++ (str.to_re "german") (re.union (str.to_re "1") (str.to_re "2")))) (str.to_re "_ci")) (re.++ (str.to_re "general_c") (re.union (str.to_re "i") 
(str.to_re "s"))))) (re.++ (str.to_re "cp1250_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (str.to_re "general_ci"))) (re.++ (str.to_re "latin2_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (re.++ (re.union (str.to_re "general") (str.to_re "hungarian") (str.to_re "croatian")) (str.to_re "_ci")))) (re.++ (str.to_re "cp1257_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "lithuanian")) (str.to_re "_ci")))) (re.++ (str.to_re "latin7_") (re.union (str.to_re "bin") (re.++ (str.to_re "general_c") (re.union (str.to_re "i") (str.to_re "s"))) (str.to_re "estonian_cs"))) (re.++ (str.to_re "\u{a}cp1251_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "bulgarian") (str.to_re "ukrainian")) (str.to_re "_ci")) (str.to_re "general_cs")))))) -(check-sat) - -(exit) diff --git a/benchmarks/instance11705.smt2 b/benchmarks/instance11705.smt2 deleted file mode 100644 index 2c344ae0e..000000000 --- a/benchmarks/instance11705.smt2 +++ /dev/null @@ -1,23 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (str.to_re "pjpoptwql/rlnj") (re.+ (re.range "0" "9")) (str.to_re "waiting") (re.+ (re.range "0" "9")) (str.to_re "ocllceclbhs/gth") (re.+ (re.union (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "gdvsotuqwsg/dxt.hdUser-Agent:\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "Host:") (re.range "0" "9") (str.to_re "Keylogger") (re.* re.allchar) (str.to_re "Onetrustyfiles.com\u{a}")))) -(assert (str.in_re X (re.++ ((_ re.loop 2 2) (re.range "0" "9")) (str.to_re "-") (re.union (str.to_re "Jan") (str.to_re "Feb") (str.to_re "Mar") (str.to_re "Apr") (str.to_re "May") (str.to_re "Jun") (str.to_re "Jul") (str.to_re "Aug") (str.to_re "Sep") (str.to_re "Oct") (str.to_re "Nov") (str.to_re "Dec") (str.to_re "jan") (str.to_re "feb") (str.to_re "mar") (str.to_re "apr") (str.to_re "may") (str.to_re "jun") (str.to_re "jul") (str.to_re "aug") (str.to_re "sep") (str.to_re "oct") (str.to_re "nov") (str.to_re "dec") (str.to_re "JAN") (str.to_re "FEB") (str.to_re "MAR") (str.to_re "APR") (str.to_re "MAY") (str.to_re "JUN") (str.to_re "JUL") (str.to_re "AUG") (str.to_re "SEP") (str.to_re "OCT") (str.to_re "NOV") (str.to_re "DEC")) (str.to_re "-") ((_ re.loop 4 4) (re.range "0" "9")) (str.to_re "\u{a}")))) -(assert (not (str.in_re X (re.++ (re.range "\u{80}" "\u{ff}") (str.to_re "\u{a}"))))) -(assert (str.in_re X (re.++ (str.to_re "/.xlw") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance11745.smt2 b/benchmarks/instance11745.smt2 deleted file mode 100644 index 246a3f777..000000000 --- a/benchmarks/instance11745.smt2 +++ /dev/null @@ -1,21 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, 
Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.union (str.to_re "EMA") (str.to_re "QCY") (str.to_re "SQZ") (str.to_re "ORM") (str.to_re "NQT") (str.to_re "WTN") (str.to_re "CBG") (str.to_re "QFO") (str.to_re "BEQ") (str.to_re "LKZ") (str.to_re "LTN") (str.to_re "KNF") (str.to_re "MHZ") (str.to_re "NWI") (str.to_re "CLF") (str.to_re "QUY") (str.to_re "SEN") (str.to_re "STN") (str.to_re "BEQ") (str.to_re "BQH") (str.to_re "LHR") (str.to_re "NHT") (str.to_re "LCY") (str.to_re "MME") (str.to_re "NCL") (str.to_re "BWF") (str.to_re "BLK") (str.to_re "CAX") (str.to_re "LPL") (str.to_re "MAN") (str.to_re "BBP") (str.to_re "BEX") (str.to_re "BZZ") (str.to_re "LGW") (str.to_re "SOU") (str.to_re "FAB") (str.to_re "OXF") (str.to_re "ESH") (str.to_re "QLA") (str.to_re "LYX") (str.to_re "KRH") (str.to_re "ODH") (str.to_re "RCS") (str.to_re "QUC") (str.to_re "BBS") (str.to_re "GLO") (str.to_re "EXT") (str.to_re "FFD") (str.to_re "BOH") (str.to_re "LYE") (str.to_re "NQY") (str.to_re "LEQ") (str.to_re "ISC") (str.to_re "UPV") (str.to_re "BRS") (str.to_re "YEO") (str.to_re "CVT") (str.to_re "BHX") (str.to_re "DSA") (str.to_re "HUY") (str.to_re "LBA") (str.to_re "HRT") (str.to_re "BFS") (str.to_re "BHD") (str.to_re "LDY") (str.to_re "ENK") (str.to_re 
"ABZ") (str.to_re "OBN") (str.to_re "BEB") (str.to_re "BRR") (str.to_re "CAL") (str.to_re "COL") (str.to_re "CSA") (str.to_re "NRL") (str.to_re "INV") (str.to_re "SCS") (str.to_re "DND") (str.to_re "LSI") (str.to_re "EOI") (str.to_re "EDI") (str.to_re "FIE") (str.to_re "FOA") (str.to_re "ILY") (str.to_re "FSS") (str.to_re "NDY") (str.to_re "ADX") (str.to_re "LMO") (str.to_re "OUK") (str.to_re "PSV") (str.to_re "PPW") (str.to_re "PIK") (str.to_re "GLA") (str.to_re "KOI") (str.to_re "PSL") (str.to_re "SYY") (str.to_re "SKL") (str.to_re "SOY") (str.to_re "LWK") (str.to_re "TRE") (str.to_re "WRY") (str.to_re "WHS") (str.to_re "WIC") (str.to_re "HAW") (str.to_re "CEG") (str.to_re "VLY") (str.to_re "SWS") (str.to_re "CWL") (str.to_re "DGX") (str.to_re "ACI") (str.to_re "GCI") (str.to_re "IOM") (str.to_re "JER\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "/User-Agent: ") ((_ re.loop 9 9) (re.range "A" "Z")) (str.to_re "\u{d}\u{a}/Hm\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "Admin") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "daosearch.comMyPostwww.raxsearch.comref=%user_id\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance11856.smt2 b/benchmarks/instance11856.smt2 deleted file mode 100644 index 0c9a4d020..000000000 --- a/benchmarks/instance11856.smt2 +++ /dev/null @@ -1,21 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. 
D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status sat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (str.to_re "/filename=") (re.* (re.comp (str.to_re "\u{a}"))) (str.to_re ".addin/i\u{a}"))))) -(assert (not (str.in_re X (re.++ (str.to_re "//") ((_ re.loop 5 5) (re.range "0" "9")) (str.to_re ".jar/U\u{a}"))))) -(assert (str.in_re X (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "euc") (re.union (str.to_re "kr") (str.to_re "jpms"))) (str.to_re "binary") (str.to_re "greek") (str.to_re "tis620") (str.to_re "hebrew") (str.to_re "ascii") (str.to_re "swe7") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u"))) (re.++ (re.union (str.to_re "u") (str.to_re "keyb")) (str.to_re "cs2")) (re.++ (re.union (str.to_re "dec") (str.to_re "hp") (str.to_re "utf") (str.to_re "geostd") (str.to_re "armscii")) (str.to_re "8")) (re.++ (str.to_re "gb") (re.union (str.to_re "k") (str.to_re "2312"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re "932") (re.++ (str.to_re "125") (re.union (str.to_re "0") (str.to_re "1") (str.to_re "6") (str.to_re "7"))))) (re.++ (str.to_re "latin") (re.union (str.to_re "1") (str.to_re "2") (str.to_re "5") (str.to_re "7"))) (re.++ (re.union (str.to_re "u") (str.to_re "s")) (str.to_re "jis")) (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re "roman")))) (str.to_re "\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance12204.smt2 b/benchmarks/instance12204.smt2 deleted file mode 100644 index 4a192538c..000000000 --- a/benchmarks/instance12204.smt2 +++ /dev/null @@ -1,22 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy 
Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (re.* (re.range "0" "9")) (re.+ (re.range "1" "9")) (re.* (re.range "0" "9")) (str.to_re "\u{a}"))))) -(assert (not (str.in_re X (re.++ (str.to_re "/.svg") (re.opt (str.to_re "z")) (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}"))))) -(assert (str.in_re X (re.++ (str.to_re "<") (re.opt (str.to_re "/")) (re.union (str.to_re "a") (str.to_re "abbr") (str.to_re "acronym") (str.to_re "address") (str.to_re "applet") (str.to_re "area") (str.to_re "b") (str.to_re "base") (str.to_re "basefont") (str.to_re "bdo") (str.to_re "big") (str.to_re "blockquote") (str.to_re "body") (str.to_re "br") (str.to_re "button") (str.to_re "caption") (str.to_re "center") (str.to_re "cite") (str.to_re "code") (str.to_re "col") (str.to_re "colgroup") (str.to_re "dd") (str.to_re "del") (str.to_re "dir") (str.to_re "div") (str.to_re "dfn") (str.to_re "dl") (str.to_re "dt") (str.to_re "em") (str.to_re "fieldset") (str.to_re "font") (str.to_re "form") (str.to_re "frame") (str.to_re "frameset") (re.++ (str.to_re "h") (re.range "1" "6")) (str.to_re "head") (str.to_re "hr") (str.to_re "html") (str.to_re "i") (str.to_re "iframe") (str.to_re "img") 
(str.to_re "input") (str.to_re "ins") (str.to_re "isindex") (str.to_re "kbd") (str.to_re "label") (str.to_re "legend") (str.to_re "li") (str.to_re "link") (str.to_re "map") (str.to_re "menu") (str.to_re "meta") (str.to_re "noframes") (str.to_re "noscript") (str.to_re "object") (str.to_re "ol") (str.to_re "optgroup") (str.to_re "option") (str.to_re "p") (str.to_re "param") (str.to_re "pre") (str.to_re "q") (str.to_re "s") (str.to_re "samp") (str.to_re "script") (str.to_re "select") (str.to_re "small") (str.to_re "span") (str.to_re "strike") (str.to_re "strong") (str.to_re "style") (str.to_re "sub") (str.to_re "sup") (str.to_re "table") (str.to_re "tbody") (str.to_re "td") (str.to_re "textarea") (str.to_re "tfoot") (str.to_re "th") (str.to_re "thead") (str.to_re "title") (str.to_re "tr") (str.to_re "tt") (str.to_re "u") (str.to_re "ul") (str.to_re "var") (str.to_re "xmp")) (re.* (re.union (re.* (re.union (re.++ (str.to_re "\u{22}") (re.* (re.comp (str.to_re "\u{22}"))) (str.to_re "\u{22}")) (re.++ (str.to_re "'") (re.* (re.comp (str.to_re "'"))) (str.to_re "'")))) (str.to_re "\u{22}") (str.to_re "'") (str.to_re ">"))) (str.to_re ">\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "/SOAPAction:") (re.* (re.union (str.to_re "\u{d}") (str.to_re "\u{a}"))) (str.to_re "Get") (re.union (str.to_re "ServerTime") (str.to_re "FileList") (str.to_re "File")) (str.to_re "\u{22}/i\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance12488.smt2 b/benchmarks/instance12488.smt2 deleted file mode 100644 index b62061cb5..000000000 --- a/benchmarks/instance12488.smt2 +++ /dev/null @@ -1,22 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: 
https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (re.range "1" "9") (re.range "0" "9") (re.range "0" "9") (re.range "0" "9") (re.range "0" "9") (re.range "0" "9") (str.to_re "\u{a}"))))) -(assert (str.in_re X (re.++ (str.to_re "/.s3m") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) -(assert (str.in_re X (re.++ ((_ re.loop 7 8) (re.range "0" "9")) (str.to_re "\u{a}0") (re.union (str.to_re "2") (str.to_re "|") (str.to_re "3") (str.to_re "4") (str.to_re "5") (str.to_re "6") (str.to_re "8") (str.to_re "9") (str.to_re "7"))))) -(assert (str.in_re X (re.++ (re.+ (re.union (str.to_re "_") (re.range "a" "z") (re.range "0" "9") (str.to_re "-"))) (re.* (re.++ (str.to_re ".") (re.+ (re.union (str.to_re "_") (re.range "a" "z") (re.range "0" "9") (str.to_re "-"))))) (str.to_re "@") (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))) (re.opt (re.++ (re.opt (str.to_re "-")) (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))))) (re.* (re.++ (str.to_re ".") (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))) (re.opt (re.++ (re.opt (str.to_re "-")) (re.+ (re.union (re.range "a" "z") (re.range "0" "9"))))))) (str.to_re ".") (re.union ((_ re.loop 2 2) (re.range "a" "z")) (re.++ (str.to_re "xn") ((_ re.loop 2 2) (str.to_re "-")) ((_ re.loop 4 18) (re.union (re.range "a" "z") (re.range "0" "9")))) (str.to_re "arpa") (str.to_re "aero") (str.to_re "asia") (str.to_re "biz") (str.to_re "cat") (str.to_re "com") (str.to_re 
"coop") (str.to_re "edu") (str.to_re "gov") (str.to_re "info") (str.to_re "int") (str.to_re "jobs") (str.to_re "mil") (str.to_re "mobi") (str.to_re "museum") (str.to_re "name") (str.to_re "net") (str.to_re "org") (str.to_re "pro") (str.to_re "tel") (str.to_re "travel") (str.to_re "xxx")) (str.to_re "\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance12671.smt2 b/benchmarks/instance12671.smt2 deleted file mode 100644 index 164fbb7ed..000000000 --- a/benchmarks/instance12671.smt2 +++ /dev/null @@ -1,21 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.union (re.++ (re.union (re.++ (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (str.to_re "30")) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "P") (str.to_re ",") (str.to_re "p")) (re.union (str.to_re "R") (str.to_re ",") (str.to_re "r"))) (re.++ (re.union (str.to_re "J") (str.to_re ",") (str.to_re "j")) (re.union (str.to_re "U") (str.to_re ",") (str.to_re "u")) (re.union (str.to_re "N") (str.to_re ",") (str.to_re "n"))) (re.++ (re.union (str.to_re "S") (str.to_re ",") (str.to_re "s")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "P") (str.to_re ",") (str.to_re "p"))) (re.++ (re.union (str.to_re "N") (str.to_re ",") (str.to_re "n")) (re.union (str.to_re "O") (str.to_re ",") (str.to_re "o")) (re.union (str.to_re "V") (str.to_re ",") (str.to_re "v"))))) (re.++ (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "1"))) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "J") (str.to_re ",") (str.to_re "j")) (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "N") (str.to_re ",") (str.to_re "n"))) (re.++ (re.union (str.to_re "M") (str.to_re ",") (str.to_re "m")) (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "R") (str.to_re ",") (str.to_re "r"))) (re.++ (re.union (str.to_re "M") (str.to_re ",") (str.to_re "m")) (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "Y") (str.to_re ",") (str.to_re "y"))) (re.++ (re.union (str.to_re "J") (str.to_re ",") (str.to_re "j")) (re.union (str.to_re "U") (str.to_re ",") (str.to_re "u")) 
(re.union (str.to_re "L") (str.to_re ",") (str.to_re "l"))) (re.++ (re.union (str.to_re "A") (str.to_re ",") (str.to_re "a")) (re.union (str.to_re "U") (str.to_re ",") (str.to_re "u")) (re.union (str.to_re "G") (str.to_re ",") (str.to_re "g"))) (re.++ (re.union (str.to_re "O") (str.to_re ",") (str.to_re "o")) (re.union (str.to_re "C") (str.to_re ",") (str.to_re "c")) (re.union (str.to_re "T") (str.to_re ",") (str.to_re "t"))) (re.++ (re.union (str.to_re "D") (str.to_re ",") (str.to_re "d")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "C") (str.to_re ",") (str.to_re "c")))))) (str.to_re "-") ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "8"))) (str.to_re "--") ((_ re.loop 2 2) (re.range "0" "9")) (re.union (re.++ (re.union (str.to_re "0") (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "1") (str.to_re "2") (str.to_re "3") (str.to_re "5") (str.to_re "6") (str.to_re "7") (str.to_re "9"))) (re.++ (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "0") (str.to_re "1") (str.to_re "3") (str.to_re "4") (str.to_re "5") (str.to_re "7") (str.to_re "8") (str.to_re "9")))) (re.union (str.to_re "F") (str.to_re ",") (str.to_re "f")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "B") (str.to_re ",") (str.to_re "b"))) (re.++ (str.to_re "\u{a}") (re.union (re.range "1" "9") (re.++ (str.to_re "0") (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "9"))) (str.to_re "--") ((_ re.loop 2 2) (re.range "0" "9")) (re.union (re.++ (re.union (str.to_re "0") (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "0") (str.to_re "4") (str.to_re "8"))) (re.++ (re.union (str.to_re "1") (str.to_re "3") 
(str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6")))) (re.union (str.to_re "F") (str.to_re ",") (str.to_re "f")) (re.union (str.to_re "E") (str.to_re ",") (str.to_re "e")) (re.union (str.to_re "B") (str.to_re ",") (str.to_re "b")))))) -(assert (str.in_re X (re.union (re.++ (re.union (re.range "0" "9") (re.++ (re.range "0" "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "3"))) (str.to_re ":") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (re.union (str.to_re "AM") (str.to_re "PM") (str.to_re "am") (str.to_re "pm") (str.to_re "aM") (str.to_re "Am") (str.to_re "pM") (re.++ (str.to_re "P") ((_ re.loop 2 2) (str.to_re "m")))) (re.range "0" "5") (re.range "0" "9")) (re.++ (str.to_re "\u{a}") (re.union (re.range "0" "9") (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "3"))) (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (re.union (str.to_re "AM") (str.to_re "PM") (str.to_re "am") (str.to_re "pm") (str.to_re "aM") (str.to_re "Am") (str.to_re "pM") (re.++ (str.to_re "P") ((_ re.loop 2 2) (str.to_re "m")))))))) -(assert (str.in_re X (re.++ (str.to_re "Points") (re.+ (re.range "0" "9")) (str.to_re "Host:") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "toBasicwww.webcruiser.cc\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance12768.smt2 b/benchmarks/instance12768.smt2 deleted file mode 100644 index 361639b63..000000000 --- a/benchmarks/instance12768.smt2 +++ /dev/null @@ -1,22 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex 
queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (str.to_re "/User-Agent:") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Ryeol") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "HTTP") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Client") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "Class/smiH\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "Spyware") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "ToolBar") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "User-Agent:MM_RECO.EXEToClientonAlert\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "ver") (re.+ (re.range "0" "9")) (str.to_re "sports") (re.+ (re.union (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "whenu.com\u{13}wp-includes/feed.php?\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "qr/") (re.union (str.to_re "Alabama") (str.to_re "Alaska") (str.to_re "Arizona") (str.to_re "Arkansas") (str.to_re "California") (str.to_re 
"Colorado") (str.to_re "Connecticut") (str.to_re "Delaware") (str.to_re "Florida") (str.to_re "Georgia") (str.to_re "Hawaii") (str.to_re "Idaho") (str.to_re "Illinois") (str.to_re "Indiana") (str.to_re "Iowa") (str.to_re "Kansas") (str.to_re "Kentucky") (str.to_re "Louisiana") (str.to_re "Maine") (str.to_re "Maryland") (str.to_re "Massachusetts") (str.to_re "Michigan") (str.to_re "Minnesota") (str.to_re "Mississippi") (str.to_re "Missouri") (str.to_re "Montana") (str.to_re "Nebraska") (str.to_re "Nevada") (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Hampshire")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Jersey")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Mexico")) (re.++ (str.to_re "New") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "York")) (re.++ (str.to_re "North") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "North") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Dakota")) (str.to_re "Ohio") (str.to_re "Oklahoma") (str.to_re "Oregon") (str.to_re "Pennsylvania") (re.++ (str.to_re "Rhode") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Island")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Carolina")) (re.++ (str.to_re "South") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re 
"Dakota")) (str.to_re "Tennessee") (str.to_re "Texas") (str.to_re "Utah") (str.to_re "Vermont") (str.to_re "Virginia") (str.to_re "Washington") (re.++ (str.to_re "West") (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "Virginia")) (str.to_re "Wisconsin") (str.to_re "Wyoming")) (str.to_re "/\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance12833.smt2 b/benchmarks/instance12833.smt2 deleted file mode 100644 index 1cbbd4099..000000000 --- a/benchmarks/instance12833.smt2 +++ /dev/null @@ -1,22 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (str.to_re "|Connected") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "adblock.linkz.com\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "//u\u{a}") (re.range "\u{0}" "\u{ff}") (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))))) -(assert (str.in_re X (re.++ (re.* (re.++ (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.union (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") 
(str.to_re "4") (str.to_re "7"))) (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8"))) (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8"))) (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") 
(str.to_re "9"))) (re.* (re.++ (re.union (str.to_re "2") (str.to_re "5") (str.to_re "8")) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7")))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))) (re.union (str.to_re "1") (str.to_re "4") (str.to_re "7"))))) (re.* (re.union (str.to_re "0") (str.to_re "3") (str.to_re "6") (str.to_re "9"))))) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "/.pui") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance13062.smt2 b/benchmarks/instance13062.smt2 deleted file mode 100644 index 06bb16d4e..000000000 --- a/benchmarks/instance13062.smt2 +++ /dev/null @@ -1,23 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (str.to_re "/filename") (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "=") (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (re.* (re.union (str.to_re "\u{d}") (str.to_re "\u{a}"))) (str.to_re ".swf") (re.union (str.to_re "\u{22}") (str.to_re "'") (str.to_re ";") (str.to_re "\u{d}") (str.to_re "\u{a}") (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "/i\u{a}"))))) -(assert (not (str.in_re X (re.++ (re.union (re.++ (str.to_re "A") (re.union (str.to_re "L") (str.to_re "K") (str.to_re "S") (str.to_re "Z") (str.to_re "R") (str.to_re "A") (str.to_re "E") (str.to_re "P"))) (re.++ (str.to_re "C") (re.union (str.to_re "A") (str.to_re "O") (str.to_re "T"))) (re.++ (str.to_re "D") (re.union (str.to_re "E") (str.to_re "C"))) (re.++ (str.to_re "F") (re.union (str.to_re "L") (str.to_re "M"))) (re.++ (str.to_re "G") (re.union (str.to_re "A") (str.to_re "U"))) (str.to_re "HI") (re.++ (str.to_re "I") (re.union (str.to_re "A") (str.to_re "D") (str.to_re "L") (str.to_re "N"))) (re.++ (str.to_re "K") (re.union (str.to_re "S") (str.to_re "Y"))) (str.to_re "LA") (re.++ (str.to_re "M") (re.union (str.to_re "A") (str.to_re "D") (str.to_re "E") (str.to_re "H") (str.to_re "I") (str.to_re "N") (str.to_re "O") (str.to_re "P") (str.to_re "S") (str.to_re "T"))) (re.++ (str.to_re "N") (re.union (str.to_re "C") (str.to_re "D") (str.to_re "E") (str.to_re "H") (str.to_re "J") (str.to_re "M") (str.to_re "V") (str.to_re "Y"))) (re.++ (str.to_re "O") (re.union (str.to_re "H") (str.to_re "K") (str.to_re "R"))) (re.++ (str.to_re "P") (re.union (str.to_re "A") (str.to_re "R") (str.to_re "W"))) (str.to_re "RI") 
(re.++ (str.to_re "S") (re.union (str.to_re "C") (str.to_re "D"))) (re.++ (str.to_re "T") (re.union (str.to_re "N") (str.to_re "X"))) (str.to_re "UT") (re.++ (str.to_re "V") (re.union (str.to_re "A") (str.to_re "I") (str.to_re "T"))) (re.++ (str.to_re "W") (re.union (str.to_re "A") (str.to_re "I") (str.to_re "V") (str.to_re "Y")))) (str.to_re "\u{a}"))))) -(assert (str.in_re X (re.++ (str.to_re "/.wmv") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}")))) -(assert (not (str.in_re X (re.++ (str.to_re "User-Agent:") (re.* re.allchar) (str.to_re "Host:") (re.* re.allchar) (str.to_re "w3whowww.sogou.com\u{a}"))))) -(assert (str.in_re X (re.++ (str.to_re "Guarded") (re.+ (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "ready") (re.+ (re.union (re.range "0" "9") (re.range "A" "Z") (re.range "a" "z") (str.to_re "_"))) (str.to_re "PARSERHost:A-311ServerUser-Agent:scn.mystoretoolbar.com\u{13}\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance13106.smt2 b/benchmarks/instance13106.smt2 deleted file mode 100644 index f0a3462e3..000000000 --- a/benchmarks/instance13106.smt2 +++ /dev/null @@ -1,23 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (str.to_re "security") (re.+ (re.range "0" "9")) (str.to_re "Redirector\u{22}ServerHost:X-Mailer:\u{13}\u{a}")))) -(assert (str.in_re X (re.++ (re.opt (re.++ (str.to_re "1") (re.union (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "-") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re ".") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "/") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.opt (re.++ (re.opt (str.to_re "(")) ((_ re.loop 1 1) (re.range "2" "9")) ((_ re.loop 2 2) (re.range "0" "9")) (re.opt (str.to_re ")")) (re.opt (re.union (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "-") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re ".") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") 
(str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "/") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))))) ((_ re.loop 3 3) (re.range "0" "9")) (re.opt (re.union (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "-") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re ".") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (re.++ (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) (str.to_re "/") (re.opt (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")))) (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) ((_ re.loop 4 4) (re.range "0" "9")) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.++ (str.to_re "/filename=") (re.* (re.comp (str.to_re "\u{a}"))) (str.to_re ".svg/i\u{a}")))) -(assert (not (str.in_re X (str.to_re "MyHost:toHost:WinSessionwww.urlblaze.netResultHost:\u{a}")))) -(assert (not (str.in_re X (re.++ (str.to_re "/filename=") (re.opt (re.union (str.to_re "\u{22}") (str.to_re "'"))) (re.* (re.comp (str.to_re "\u{a}"))) (str.to_re ".pif") (re.union (str.to_re "\u{22}") (str.to_re "'") (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (str.to_re "/si\u{a}"))))) -(check-sat) - -(exit) diff --git a/benchmarks/instance13975.smt2 b/benchmarks/instance13975.smt2 deleted file mode 100644 index af0baede7..000000000 --- 
a/benchmarks/instance13975.smt2 +++ /dev/null @@ -1,22 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. -|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (re.opt (str.to_re "D")) (re.opt (re.union (str.to_re "-") (str.to_re "D"))) ((_ re.loop 5 5) (re.range "0" "9")) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.union (re.++ (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general") (str.to_re "unicode") (str.to_re "roman") (str.to_re "slovak") (str.to_re "czech") (str.to_re "icelandic") (re.++ (re.union (str.to_re "latv") (str.to_re "pers")) (str.to_re "ian")) (re.++ (re.union (str.to_re "dan") (str.to_re "pol") (str.to_re "span") (str.to_re "swed") (str.to_re "turk")) (str.to_re "ish")) (str.to_re "spanish2") (re.++ (re.union (str.to_re "esto") (str.to_re "lithua") (str.to_re "roma") (str.to_re "slove")) (str.to_re "nian_ci"))) (str.to_re "u") (re.union (str.to_re "cs2") (str.to_re "tf8"))) (re.++ (re.union (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re "roman"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re 
"1256"))) (str.to_re "armscii8") (str.to_re "geostd8") (str.to_re "ascii") (str.to_re "keybcs2") (str.to_re "greek") (str.to_re "hebrew") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "general_ci"))) (re.++ (re.union (str.to_re "dec8") (str.to_re "swe7")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "swedish_ci"))) (re.++ (re.union (str.to_re "hp8") (str.to_re "latin5")) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "english_ci"))) (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "gb") (re.union (str.to_re "2312") (str.to_re "k")))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "chinese_ci"))) (re.++ (re.union (str.to_re "cp932") (str.to_re "eucjpms") (re.++ (re.union (str.to_re "s") (str.to_re "u")) (str.to_re "jis"))) (str.to_re "_") (re.union (str.to_re "bin") (str.to_re "japanese_ci"))) (re.++ (str.to_re "euckr_") (re.union (str.to_re "bin") (str.to_re "korean_ci"))) (re.++ (str.to_re "tis620_") (re.union (str.to_re "bin") (str.to_re "thai_ci"))) (re.++ (str.to_re "latin1_") (re.union (str.to_re "bin") (re.++ (re.union (re.++ (re.union (str.to_re "dan") (str.to_re "span") (str.to_re "swed")) (str.to_re "ish")) (re.++ (str.to_re "german") (re.union (str.to_re "1") (str.to_re "2")))) (str.to_re "_ci")) (re.++ (str.to_re "general_c") (re.union (str.to_re "i") (str.to_re "s"))))) (re.++ (str.to_re "cp1250_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (str.to_re "general_ci"))) (re.++ (str.to_re "latin2_") (re.union (str.to_re "bin") (str.to_re "czech_cs") (re.++ (re.union (str.to_re "general") (str.to_re "hungarian") (str.to_re "croatian")) (str.to_re "_ci")))) (re.++ (str.to_re "cp1257_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "lithuanian")) (str.to_re "_ci")))) (re.++ (str.to_re "latin7_") (re.union (str.to_re "bin") (re.++ (str.to_re "general_c") (re.union (str.to_re "i") (str.to_re "s"))) (str.to_re 
"estonian_cs"))) (re.++ (str.to_re "\u{a}cp1251_") (re.union (str.to_re "bin") (re.++ (re.union (str.to_re "general") (str.to_re "bulgarian") (str.to_re "ukrainian")) (str.to_re "_ci")) (str.to_re "general_cs")))))) -(assert (not (str.in_re X (re.++ (str.to_re "/.cgm") (re.union (str.to_re "?") (str.to_re "\u{5c}") (str.to_re "/")) (str.to_re "/smiU\u{a}"))))) -(assert (str.in_re X (re.++ (re.union (str.to_re "\u{5c}") (str.to_re "s") (str.to_re "+") (str.to_re ",")) (str.to_re "\u{a}")))) -(check-sat) - -(exit) diff --git a/benchmarks/instance14260.smt2 b/benchmarks/instance14260.smt2 deleted file mode 100644 index bf36054fc..000000000 --- a/benchmarks/instance14260.smt2 +++ /dev/null @@ -1,23 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (not (str.in_re X (re.++ (str.to_re "")) (re.* re.allchar) (str.to_re "\u{a}"))))) -(assert (str.in_re X (re.++ (re.union (re.++ (re.range "0" "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "3"))) (str.to_re ":\u{a}") (re.range "0" "5") (re.range "0" "9")))) -(assert (str.in_re X (re.++ (str.to_re "Password=\u{22}") (re.union (re.++ (str.to_re "{") (re.+ re.allchar) (str.to_re "}") (re.+ (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z"))) (re.* (str.to_re "="))) (re.+ (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z")))) (str.to_re "\u{22}\u{a}")))) -(assert (str.in_re X (re.++ (re.union (re.++ (re.union (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "1"))) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "y") (str.to_re "Y"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "l") (str.to_re "L"))) (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "g") (str.to_re "G"))) (re.++ (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "c") (str.to_re "C")) (re.union (str.to_re "t") (str.to_re "T"))) (re.++ (re.union (str.to_re "d") (str.to_re "D")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "c") (str.to_re "C"))))) (re.++ (re.union (re.++ 
(re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (str.to_re "30")) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "p") (str.to_re "P")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "s") (str.to_re "S")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "p") (str.to_re "P"))) (re.++ (re.union (str.to_re "n") (str.to_re "N")) (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "v") (str.to_re "V"))))) (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "8"))) (str.to_re "-") (re.union (str.to_re "f") (str.to_re "F")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "b") (str.to_re "B")))) (str.to_re "-20") (re.union (re.++ (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "0") (str.to_re "1") (str.to_re "3") (str.to_re "4") (str.to_re "5") (str.to_re "7") (str.to_re "8") (str.to_re "9"))) (re.++ (re.union (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "1") (str.to_re "2") (str.to_re "3") (str.to_re "5") (str.to_re "6") (str.to_re "7") (str.to_re "9"))))) (re.++ (re.union (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (re.++ (str.to_re "3") (re.range "0" "1"))) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "m") (str.to_re "M")) (re.union 
(str.to_re "a") (str.to_re "A")) (re.union (str.to_re "y") (str.to_re "Y"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "l") (str.to_re "L"))) (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "g") (str.to_re "G"))) (re.++ (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "c") (str.to_re "C")) (re.union (str.to_re "t") (str.to_re "T"))) (re.++ (re.union (str.to_re "d") (str.to_re "D")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "c") (str.to_re "C"))))) (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (re.range "1" "2") (re.range "0" "9")) (str.to_re "30")) (str.to_re "-") (re.union (re.++ (re.union (str.to_re "a") (str.to_re "A")) (re.union (str.to_re "p") (str.to_re "P")) (re.union (str.to_re "r") (str.to_re "R"))) (re.++ (re.union (str.to_re "j") (str.to_re "J")) (re.union (str.to_re "u") (str.to_re "U")) (re.union (str.to_re "n") (str.to_re "N"))) (re.++ (re.union (str.to_re "s") (str.to_re "S")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "p") (str.to_re "P"))) (re.++ (re.union (str.to_re "n") (str.to_re "N")) (re.union (str.to_re "o") (str.to_re "O")) (re.union (str.to_re "v") (str.to_re "V"))))) (re.++ (re.union (re.++ (re.opt (str.to_re "0")) (re.range "1" "9")) (re.++ (str.to_re "1") (re.range "0" "9")) (re.++ (str.to_re "2") (re.range "0" "9"))) (str.to_re "-") (re.union (str.to_re "f") (str.to_re "F")) (re.union (str.to_re "e") (str.to_re "E")) (re.union (str.to_re "b") (str.to_re "B")))) (str.to_re "-20") (re.union (re.++ (re.union (str.to_re "1") (str.to_re "3") (str.to_re "5") (str.to_re "7") (str.to_re "9")) (re.union (str.to_re "2") (str.to_re "6"))) (re.++ (re.union (str.to_re "2") (str.to_re "4") (str.to_re "6") (str.to_re "8")) (re.union (str.to_re "0") (str.to_re "4") (str.to_re "8")))))) (str.to_re "\u{a}")))) -(assert (not 
(str.in_re X (re.union (str.to_re "100") ((_ re.loop 1 2) (re.range "0" "9")) (re.++ ((_ re.loop 1 2) (re.range "0" "9")) (str.to_re ",") ((_ re.loop 1 3) (re.range "0" "9")) (str.to_re "\u{a}")))))) -(check-sat) - -(exit) diff --git a/benchmarks/instance14326.smt2 b/benchmarks/instance14326.smt2 deleted file mode 100644 index 35b8ab5c2..000000000 --- a/benchmarks/instance14326.smt2 +++ /dev/null @@ -1,21 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status sat) - -(declare-const X String) -(assert (not (str.in_re X (re.union (re.++ (re.union (str.to_re "Jan") (str.to_re "Feb") (str.to_re "Mar") (str.to_re "Apr") (str.to_re "May") (str.to_re "Jun") (str.to_re "Jul") (str.to_re "Aug") (str.to_re "Sep") (str.to_re "Oct") (str.to_re "Nov") (str.to_re "Dec")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) (re.* (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}"))) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (str.to_re "jan") (str.to_re "feb") (str.to_re "mar") (str.to_re "apr") (str.to_re "may") (str.to_re "jun") (str.to_re "jul") (str.to_re "aug") (str.to_re "sep") (str.to_re "oct") (str.to_re "nov") (str.to_re "dec")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (str.to_re "January") (str.to_re "February") (str.to_re "March") (str.to_re "April") (str.to_re "May") (str.to_re "June") (str.to_re "July") (str.to_re "August") (str.to_re "September") (str.to_re "October") (str.to_re "November") (str.to_re "December")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) ((_ re.loop 4 4) (re.range "0" "9"))) (re.++ (re.union (str.to_re "january") (str.to_re "february") (str.to_re "march") (str.to_re "april") (str.to_re "may") (str.to_re "june") (str.to_re "july") (str.to_re "august") (str.to_re "september") (str.to_re "october") (str.to_re "november") (str.to_re "december")) (re.* (str.to_re ",")) (re.union (str.to_re " ") (str.to_re "\u{9}") (str.to_re "\u{a}") (str.to_re "\u{c}") (str.to_re "\u{d}")) ((_ re.loop 4 4) (re.range "0" "9")) 
(str.to_re "\u{a}")))))) -(assert (not (str.in_re X (re.++ (str.to_re "//jdb/inf.php?id=") ((_ re.loop 32 32) (re.union (re.range "a" "f") (re.range "0" "9"))) (str.to_re "/Ui\u{a}"))))) -(assert (not (str.in_re X (re.++ (re.* (re.opt (re.range "0" "9"))) (re.opt (re.++ (str.to_re ".") (re.union ((_ re.loop 1 1) (re.range "0" "9")) ((_ re.loop 2 2) (re.range "0" "9"))))) (str.to_re "\u{a}"))))) -(check-sat) - -(exit) diff --git a/benchmarks/instance14382.smt2 b/benchmarks/instance14382.smt2 deleted file mode 100644 index 72c67b77c..000000000 --- a/benchmarks/instance14382.smt2 +++ /dev/null @@ -1,21 +0,0 @@ -(set-info :smt-lib-version 2.6) -(set-logic QF_S) -(set-info :source | -Generated by: Murphy Berzish, Mitja Kulczynski, Federico Mora, Florin Manea, Joel Day, Dirk Nowotka, Vijay Ganesh, Zhengyang Lu -Generated on: 2020-10-01 -Description: Translated from a collection of real-world regex queries, AutomataArk, collected by Loris D’Antoni and Fang Wang: https://github.com/lorisdanto/automatark -Application: Evaluate solvers on real-world regex queries -Target solver: Z3str3RE, CVC4, OSTRICH, Z3seq, Z3str3, Z3-Trau -Publications: Berzish, M., Kulczynski, M., Mora, F., Manea, F., Day, J. D., Nowotka, D., & Ganesh, V. (2021, July). An SMT solver for regular expressions and linear arithmetic over string length. In CAV 2021. 
-|) -(set-info :license "https://creativecommons.org/licenses/by/4.0/") -(set-info :category "industrial") -(set-info :status unsat) - -(declare-const X String) -(assert (str.in_re X (re.++ (str.to_re "~/") (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z") (str.to_re "_")) (re.* (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z") (str.to_re "/") (str.to_re "_") (str.to_re "-"))) (str.to_re ".") (re.+ (re.union (re.range "0" "9") (re.range "a" "z") (re.range "A" "Z") (str.to_re "_") (str.to_re "-"))) (str.to_re "\u{a}")))) -(assert (str.in_re X (re.++ (re.opt (str.to_re "-")) (re.opt (str.to_re ",")) (re.union (re.++ ((_ re.loop 1 3) (re.range "0" "9")) (re.* (re.++ (str.to_re ".") ((_ re.loop 3 3) (re.range "0" "9"))))) (re.+ (re.range "0" "9"))) (re.opt (re.++ (str.to_re ",") ((_ re.loop 2 2) (re.range "0" "9")))) (str.to_re "\u{a}")))) -(assert (not (str.in_re X (re.++ (re.union (str.to_re "big5") (re.++ (str.to_re "euc") (re.union (str.to_re "kr") (str.to_re "jpms"))) (str.to_re "binary") (str.to_re "greek") (str.to_re "tis620") (str.to_re "hebrew") (str.to_re "ascii") (str.to_re "swe7") (re.++ (str.to_re "koi8") (re.union (str.to_re "r") (str.to_re "u"))) (re.++ (re.union (str.to_re "u") (str.to_re "keyb")) (str.to_re "cs2")) (re.++ (re.union (str.to_re "dec") (str.to_re "hp") (str.to_re "utf") (str.to_re "geostd") (str.to_re "armscii")) (str.to_re "8")) (re.++ (str.to_re "gb") (re.union (str.to_re "k") (str.to_re "2312"))) (re.++ (str.to_re "cp") (re.union (re.++ (str.to_re "8") (re.union (re.++ (str.to_re "5") (re.union (str.to_re "0") (str.to_re "2"))) (str.to_re "66"))) (str.to_re "932") (re.++ (str.to_re "125") (re.union (str.to_re "0") (str.to_re "1") (str.to_re "6") (str.to_re "7"))))) (re.++ (str.to_re "latin") (re.union (str.to_re "1") (str.to_re "2") (str.to_re "5") (str.to_re "7"))) (re.++ (re.union (str.to_re "u") (str.to_re "s")) (str.to_re "jis")) (re.++ (str.to_re "mac") (re.union (str.to_re "ce") (str.to_re 
"roman")))) (str.to_re "\u{a}"))))) -(check-sat) - -(exit) From 2738e4317f073720c352b64bdaa75519035d13a1 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sun, 7 Jun 2026 09:10:02 -0700 Subject: [PATCH 22/32] tuning --- src/ast/rewriter/seq_derive.cpp | 198 ++++++++++++++++---------------- src/ast/rewriter/seq_derive.h | 14 +-- 2 files changed, 107 insertions(+), 105 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 408b1e922..e77edaa1c 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -50,16 +50,28 @@ namespace seq { m_trail.reset(); } + // Reset only operation caches (union/inter/concat/complement) + // while preserving derivative caches (m_cache, m_top_cache) + void derive::reset_op_caches() { + m_union_cache.reset(); + m_inter_cache.reset(); + m_concat_cache.reset(); + m_complement_cache.reset(); + } + expr_ref derive::operator()(expr* ele, expr* r) { SASSERT(m_util.is_re(r)); - if (m_trail.size() > 100000) + if (m_trail.size() > 500000) reset(); + else if (m_trail.size() > 100000) + reset_op_caches(); // Check top-level cache (post-simplify result) expr* cached = nullptr; if (m_top_cache.find(ele, r, cached)) return expr_ref(cached, m); m_ele = ele; m_depth = 0; + m_union_hoist_budget = 0; expr_ref result = derive_rec(r); result = simplify_ite(result); m_ele = nullptr; @@ -563,6 +575,29 @@ namespace seq { is_subset(a1, b1) && is_subset(a2, b2)) return true; + // Σ*-suffix subsumption: a ⊆ Σ*·B when a's right concat spine contains Σ*·B + // Proof: if a = X·(Σ*·B), then L(a) = L(X)·L(Σ*·B). Every s in L(a) is + // of the form p·t where t ∈ L(Σ*·B), meaning t has a suffix in L(B). + // Therefore p·t also has that suffix, so p·t ∈ L(Σ*·B) = L(b). 
+ if (re().is_concat(b, b1, b2) && re().is_full_seq(b1)) { + expr* cur = a; + expr *l, *r; + while (re().is_concat(cur, l, r)) { + if (cur == b) return true; + cur = r; + } + if (cur == b) return true; + // Also check: a is a union and all members ⊆ b + // (handled by the union check above, but double-check for nested concats) + } + + // a ⊆ Σ*·B when a is a concat and its right spine contains b + // (handles non-Σ*-starting concats too, via recursive check) + if (re().is_concat(b, b1, b2) && re().is_full_seq(b1) && re().is_concat(a, a1, a2)) { + // Check if the tail of a (a2) is a subset of b + if (is_subset(a2, b)) return true; + } + // loop subsumption: r{la,ua} ⊆ r{lb,ub} when lb <= la and ua <= ub unsigned la, ua, lb, ub; if (re().is_loop(a, a1, la, ua) && re().is_loop(b, b1, lb, ub) && @@ -578,6 +613,7 @@ namespace seq { // Extract character range [lo, hi] from a derivative condition. // Conditions are of the form: + // ele == c → range [c, c] // char_le(lo_expr, ele) && char_le(ele, hi_expr) → range [lo, hi] // char_le(lo_expr, ele) → range [lo, max_char] // char_le(ele, hi_expr) → range [0, hi] @@ -592,6 +628,20 @@ namespace seq { if (m.is_not(cond, e1)) return false; + // Equality: ele == c → range [c, c] + if (m.is_eq(cond, e1, e2)) { + unsigned v; + if (u().is_const_char(e1, v) && !u().is_const_char(e2, v)) { + lo = hi = v; + return true; + } + if (u().is_const_char(e2, v) && !u().is_const_char(e1, v)) { + lo = hi = v; + return true; + } + return false; + } + // Conjunction: and(char_le(lo, x), char_le(x, hi)) if (m.is_and(cond, e1, e2)) { expr *a1, *a2, *b1, *b2; @@ -644,6 +694,15 @@ namespace seq { return false; } + // Check if a condition is a recognizable character condition (or negation thereof) + bool derive::is_char_cond(expr* c) { + unsigned lo, hi; + expr* e1; + if (m.is_not(c, e1)) + return is_char_cond(e1); + return extract_char_range(c, lo, hi); + } + // Predicate implication for character range conditions. 
// Returns true if: whenever cond_a is true, cond_b must also be true. // Used for BDD-merge of derivative ITE trees. @@ -731,13 +790,27 @@ namespace seq { return mk_ite(c1, then_br, else_br); } + // Budget-limited one-sided char-cond hoisting. + // Enables BDD merge for small alphabets; budget caps work for large ones. + if (m_union_hoist_budget < m_max_union_hoist_budget) { + if (m.is_ite(a, c1, t1, e1) && is_char_cond(c1)) { + ++m_union_hoist_budget; + return mk_ite(c1, mk_union(t1, b), mk_union(e1, b)); + } + if (m.is_ite(b, c2, t2, e2) && is_char_cond(c2)) { + ++m_union_hoist_budget; + return mk_ite(c2, mk_union(a, t2), mk_union(a, e2)); + } + } + // Conservative ITE hoisting via subsumption: // Only hoist when at least one branch simplifies by is_subset. + // Skip expensive is_subset on branches that are themselves ITE trees. if (m.is_ite(a, c1, t1, e1)) { - bool t1_sub_b = is_subset(t1, b); - bool b_sub_t1 = is_subset(b, t1); - bool e1_sub_b = is_subset(e1, b); - bool b_sub_e1 = is_subset(b, e1); + bool t1_sub_b = !m.is_ite(t1) && is_subset(t1, b); + bool b_sub_t1 = !m.is_ite(t1) && !t1_sub_b && is_subset(b, t1); + bool e1_sub_b = !m.is_ite(e1) && is_subset(e1, b); + bool b_sub_e1 = !m.is_ite(e1) && !e1_sub_b && is_subset(b, e1); if (t1_sub_b || b_sub_t1 || e1_sub_b || b_sub_e1) { expr_ref then_br = t1_sub_b ? expr_ref(b, m) : b_sub_t1 ? expr_ref(t1, m) : mk_union(t1, b); expr_ref else_br = e1_sub_b ? expr_ref(b, m) : b_sub_e1 ? 
expr_ref(e1, m) : mk_union(e1, b); @@ -745,10 +818,10 @@ namespace seq { } } if (m.is_ite(b, c2, t2, e2)) { - bool t2_sub_a = is_subset(t2, a); - bool a_sub_t2 = is_subset(a, t2); - bool e2_sub_a = is_subset(e2, a); - bool a_sub_e2 = is_subset(a, e2); + bool t2_sub_a = !m.is_ite(t2) && is_subset(t2, a); + bool a_sub_t2 = !m.is_ite(t2) && !t2_sub_a && is_subset(a, t2); + bool e2_sub_a = !m.is_ite(e2) && is_subset(e2, a); + bool a_sub_e2 = !m.is_ite(e2) && !e2_sub_a && is_subset(a, e2); if (t2_sub_a || a_sub_t2 || e2_sub_a || a_sub_e2) { expr_ref then_br = t2_sub_a ? expr_ref(a, m) : a_sub_t2 ? expr_ref(t2, m) : mk_union(a, t2); expr_ref else_br = e2_sub_a ? expr_ref(a, m) : a_sub_e2 ? expr_ref(e2, m) : mk_union(a, e2); @@ -898,15 +971,23 @@ namespace seq { } // ITE hoisting with depth bound (fallback when pred_implies doesn't fire) - if (m_inter_hoist_depth < m_max_inter_hoist_depth) { - if (m.is_ite(a, c1, t1, e1)) { + // Character conditions (recognizable ranges) get a larger depth allowance + // since they form bounded BDD minterms for small alphabets. + if (m.is_ite(a, c1, t1, e1)) { + bool char_cond = is_char_cond(c1); + unsigned max_depth = char_cond ? 8 : m_max_inter_hoist_depth; + if (m_inter_hoist_depth < max_depth) { m_inter_hoist_depth++; expr_ref then_br = mk_inter(t1, b); expr_ref else_br = mk_inter(e1, b); m_inter_hoist_depth--; return mk_ite(c1, then_br, else_br); } - if (m.is_ite(b, c2, t2, e2)) { + } + if (m.is_ite(b, c2, t2, e2)) { + bool char_cond = is_char_cond(c2); + unsigned max_depth = char_cond ? 
8 : m_max_inter_hoist_depth; + if (m_inter_hoist_depth < max_depth) { m_inter_hoist_depth++; expr_ref then_br = mk_inter(a, t2); expr_ref else_br = mk_inter(a, e2); @@ -1013,11 +1094,16 @@ namespace seq { return expr_ref(re().mk_empty(a->get_sort()), m); // Push through ITE: ~(ite(c, t, e)) → ite(c, ~t, ~e) + // Only distribute if t or e is empty, full, or a complement + // (avoids exponential blowup on complex ITE trees) expr* c, * t, * e; if (m.is_ite(a, c, t, e)) { - expr_ref ct = mk_complement(t); - expr_ref ce = mk_complement(e); - return mk_ite(c, ct, ce); + if (re().is_empty(t) || re().is_full_seq(t) || re().is_complement(t) || + re().is_empty(e) || re().is_full_seq(e) || re().is_complement(e)) { + expr_ref ct = mk_complement(t); + expr_ref ce = mk_complement(e); + return mk_ite(c, ct, ce); + } } // ~ε → .+ @@ -1118,88 +1204,6 @@ namespace seq { return result; } - // ------------------------------------------------------- - // ITE-tree combinators (analogous to REsharp mk_binary/mk_unary) - // ------------------------------------------------------- - - expr_ref derive::ite_combine_binary(expr* d1, expr* d2, - std::function const& op) { - expr *c1, *t1, *e1, *c2, *t2, *e2; - bool is_ite1 = m.is_ite(d1, c1, t1, e1); - bool is_ite2 = m.is_ite(d2, c2, t2, e2); - - // Both are leaves (non-ITE) - if (!is_ite1 && !is_ite2) - return op(d1, d2); - - // d1 is ITE, d2 is not — linear distribution (no depth cost) - if (is_ite1 && !is_ite2) { - expr_ref then_r = ite_combine_binary(t1, d2, op); - expr_ref else_r = ite_combine_binary(e1, d2, op); - return mk_ite(c1, then_r, else_r); - } - - // d2 is ITE, d1 is not — linear distribution (no depth cost) - if (!is_ite1 && is_ite2) { - expr_ref then_r = ite_combine_binary(d1, t2, op); - expr_ref else_r = ite_combine_binary(d1, e2, op); - return mk_ite(c2, then_r, else_r); - } - - // Both are ITE — this is the cross-product case, consume depth budget - m_inter_hoist_depth++; - expr_ref result(m); - - if (c1 == c2) { - // Same 
condition: combine pairwise (no cross-product) - expr_ref then_r = ite_combine_binary(t1, t2, op); - expr_ref else_r = ite_combine_binary(e1, e2, op); - result = mk_ite(c1, then_r, else_r); - } - else { - // Different conditions. Order by id for canonical form. - if (c1->get_id() < c2->get_id()) { - std::swap(d1, d2); - std::swap(c1, c2); - std::swap(t1, t2); - std::swap(e1, e2); - } - - // Now c1->get_id() >= c2->get_id(). Hoist c1. - expr_ref r1(m), r2(m); - if (pred_implies(c1, c2)) - r1 = ite_combine_binary(t1, t2, op); - else if (pred_implies(c1, m.mk_not(c2))) - r1 = ite_combine_binary(t1, e2, op); - else - r1 = ite_combine_binary(t1, d2, op); - - expr_ref notc1(m.mk_not(c1), m); - if (pred_implies(notc1, c2)) - r2 = ite_combine_binary(e1, t2, op); - else if (pred_implies(notc1, m.mk_not(c2))) - r2 = ite_combine_binary(e1, e2, op); - else - r2 = ite_combine_binary(e1, d2, op); - - result = mk_ite(c1, r1, r2); - } - - m_inter_hoist_depth--; - return result; - } - - expr_ref derive::ite_combine_unary(expr* d, - std::function const& op) { - expr* c, * t, * e; - if (m.is_ite(d, c, t, e)) { - expr_ref then_r = ite_combine_unary(t, op); - expr_ref else_r = ite_combine_unary(e, op); - return mk_ite(c, then_r, else_r); - } - return op(d); - } - // ------------------------------------------------------- // Distribute concat through ITE/union structure of derivative // ------------------------------------------------------- diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 1c6623bb6..b2caa394b 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -77,6 +77,10 @@ namespace seq { unsigned m_inter_hoist_depth { 0 }; static const unsigned m_max_inter_hoist_depth = 4; + // Depth limit for one-sided union hoisting (global budget per derivative call) + unsigned m_union_hoist_budget { 0 }; + static const unsigned m_max_union_hoist_budget = 32; + seq_util::rex& re() { return m_util.re; } seq_util& u() { return m_util; } 
@@ -111,14 +115,6 @@ namespace seq { expr_ref mk_union_from_sorted(expr_ref_vector& args); expr_ref mk_inter_from_sorted(expr_ref_vector& args); - // ITE-tree binary combinator (analogous to REsharp mk_binary) - // Combines two ITE-tree derivatives with a binary regex operation - expr_ref ite_combine_binary(expr* d1, expr* d2, - std::function const& op); - - // ITE-tree unary combinator (analogous to REsharp mk_unary) - expr_ref ite_combine_unary(expr* d, std::function const& op); - // Distribute concatenation through ITE/union in derivative expr_ref mk_deriv_concat(expr* d, expr* tail); expr_ref mk_deriv_concat_core(expr* d, expr* tail); @@ -133,6 +129,7 @@ namespace seq { // Returns true if condition a implies condition b. bool pred_implies(expr* a, expr* b); bool extract_char_range(expr* cond, unsigned& lo, unsigned& hi); + bool is_char_cond(expr* c); // Normalize reverse(r) by pushing reverse inward expr_ref normalize_reverse(expr* r); @@ -157,6 +154,7 @@ namespace seq { sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; } void reset(); + void reset_op_caches(); public: derive(ast_manager& m); From 8deac03ca84a61872251dd4e0bdfcc085890a5b8 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Mon, 8 Jun 2026 20:43:43 -0700 Subject: [PATCH 23/32] Refactor seq_derive: inline path pruning with ACI normalization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace simplify_ite_rec post-hoc pass with inline path pruning: - push/pop API with lbool return (l_true=implied, l_undef=pushed, l_false=contradicts) - apply_ite hoists ITE through union/inter/complement with path-aware pruning - Path-aware caching for mk_union, mk_inter, mk_complement - Incremental path expression maintenance for cache keys - Complement always pushes through ITE for same-condition merge - ACI normalization (flatten/sort/deduplicate) for union base case - is_subset subsumption prevents unbounded union 
growth - Prefix factoring (a·x ∪ a·y = a·(x ∪ y)) for loop derivatives - seq_rewriter passed as reference to derive class - Depth-limited single-ITE hoisting (path_stack.size() < 8) - pred_implies with signed atoms avoids mk_not allocations - extract_char_range properly checks m_ele identity Results: 0 timeouts on regression suite (vs 2 on master). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 1093 +++++++++++++------------------ src/ast/rewriter/seq_derive.h | 88 +-- src/ast/rewriter/seq_rewriter.h | 4 +- 3 files changed, 495 insertions(+), 690 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index e77edaa1c..3594c69f2 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -22,6 +22,7 @@ Authors: --*/ #include "ast/rewriter/seq_derive.h" +#include "ast/rewriter/seq_rewriter.h" #include "ast/ast_pp.h" #include "ast/array_decl_plugin.h" #include "ast/rewriter/bool_rewriter.h" @@ -30,13 +31,15 @@ Authors: namespace seq { - derive::derive(ast_manager& m) : + derive::derive(ast_manager& m, seq_rewriter& re) : m(m), m_util(m), m_autil(m), m_br(m), + m_re(re), m_trail(m), - m_ele(m) { + m_ele(m), + m_path_expr(m) { m_br.set_flat_and_or(false); } @@ -71,9 +74,12 @@ namespace seq { return expr_ref(cached, m); m_ele = ele; m_depth = 0; - m_union_hoist_budget = 0; + // Initialize path state for inline pruning + m_path.reset(); + m_intervals.reset(); + m_intervals.push_back(std::make_pair(0u, u().max_char())); + m_path_expr = m.mk_true(); expr_ref result = derive_rec(r); - result = simplify_ite(result); m_ele = nullptr; // Cache and pin the final result m_top_cache.insert(ele, r, result); @@ -529,87 +535,6 @@ namespace seq { // Smart constructors with simplification // ------------------------------------------------------- - // Lightweight structural subsumption: checks if L(a) ⊆ L(b) - // Returns true only when subsumption can be determined 
structurally. - bool derive::is_subset(expr* a, expr* b) { - if (a == b) return true; - if (re().is_empty(a)) return true; - if (re().is_full_seq(b)) return true; - - // a ⊆ .+ iff a is non-nullable (non-nullable means ε ∉ L(a)) - expr* b1 = nullptr; - if (re().is_plus(b, b1) && re().is_full_char(b1) && - re().get_info(a).nullable == l_false) - return true; - - // a ⊆ a* (since a* accepts everything a does and more) - if (re().is_star(b, b1) && a == b1) return true; - - // a* ⊆ b* if a ⊆ b - expr* a1 = nullptr; - if (re().is_star(a, a1) && re().is_star(b, b1) && is_subset(a1, b1)) return true; - - // a ⊆ b1 ∪ b2 if a ⊆ b1 or a ⊆ b2 - if (re().is_union(b, b1, a1)) { - if (is_subset(a, b1) || is_subset(a, a1)) return true; - } - - // a1 ∪ a2 ⊆ b if a1 ⊆ b and a2 ⊆ b - if (re().is_union(a, a1, b1)) { - if (is_subset(a1, b) && is_subset(b1, b)) return true; - } - - // a1 ∩ a2 ⊆ b if a1 ⊆ b or a2 ⊆ b - if (re().is_intersection(a, a1, b1)) { - if (is_subset(a1, b) || is_subset(b1, b)) return true; - } - - // a ⊆ b1 ∩ b2 if a ⊆ b1 and a ⊆ b2 - if (re().is_intersection(b, b1, a1)) { - if (is_subset(a, b1) && is_subset(a, a1)) return true; - } - - // concat subsumption: a1·a2 ⊆ b1·b2 when a1 ⊆ b1 and a2 ⊆ b2 - expr* a2 = nullptr, * b2 = nullptr; - if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && - is_subset(a1, b1) && is_subset(a2, b2)) - return true; - - // Σ*-suffix subsumption: a ⊆ Σ*·B when a's right concat spine contains Σ*·B - // Proof: if a = X·(Σ*·B), then L(a) = L(X)·L(Σ*·B). Every s in L(a) is - // of the form p·t where t ∈ L(Σ*·B), meaning t has a suffix in L(B). - // Therefore p·t also has that suffix, so p·t ∈ L(Σ*·B) = L(b). 
- if (re().is_concat(b, b1, b2) && re().is_full_seq(b1)) { - expr* cur = a; - expr *l, *r; - while (re().is_concat(cur, l, r)) { - if (cur == b) return true; - cur = r; - } - if (cur == b) return true; - // Also check: a is a union and all members ⊆ b - // (handled by the union check above, but double-check for nested concats) - } - - // a ⊆ Σ*·B when a is a concat and its right spine contains b - // (handles non-Σ*-starting concats too, via recursive check) - if (re().is_concat(b, b1, b2) && re().is_full_seq(b1) && re().is_concat(a, a1, a2)) { - // Check if the tail of a (a2) is a subset of b - if (is_subset(a2, b)) return true; - } - - // loop subsumption: r{la,ua} ⊆ r{lb,ub} when lb <= la and ua <= ub - unsigned la, ua, lb, ub; - if (re().is_loop(a, a1, la, ua) && re().is_loop(b, b1, lb, ub) && - a1 == b1 && lb <= la && ua <= ub) - return true; - - // complement: ~a ⊆ ~b if b ⊆ a - if (re().is_complement(a, a1) && re().is_complement(b, b1)) - return is_subset(b1, a1); - - return false; - } // Extract character range [lo, hi] from a derivative condition. 
// Conditions are of the form: @@ -623,19 +548,18 @@ namespace seq { lo = 0; hi = u().max_char(); - // Negation: ~(range [a,b]) = [0,a-1] ∪ [b+1,max] - // We don't handle negation here — it's handled via pred_implies logic + // Negation: not handled here — handled via pred_implies logic if (m.is_not(cond, e1)) return false; // Equality: ele == c → range [c, c] if (m.is_eq(cond, e1, e2)) { unsigned v; - if (u().is_const_char(e1, v) && !u().is_const_char(e2, v)) { + if (u().is_const_char(e1, v) && e2 == m_ele) { lo = hi = v; return true; } - if (u().is_const_char(e2, v) && !u().is_const_char(e1, v)) { + if (u().is_const_char(e2, v) && e1 == m_ele) { lo = hi = v; return true; } @@ -646,33 +570,17 @@ namespace seq { if (m.is_and(cond, e1, e2)) { expr *a1, *a2, *b1, *b2; unsigned v; - if (u().is_char_le(e1, a1, a2) && u().is_char_le(e2, b1, b2)) { - // e1: a1 <= a2, e2: b1 <= b2 - // Expect: lo <= ele (a1=const, a2=var) and ele <= hi (b1=var, b2=const) - // OR: ele <= hi (a1=var, a2=const) and lo <= ele (b1=const, b2=var) - if (u().is_const_char(a1, v) && u().is_const_char(b2, lo)) { - // e1: const <= a2, e2: b1 <= const - // This is: v <= ele and ele <= lo — wrong naming, let me fix - lo = v; - hi = 0; - if (u().is_const_char(b2, hi)) - return true; - } - } - // Try more carefully: extract from each conjunct - lo = 0; - hi = u().max_char(); if (u().is_char_le(e1, a1, a2)) { - if (u().is_const_char(a1, v) && !u().is_const_char(a2, v)) + if (u().is_const_char(a1, v) && a2 == m_ele) lo = std::max(lo, v); // v <= ele - else if (!u().is_const_char(a1, v) && u().is_const_char(a2, v)) + else if (a1 == m_ele && u().is_const_char(a2, v)) hi = std::min(hi, v); // ele <= v } if (u().is_char_le(e2, b1, b2)) { unsigned v2; - if (u().is_const_char(b1, v2) && !u().is_const_char(b2, v2)) + if (u().is_const_char(b1, v2) && b2 == m_ele) lo = std::max(lo, v2); // v2 <= ele - else if (!u().is_const_char(b1, v2) && u().is_const_char(b2, v2)) + else if (b1 == m_ele && u().is_const_char(b2, 
v2)) hi = std::min(hi, v2); // ele <= v2 } return lo <= hi; @@ -681,11 +589,11 @@ namespace seq { // Single char_le if (u().is_char_le(cond, lhs, rhs)) { unsigned v; - if (u().is_const_char(lhs, v) && !u().is_const_char(rhs, v)) { + if (u().is_const_char(lhs, v) && rhs == m_ele) { lo = v; // v <= ele return true; } - if (!u().is_const_char(lhs, v) && u().is_const_char(rhs, v)) { + if (lhs == m_ele && u().is_const_char(rhs, v)) { hi = v; // ele <= v return true; } @@ -694,67 +602,125 @@ namespace seq { return false; } - // Check if a condition is a recognizable character condition (or negation thereof) - bool derive::is_char_cond(expr* c) { - unsigned lo, hi; - expr* e1; - if (m.is_not(c, e1)) - return is_char_cond(e1); - return extract_char_range(c, lo, hi); - } - // Predicate implication for character range conditions. // Returns true if: whenever cond_a is true, cond_b must also be true. // Used for BDD-merge of derivative ITE trees. - bool derive::pred_implies(expr* a, expr* b) { - if (a == b) return true; + // pred_implies(sign_a, a, sign_b, b): does (sign_a ? ¬a : a) imply (sign_b ? ¬b : b)? + bool derive::pred_implies(bool sign_a, expr* a, bool sign_b, expr* b) { + // Same atom: check sign compatibility + if (a == b) return sign_a == sign_b; - expr *nota = nullptr, *notb = nullptr; - - // ~a implies ~b iff b implies a - if (m.is_not(a, nota) && m.is_not(b, notb)) - return pred_implies(notb, nota); + // Both negated: ¬a → ¬b iff b → a, i.e. 
pred_implies(false, b, false, a) + if (sign_a && sign_b) + return pred_implies(false, b, false, a); unsigned lo_a, hi_a, lo_b, hi_b; - // a implies b: range_a ⊆ range_b - if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) - return lo_b <= lo_a && hi_a <= hi_b; - - // a implies ~b: range_a ∩ range_b = ∅ - if (m.is_not(b, notb)) { - if (extract_char_range(a, lo_a, hi_a) && extract_char_range(notb, lo_b, hi_b)) + if (!sign_a && !sign_b) { + // a → b: range_a ⊆ range_b + if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) + return lo_b <= lo_a && hi_a <= hi_b; + } + else if (!sign_a && sign_b) { + // a → ¬b: range_a ∩ range_b = ∅ + if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) return hi_a < lo_b || hi_b < lo_a; } - - // ~a implies b: complement of range_a ⊆ range_b - // This is true when range_b covers everything outside range_a - // i.e., lo_b == 0 and hi_b >= max_char, minus range_a... complex, skip for now - if (m.is_not(a, nota)) { - if (extract_char_range(nota, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) - return lo_b <= 0 && hi_b >= u().max_char(); // only if b is universal + else if (sign_a && !sign_b) { + // ¬a → b: complement of range_a ⊆ range_b + if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) + return lo_b == 0 && hi_b >= u().max_char(); } return false; } + bool derive::pred_implies(expr* a, expr* b) { + expr* nota = nullptr, * notb = nullptr; + bool sign_a = m.is_not(a, nota); + bool sign_b = m.is_not(b, notb); + return pred_implies(sign_a, sign_a ? nota : a, sign_b, sign_b ? 
notb : b); + } + expr_ref derive::mk_union(expr* a, expr* b) { - // Check op cache + // Check path-aware op cache + expr* pe = get_path_expr(); expr* cached = nullptr; - if (m_union_cache.find(a, b, cached)) + if (m_union_cache.find(a, b, pe, cached)) return expr_ref(cached, m); expr_ref result = mk_union_core(a, b); // Store in cache - m_union_cache.insert(a, b, result); + m_union_cache.insert(a, b, pe, result); m_trail.push_back(a); m_trail.push_back(b); + m_trail.push_back(pe); m_trail.push_back(result); return result; } + // Lightweight structural subsumption: checks if L(a) ⊆ L(b) + bool derive::is_subset(expr* a, expr* b) { + if (a == b) return true; + if (re().is_empty(a)) return true; + if (re().is_full_seq(b)) return true; + + expr* b1 = nullptr; + if (re().is_plus(b, b1) && re().is_full_char(b1) && + re().get_info(a).nullable == l_false) + return true; + + if (re().is_star(b, b1) && a == b1) return true; + + expr* a1 = nullptr; + if (re().is_star(a, a1) && re().is_star(b, b1) && is_subset(a1, b1)) return true; + + if (re().is_union(b, b1, a1)) { + if (is_subset(a, b1) || is_subset(a, a1)) return true; + } + if (re().is_union(a, a1, b1)) { + if (is_subset(a1, b) && is_subset(b1, b)) return true; + } + if (re().is_intersection(a, a1, b1)) { + if (is_subset(a1, b) || is_subset(b1, b)) return true; + } + if (re().is_intersection(b, b1, a1)) { + if (is_subset(a, b1) && is_subset(a, a1)) return true; + } + + expr* a2 = nullptr, * b2 = nullptr; + if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && + is_subset(a1, b1) && is_subset(a2, b2)) + return true; + + // loop subsumption: r{la,ua} ⊆ r{lb,ub} when lb <= la and ua <= ub + unsigned la, ua, lb, ub; + if (re().is_loop(a, a1, la, ua) && re().is_loop(b, b1, lb, ub) && + a1 == b1 && lb <= la && ua <= ub) + return true; + + if (re().is_complement(a, a1) && re().is_complement(b, b1)) + return is_subset(b1, a1); + + return false; + } + + void derive::flatten_union(expr* e, expr_ref_vector& args) { + expr* 
a, *b; + if (re().is_union(e, a, b)) { + flatten_union(a, args); + flatten_union(b, args); + } else { + args.push_back(e); + } + } + expr_ref derive::mk_union_core(expr* a, expr* b) { + // Canonical order: smaller id first + if (a->get_id() > b->get_id()) + std::swap(a, b); + // Identity / annihilator if (a == b) return expr_ref(a, m); if (re().is_empty(a)) return expr_ref(b, m); @@ -769,141 +735,83 @@ namespace seq { if (re().is_complement(b, c) && c == a) return expr_ref(re().mk_full_seq(a->get_sort()), m); - // Subsumption: a ∪ b = b if a ⊆ b, a ∪ b = a if b ⊆ a - if (is_subset(a, b)) return expr_ref(b, m); - if (is_subset(b, a)) return expr_ref(a, m); + // ITE handling with path pruning + expr *c1, *t1, *e1, *c2, *t2, *e2; + auto union_op = [&](expr* x, expr* y) { return mk_union(x, y); }; + + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { + expr_ref r(m); + if (c1 == c2) + r = apply_ite(c1, t1, e1, t2, e2, union_op); + else + r = apply_ite(c1, t1, e1, b, union_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); + } + if (m_path_stack.size() < 8) { + if (m.is_ite(a, c1, t1, e1)) { + expr_ref r = apply_ite(c1, t1, e1, b, union_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); + } + if (m.is_ite(b, c2, t2, e2)) { + expr_ref r = apply_ite(c2, t2, e2, a, union_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); + } + } // Prefix factoring: a·x ∪ a·y = a·(x ∪ y) expr *a1, *a2, *b1, *b2; if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) { expr_ref tail = mk_union(a2, b2); - return mk_deriv_concat(expr_ref(a1, m), tail); + return mk_deriv_concat(a1, tail); } - // ITE handling for union - expr *c1, *t1, *e1, *c2, *t2, *e2; + // star absorbs epsilon: r* ∪ ε = r* + if (re().is_star(a) && re().is_epsilon(b)) return expr_ref(a, m); + if (re().is_star(b) && re().is_epsilon(a)) return expr_ref(b, m); - // Same condition merge (cheap, always correct) - if (m.is_ite(a, 
c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { - expr_ref then_br = mk_union(t1, t2); - expr_ref else_br = mk_union(e1, e2); - return mk_ite(c1, then_br, else_br); - } + // Subsumption: a ∪ b = b if a ⊆ b, a ∪ b = a if b ⊆ a + if (is_subset(a, b)) return expr_ref(b, m); + if (is_subset(b, a)) return expr_ref(a, m); - // Budget-limited one-sided char-cond hoisting. - // Enables BDD merge for small alphabets; budget caps work for large ones. - if (m_union_hoist_budget < m_max_union_hoist_budget) { - if (m.is_ite(a, c1, t1, e1) && is_char_cond(c1)) { - ++m_union_hoist_budget; - return mk_ite(c1, mk_union(t1, b), mk_union(e1, b)); - } - if (m.is_ite(b, c2, t2, e2) && is_char_cond(c2)) { - ++m_union_hoist_budget; - return mk_ite(c2, mk_union(a, t2), mk_union(a, e2)); - } - } - - // Conservative ITE hoisting via subsumption: - // Only hoist when at least one branch simplifies by is_subset. - // Skip expensive is_subset on branches that are themselves ITE trees. - if (m.is_ite(a, c1, t1, e1)) { - bool t1_sub_b = !m.is_ite(t1) && is_subset(t1, b); - bool b_sub_t1 = !m.is_ite(t1) && !t1_sub_b && is_subset(b, t1); - bool e1_sub_b = !m.is_ite(e1) && is_subset(e1, b); - bool b_sub_e1 = !m.is_ite(e1) && !e1_sub_b && is_subset(b, e1); - if (t1_sub_b || b_sub_t1 || e1_sub_b || b_sub_e1) { - expr_ref then_br = t1_sub_b ? expr_ref(b, m) : b_sub_t1 ? expr_ref(t1, m) : mk_union(t1, b); - expr_ref else_br = e1_sub_b ? expr_ref(b, m) : b_sub_e1 ? expr_ref(e1, m) : mk_union(e1, b); - return mk_ite(c1, then_br, else_br); - } - } - if (m.is_ite(b, c2, t2, e2)) { - bool t2_sub_a = !m.is_ite(t2) && is_subset(t2, a); - bool a_sub_t2 = !m.is_ite(t2) && !t2_sub_a && is_subset(a, t2); - bool e2_sub_a = !m.is_ite(e2) && is_subset(e2, a); - bool a_sub_e2 = !m.is_ite(e2) && !e2_sub_a && is_subset(a, e2); - if (t2_sub_a || a_sub_t2 || e2_sub_a || a_sub_e2) { - expr_ref then_br = t2_sub_a ? expr_ref(a, m) : a_sub_t2 ? expr_ref(t2, m) : mk_union(a, t2); - expr_ref else_br = e2_sub_a ? 
expr_ref(a, m) : a_sub_e2 ? expr_ref(e2, m) : mk_union(a, e2); - return mk_ite(c2, then_br, else_br); - } - } - - // BDD merge for union: only when both are ITE and pred_implies fires - // (avoids exponential blowup when conditions are unrelated) - if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { - // Only merge if we can determine the relationship between conditions - bool c1_imp_c2 = pred_implies(c1, c2); - bool c1_imp_nc2 = !c1_imp_c2 && pred_implies(c1, m.mk_not(c2)); - expr_ref notc1(m.mk_not(c1), m); - bool nc1_imp_c2 = pred_implies(notc1, c2); - bool nc1_imp_nc2 = !nc1_imp_c2 && pred_implies(notc1, m.mk_not(c2)); - if (c1_imp_c2 || c1_imp_nc2 || nc1_imp_c2 || nc1_imp_nc2) { - // pred_implies fires — safe to merge without exponential blowup - expr_ref r1(m), r2(m); - // Under c1: - if (c1_imp_c2) - r1 = mk_union(t1, t2); - else if (c1_imp_nc2) - r1 = mk_union(t1, e2); - else - r1 = mk_union(t1, b); - // Under ~c1: - if (nc1_imp_c2) - r2 = mk_union(e1, t2); - else if (nc1_imp_nc2) - r2 = mk_union(e1, e2); - else - r2 = mk_union(e1, b); - return mk_ite(c1, r1, r2); - } - } - - // ACI: flatten, sort, deduplicate + // ACI normalization: flatten, sort by id, deduplicate expr_ref_vector args(m); flatten_union(a, args); flatten_union(b, args); - - // Sort by expr id for canonical form - std::stable_sort(args.data(), args.data() + args.size(), - [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); - - // Deduplicate - unsigned j = 0; - for (unsigned i = 0; i < args.size(); ++i) { - if (j > 0 && args.get(i) == args.get(j - 1)) - continue; // skip duplicate - if (re().is_empty(args.get(i))) - continue; // skip empty - if (re().is_full_seq(args.get(i))) - return expr_ref(args.get(i), m); // universal absorbs - args.set(j++, args.get(i)); + std::sort(args.data(), args.data() + args.size(), [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); + expr_ref result(args.get(0), m); + for (unsigned i = 1; i < args.size(); ++i) { + if (args.get(i) != 
args.get(i - 1)) + result = expr_ref(re().mk_union(result, args.get(i)), m); } - args.shrink(j); - - if (args.empty()) - return expr_ref(re().mk_empty(a->get_sort()), m); - - return mk_union_from_sorted(args); + return result; } expr_ref derive::mk_inter(expr* a, expr* b) { - // Check op cache + // Check path-aware op cache + expr* pe = get_path_expr(); expr* cached = nullptr; - if (m_inter_cache.find(a, b, cached)) + if (m_inter_cache.find(a, b, pe, cached)) return expr_ref(cached, m); expr_ref result = mk_inter_core(a, b); // Store in cache - m_inter_cache.insert(a, b, result); + m_inter_cache.insert(a, b, pe, result); m_trail.push_back(a); m_trail.push_back(b); + m_trail.push_back(pe); m_trail.push_back(result); return result; } expr_ref derive::mk_inter_core(expr* a, expr* b) { + // Canonical order: smaller id first + if (a->get_id() > b->get_id()) + std::swap(a, b); + // Identity / annihilator if (a == b) return expr_ref(a, m); if (re().is_empty(a)) return expr_ref(a, m); @@ -918,127 +826,34 @@ namespace seq { if (re().is_complement(b, c) && c == a) return expr_ref(re().mk_empty(a->get_sort()), m); - // Subsumption: a ∩ b = a if a ⊆ b, a ∩ b = b if b ⊆ a - if (is_subset(a, b)) return expr_ref(a, m); - if (is_subset(b, a)) return expr_ref(b, m); - - // Prefix factoring: a·x ∩ a·y = a·(x ∩ y) - expr *a1, *b1, *a2, *b2; - if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) { - expr_ref tail = mk_inter(a2, b2); - return mk_deriv_concat(expr_ref(a1, m), tail); - } - - // ITE handling for intersection + // ITE handling with path pruning expr *c1, *t1, *e1, *c2, *t2, *e2; + auto inter_op = [&](expr* x, expr* y) { return mk_inter(x, y); }; - // Same condition merge - if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { - expr_ref then_br = mk_inter(t1, t2); - expr_ref else_br = mk_inter(e1, e2); - return mk_ite(c1, then_br, else_br); - } - - // Both-ITE with pred_implies: exploit condition relationships (no depth cost) if 
(m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { - // Order conditions: larger id on outside - if (c1->get_id() < c2->get_id()) { - std::swap(a, b); - std::swap(c1, c2); - std::swap(t1, t2); - std::swap(e1, e2); + expr_ref r(m); + if (c1 == c2) + r = apply_ite(c1, t1, e1, t2, e2, inter_op); + else + r = apply_ite(c1, t1, e1, b, inter_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); + } + if (m_path_stack.size() < 8) { + if (m.is_ite(a, c1, t1, e1)) { + expr_ref r = apply_ite(c1, t1, e1, b, inter_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); } - expr_ref r1(m), r2(m); - bool have_r1 = false, have_r2 = false; - // Under c1: what do we know about c2? - if (pred_implies(c1, c2)) { - r1 = mk_inter(t1, t2); have_r1 = true; - } else if (pred_implies(c1, m.mk_not(c2))) { - r1 = mk_inter(t1, e2); have_r1 = true; - } - // Under ~c1: what do we know about c2? - expr_ref notc1(m.mk_not(c1), m); - if (pred_implies(notc1, c2)) { - r2 = mk_inter(e1, t2); have_r2 = true; - } else if (pred_implies(notc1, m.mk_not(c2))) { - r2 = mk_inter(e1, e2); have_r2 = true; - } - if (have_r1 || have_r2) { - if (!have_r1) r1 = mk_inter(t1, b); - if (!have_r2) r2 = mk_inter(e1, b); - return mk_ite(c1, r1, r2); + if (m.is_ite(b, c2, t2, e2)) { + expr_ref r = apply_ite(c2, t2, e2, a, inter_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); } } - // ITE hoisting with depth bound (fallback when pred_implies doesn't fire) - // Character conditions (recognizable ranges) get a larger depth allowance - // since they form bounded BDD minterms for small alphabets. - if (m.is_ite(a, c1, t1, e1)) { - bool char_cond = is_char_cond(c1); - unsigned max_depth = char_cond ? 
8 : m_max_inter_hoist_depth; - if (m_inter_hoist_depth < max_depth) { - m_inter_hoist_depth++; - expr_ref then_br = mk_inter(t1, b); - expr_ref else_br = mk_inter(e1, b); - m_inter_hoist_depth--; - return mk_ite(c1, then_br, else_br); - } - } - if (m.is_ite(b, c2, t2, e2)) { - bool char_cond = is_char_cond(c2); - unsigned max_depth = char_cond ? 8 : m_max_inter_hoist_depth; - if (m_inter_hoist_depth < max_depth) { - m_inter_hoist_depth++; - expr_ref then_br = mk_inter(a, t2); - expr_ref else_br = mk_inter(a, e2); - m_inter_hoist_depth--; - return mk_ite(c2, then_br, else_br); - } - } - - // ACI: flatten, sort, deduplicate - expr_ref_vector args(m); - flatten_inter(a, args); - flatten_inter(b, args); - - std::stable_sort(args.data(), args.data() + args.size(), - [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); - - unsigned j = 0; - for (unsigned i = 0; i < args.size(); ++i) { - if (j > 0 && args.get(i) == args.get(j - 1)) - continue; - if (re().is_full_seq(args.get(i))) - continue; // skip universal - if (re().is_empty(args.get(i))) - return expr_ref(args.get(i), m); // empty absorbs - args.set(j++, args.get(i)); - } - args.shrink(j); - - if (args.empty()) - return expr_ref(re().mk_full_seq(a->get_sort()), m); - - // Special: r* ∩ .+ = r+ - expr* star_body = nullptr; - int star_idx = -1, dotplus_idx = -1; - for (unsigned i = 0; i < args.size(); ++i) { - if (re().is_star(args.get(i), star_body)) - star_idx = i; - if (re().is_dot_plus(args.get(i))) - dotplus_idx = i; - } - if (star_idx >= 0 && dotplus_idx >= 0 && star_body) { - args.set(star_idx, re().mk_plus(star_body)); - // Remove .+ by shifting - for (unsigned i = dotplus_idx; i + 1 < args.size(); ++i) - args.set(i, args.get(i + 1)); - args.shrink(args.size() - 1); - if (args.size() == 1) - return expr_ref(args.get(0), m); - } - - return mk_inter_from_sorted(args); + // Base case: build raw intersection + return expr_ref(re().mk_inter(a, b), m); } expr_ref derive::mk_concat(expr* a, expr* b) { @@ 
-1067,16 +882,18 @@ namespace seq { } expr_ref derive::mk_complement(expr* a) { - // Check op cache + // Check path-aware op cache + expr* pe = get_path_expr(); expr* cached = nullptr; - if (m_complement_cache.find(a, cached)) + if (m_complement_cache.find(a, pe, cached)) return expr_ref(cached, m); expr_ref result = mk_complement_core(a); // Store in cache - m_complement_cache.insert(a, result); + m_complement_cache.insert(a, pe, result); m_trail.push_back(a); + m_trail.push_back(pe); m_trail.push_back(result); return result; } @@ -1093,17 +910,13 @@ namespace seq { if (re().is_full_seq(a)) return expr_ref(re().mk_empty(a->get_sort()), m); - // Push through ITE: ~(ite(c, t, e)) → ite(c, ~t, ~e) - // Only distribute if t or e is empty, full, or a complement - // (avoids exponential blowup on complex ITE trees) + // Push through ITE with path pruning: ~(ite(c, t, e)) → ite(c, ~t, ~e) expr* c, * t, * e; if (m.is_ite(a, c, t, e)) { - if (re().is_empty(t) || re().is_full_seq(t) || re().is_complement(t) || - re().is_empty(e) || re().is_full_seq(e) || re().is_complement(e)) { - expr_ref ct = mk_complement(t); - expr_ref ce = mk_complement(e); - return mk_ite(c, ct, ce); - } + auto comp_op = [&](expr* x) { return mk_complement(x); }; + expr_ref r = apply_ite(c, t, e, comp_op); + if (r) return r; + return expr_ref(re().mk_full_seq(a->get_sort()), m); } // ~ε → .+ @@ -1122,88 +935,13 @@ namespace seq { return expr_ref(t, m); if (m.is_false(c)) return expr_ref(e, m); - lbool cond_val = eval_cond(c); + // Use path-aware condition evaluation + lbool cond_val = eval_path_cond(c); if (cond_val == l_true) return expr_ref(t, m); if (cond_val == l_false) return expr_ref(e, m); return expr_ref(m.mk_ite(c, t, e), m); } - // ------------------------------------------------------- - // ACI normalization helpers - // ------------------------------------------------------- - - void derive::flatten_union(expr* r, expr_ref_vector& args) { - expr* a = nullptr, * b = nullptr; - if 
(re().is_union(r, a, b)) { - flatten_union(a, args); - flatten_union(b, args); - } - else { - args.push_back(r); - } - } - - void derive::flatten_inter(expr* r, expr_ref_vector& args) { - expr* a = nullptr, * b = nullptr; - if (re().is_intersection(r, a, b)) { - flatten_inter(a, args); - flatten_inter(b, args); - } - else { - args.push_back(r); - } - } - - expr_ref derive::mk_union_from_sorted(expr_ref_vector& args) { - if (args.empty()) { - UNREACHABLE(); - return expr_ref(m.mk_true(), m); - } - // Remove subsumed elements: if a ⊆ b, drop a from union - for (unsigned i = 0; i < args.size(); ++i) { - for (unsigned j = 0; j < args.size(); ++j) { - if (i != j && args.get(i) && args.get(j) && is_subset(args.get(i), args.get(j))) { - args[i] = args.back(); - args.pop_back(); - --i; - break; - } - } - } - if (args.size() == 1) - return expr_ref(args.get(0), m); - // Build right-associated union - expr_ref result(args.back(), m); - for (unsigned i = args.size() - 1; i-- > 0; ) - result = expr_ref(re().mk_union(args.get(i), result), m); - return result; - } - - expr_ref derive::mk_inter_from_sorted(expr_ref_vector& args) { - if (args.empty()) { - UNREACHABLE(); - return expr_ref(m.mk_true(), m); - } - // Remove subsuming elements: if a ⊆ b, drop b from intersection - for (unsigned i = 0; i < args.size(); ++i) { - for (unsigned j = 0; j < args.size(); ++j) { - if (i != j && args.get(i) && args.get(j) && is_subset(args.get(i), args.get(j))) { - args[j] = args.back(); - args.pop_back(); - if (j < i) --i; - --j; - } - } - } - if (args.size() == 1) - return expr_ref(args.get(0), m); - // Build right-associated intersection - expr_ref result(args.back(), m); - for (unsigned i = args.size() - 1; i-- > 0; ) - result = expr_ref(re().mk_inter(args.get(i), result), m); - return result; - } - // ------------------------------------------------------- // Distribute concat through ITE/union structure of derivative // ------------------------------------------------------- @@ -1249,7 
+987,239 @@ namespace seq { } // ------------------------------------------------------- - // Post-processing: simplify ITE conditions w.r.t. m_ele + // Path management for inline pruning + // ------------------------------------------------------- + + lbool derive::push(expr* c, bool sign) { + // Check if (c, sign) is already determined by the path + lbool cv = eval_path_cond(c); + if (cv == l_true && !sign) return l_true; // c implied true, push(c,false) is redundant + if (cv == l_false && sign) return l_true; // c implied false, push(c,true) is redundant + if (cv == l_true && sign) return l_false; // c implied true, push(c,true) contradicts + if (cv == l_false && !sign) return l_false; // c implied false, push(c,false) contradicts + + // Save current state + unsigned saved_path_sz = m_path.size(); + intervals_t saved_intervals(m_intervals); + expr* saved_path_expr = m_path_expr; + + // Push atoms onto path and check for contradiction or implication + lbool atoms_result = push_path_atoms(c, sign); + if (atoms_result == l_false) { + m_path.shrink(saved_path_sz); + m_intervals = saved_intervals; + return l_false; + } + + // Update intervals + lbool intervals_result = push_intervals_impl(c, sign); + if (intervals_result == l_false) { + m_path.shrink(saved_path_sz); + m_intervals = saved_intervals; + return l_false; + } + + // If both determined the atom is implied, no need to actually push + if (atoms_result == l_true && intervals_result == l_true) { + m_path.shrink(saved_path_sz); + m_intervals = saved_intervals; + return l_true; + } + + // Update path expression + expr* atom = sign ? 
m.mk_not(c) : c; + m_path_expr = m.mk_and(m_path_expr, atom); + m_trail.push_back(m_path_expr); + + // Commit: save state for pop() + m_path_stack.push_back({ saved_path_sz, std::move(saved_intervals), saved_path_expr }); + return l_undef; + } + + void derive::pop() { + SASSERT(!m_path_stack.empty()); + auto const& saved = m_path_stack.back(); + m_path.shrink(saved.path_sz); + m_intervals = saved.intervals; + m_path_expr = saved.path_expr; + m_path_stack.pop_back(); + } + + // Binary apply_ite: hoist ite(c, t, e) op r with path pruning + expr_ref derive::apply_ite(expr* c, expr* t, expr* e, expr* r, std::function apply_op) { + expr_ref then_br(m), else_br(m); + switch (push(c, false)) { + case l_true: return apply_op(t, r); + case l_undef: then_br = apply_op(t, r); pop(); break; + case l_false: break; + } + switch (push(c, true)) { + case l_true: return apply_op(e, r); + case l_undef: else_br = apply_op(e, r); pop(); break; + case l_false: break; + } + if (then_br && else_br) return mk_ite(c, then_br, else_br); + if (then_br) return then_br; + if (else_br) return else_br; + return expr_ref(nullptr, m); + } + + // Same-condition merge: ite(c, t1, e1) op ite(c, t2, e2) → ite(c, t1 op t2, e1 op e2) + expr_ref derive::apply_ite(expr* c, expr* t1, expr* e1, expr* t2, expr* e2, std::function apply_op) { + expr_ref then_br(m), else_br(m); + switch (push(c, false)) { + case l_true: return apply_op(t1, t2); + case l_undef: then_br = apply_op(t1, t2); pop(); break; + case l_false: break; + } + switch (push(c, true)) { + case l_true: return apply_op(e1, e2); + case l_undef: else_br = apply_op(e1, e2); pop(); break; + case l_false: break; + } + if (then_br && else_br) return mk_ite(c, then_br, else_br); + if (then_br) return then_br; + if (else_br) return else_br; + return expr_ref(nullptr, m); + } + + // Unary apply_ite: hoist ite(c, t, e) through unary op with path pruning + expr_ref derive::apply_ite(expr* c, expr* t, expr* e, std::function apply_op) { + expr_ref 
then_br(m), else_br(m); + switch (push(c, false)) { + case l_true: return apply_op(t); + case l_undef: then_br = apply_op(t); pop(); break; + case l_false: break; + } + switch (push(c, true)) { + case l_true: return apply_op(e); + case l_undef: else_br = apply_op(e); pop(); break; + case l_false: break; + } + if (then_br && else_br) return mk_ite(c, then_br, else_br); + if (then_br) return then_br; + if (else_br) return else_br; + return expr_ref(nullptr, m); + } + + // Push signed atoms onto m_path. Returns l_true if implied, l_false if contradicted, l_undef if pushed. + lbool derive::push_path_atoms(expr* c, bool sign) { + // Check if (c, sign) is already determined by the path + for (auto const& [cond, csign] : m_path) { + if (c == cond) + return csign == sign ? l_true : l_false; + expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; + if (!csign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { + if (m.is_value(lhs1)) std::swap(lhs1, rhs1); + if (m.is_value(lhs2)) std::swap(lhs2, rhs2); + if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) + return sign ? l_true : l_false; + } + } + + // Composite: conjunction assumed true, or disjunction assumed false + if (!sign && m.is_and(c)) { + bool all_implied = true; + for (expr* arg : *to_app(c)) { + lbool r = push_path_atoms(arg, false); + if (r == l_false) return l_false; + if (r == l_undef) all_implied = false; + } + return all_implied ? l_true : l_undef; + } + if (sign && m.is_or(c)) { + bool all_implied = true; + for (expr* arg : *to_app(c)) { + lbool r = push_path_atoms(arg, true); + if (r == l_false) return l_false; + if (r == l_undef) all_implied = false; + } + return all_implied ? l_true : l_undef; + } + + // Atomic: push onto path + m_path.push_back({ c, sign }); + return l_undef; + } + + // Update m_intervals based on the condition. Returns l_true if implied, l_false if inconsistent, l_undef if pushed. 
+ lbool derive::push_intervals_impl(expr* c, bool sign) { + unsigned lo = 0, hi = 0; + bool negated = false; + if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) { + bool effective_neg = (negated != sign); + if (!effective_neg) { + if (lo <= hi) { + // Check if current intervals already imply [lo,hi] + bool already_subset = true; + for (auto const& [ilo, ihi] : m_intervals) { + if (ilo < lo || ihi > hi) { already_subset = false; break; } + } + if (already_subset) return l_true; + intersect_intervals(lo, hi, m_intervals); + } else { + m_intervals.reset(); + } + } else { + if (lo <= hi) { + // Check if current intervals already exclude [lo,hi] + bool already_excluded = true; + for (auto const& [ilo, ihi] : m_intervals) { + if (ilo <= hi && ihi >= lo) { already_excluded = false; break; } + } + if (already_excluded) return l_true; + exclude_interval(lo, hi, m_intervals, u().max_char()); + } + } + } else if (!sign && m.is_and(c)) { + bool all_implied = true; + for (expr* arg : *to_app(c)) { + lbool r = push_intervals_impl(arg, false); + if (r == l_false) return l_false; + if (r == l_undef) all_implied = false; + } + return all_implied ? l_true : (m_intervals.empty() ? l_false : l_undef); + } else if (sign && m.is_or(c)) { + bool all_implied = true; + for (expr* arg : *to_app(c)) { + lbool r = push_intervals_impl(arg, true); + if (r == l_false) return l_false; + if (r == l_undef) all_implied = false; + } + return all_implied ? l_true : (m_intervals.empty() ? l_false : l_undef); + } + return m_intervals.empty() ? l_false : l_undef; + } + + // Evaluate a condition against the current path and intervals. + lbool derive::eval_path_cond(expr* c) { + // First try static evaluation (concrete m_ele, tautologies) + lbool v = eval_cond(c); + if (v != l_undef) return v; + + // Check against path atoms + for (auto const& [cond, sign] : m_path) { + if (c == cond) + return sign ? 
l_false : l_true; + } + + // Check against intervals + v = eval_range_cond(m_intervals, c); + if (v != l_undef) return v; + + // Check pred_implies from path atoms + for (auto const& [cond, sign] : m_path) { + if (pred_implies(sign, cond, false, c)) + return l_true; + if (pred_implies(sign, cond, true, c)) + return l_false; + } + + return l_undef; + } + + // ------------------------------------------------------- + // Condition evaluation helpers // ------------------------------------------------------- lbool derive::eval_cond(expr* cond) { @@ -1310,84 +1280,31 @@ namespace seq { return l_undef; } - // Evaluate a single atomic condition (char_le or equality) against path constraints. - // Returns l_true if path implies (c, !sign), l_false if path contradicts (c, !sign), l_undef otherwise. - - lbool derive::push_path(path_t& path, expr* c, bool sign) { - // Check if (c, sign) is already determined by the path - for (auto const& [cond, csign] : path) { - if (c == cond) - return csign == sign ? l_true : l_false; - - expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr; - if (!csign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) { - if (m.is_value(lhs1)) std::swap(lhs1, rhs1); - if (m.is_value(lhs2)) std::swap(lhs2, rhs2); - if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2)) - return sign ? 
l_true : l_false; - } - } - - // Composite case: conjunction (sign=false) or disjunction (sign=true) - if (!sign && m.is_and(c)) { - auto sz = path.size(); - lbool r = l_true; - for (expr* arg : *to_app(c)) { - lbool v = push_path(path, arg, false); - if (v == l_false) { path.shrink(sz); return l_false; } - if (v == l_undef) r = l_undef; - } - if (r == l_true) path.shrink(sz); - return r; - } - if (sign && m.is_or(c)) { - auto sz = path.size(); - lbool r = l_true; - for (expr* arg : *to_app(c)) { - lbool v = push_path(path, arg, true); - if (v == l_false) { path.shrink(sz); return l_false; } - if (v == l_undef) r = l_undef; - } - if (r == l_true) path.shrink(sz); - return r; - } - - // Atomic case: not determined, push onto path - path.push_back({ c, sign }); - return l_undef; - } - - lbool derive::push_intervals(intervals_t& intervals, expr* c, bool sign) { - // First check if the condition is already determined by current intervals - lbool range_val = eval_range_cond(intervals, c); - if (range_val != l_undef) - return sign ? 
~range_val : range_val; - - // Not determined — modify intervals + lbool derive::eval_range_cond(intervals_t const& intervals, expr* c) { + if (intervals.empty()) + return l_false; unsigned lo = 0, hi = 0; bool negated = false; - if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) { - bool effective_neg = (negated != sign); - if (!effective_neg) { - if (lo > hi) - return l_false; - intersect_intervals(lo, hi, intervals); - } else { - if (lo <= hi) - exclude_interval(lo, hi, intervals, u().max_char()); - } - } else if (!sign && m.is_and(c)) { - auto saved = intervals; - for (expr* arg : *to_app(c)) { - lbool v = push_intervals(intervals, arg, false); - if (v == l_false) { intervals = saved; return l_false; } - } - } else if (sign && m.is_or(c)) { - auto saved = intervals; - for (expr* arg : *to_app(c)) { - lbool v = push_intervals(intervals, arg, true); - if (v == l_false) { intervals = saved; return l_false; } - } + if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated)) + return l_undef; + if (lo > hi) { + return negated ? l_true : l_false; + } + // Check if [lo, hi] overlaps with intervals and/or contains all intervals + bool any_overlap = false; + bool all_contained = true; + for (auto const& [r_lo, r_hi] : intervals) { + if (std::max(r_lo, lo) <= std::min(r_hi, hi)) + any_overlap = true; + if (r_lo < lo || r_hi > hi) + all_contained = false; + } + if (!negated) { + if (!any_overlap) return l_false; + if (all_contained) return l_true; + } else { + if (all_contained) return l_false; + if (!any_overlap) return l_true; } return l_undef; } @@ -1414,120 +1331,4 @@ namespace seq { ranges.append(right); } - lbool derive::eval_range_cond(intervals_t const& intervals, expr* c) { - if (intervals.empty()) - return l_false; - unsigned lo = 0, hi = 0; - bool negated = false; - if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated)) - return l_undef; - if (lo > hi) { - // c asserts x in empty range or c asserts x NOT in empty range - return negated ? 
l_true : l_false; - } - // Check if [lo, hi] overlaps with intervals and/or contains all intervals - bool any_overlap = false; - bool all_contained = true; // all intervals ⊆ [lo, hi] - for (auto const& [r_lo, r_hi] : intervals) { - if (std::max(r_lo, lo) <= std::min(r_hi, hi)) - any_overlap = true; - if (r_lo < lo || r_hi > hi) - all_contained = false; - } - if (!negated) { - // c asserts x ∈ [lo, hi] - if (!any_overlap) return l_false; - if (all_contained) return l_true; - } else { - // c asserts x ∉ [lo, hi] - if (all_contained) return l_false; // all values are in [lo,hi], so ¬(x∈[lo,hi]) is false - if (!any_overlap) return l_true; // no values are in [lo,hi], so ¬(x∈[lo,hi]) is true - } - return l_undef; - } - - std::pair derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e, unsigned depth) { - auto sz = path.size(); - auto saved_intervals = intervals; - - // Push c with sign=false (then-branch: c is true) - lbool path_val = push_path(path, c, false); - if (path_val != l_undef) { - path.shrink(sz); - expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? t : e, depth); - return { r, r }; - } - - lbool intv_val = push_intervals(intervals, c, false); - if (intv_val != l_undef) { - path.shrink(sz); - intervals = saved_intervals; - expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? t : e, depth); - return { r, r }; - } - - // Then-branch increases depth - expr_ref st = simplify_ite_rec(path, intervals, t, depth + 1); - path.shrink(sz); - intervals = saved_intervals; - - // Push c with sign=true (else-branch: c is false) - path_val = push_path(path, c, true); - if (path_val != l_undef) { - path.shrink(sz); - expr_ref r = simplify_ite_rec(path, intervals, path_val == l_true ? 
e : t, depth); - return { r, r }; - } - - intv_val = push_intervals(intervals, c, true); - if (intv_val != l_undef) { - path.shrink(sz); - intervals = saved_intervals; - expr_ref r = simplify_ite_rec(path, intervals, intv_val == l_true ? e : t, depth); - return { r, r }; - } - - // Else-branch does NOT increase depth (covers disjoint cases) - expr_ref se = simplify_ite_rec(path, intervals, e, depth); - path.shrink(sz); - intervals = saved_intervals; - return { st, se }; - } - - expr_ref derive::simplify_ite(expr* d) { - expr* c, * t, * e; - if (!m.is_ite(d, c, t, e)) - return expr_ref(d, m); - - lbool cond_val = eval_cond(c); - if (cond_val == l_true) return simplify_ite(t); - if (cond_val == l_false) return simplify_ite(e); - - path_t path; - intervals_t intervals; - intervals.push_back(std::make_pair(0u, u().max_char())); - auto [st, se] = simplify_ite_rec(path, intervals, c, t, e, 0); - return mk_ite(c, st, se); - } - - expr_ref derive::simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d, unsigned depth) { - expr* c, * t, * e; - if (!m.is_ite(d, c, t, e)) - return expr_ref(d, m); - - // Depth limit reached — return without further simplification - if (depth >= m_max_simp_depth) - return expr_ref(d, m); - - // Try to evaluate c directly (handles trivially true/false conditions) - lbool cond_val = eval_cond(c); - if (cond_val == l_true) return simplify_ite_rec(path, intervals, t, depth); - if (cond_val == l_false) return simplify_ite_rec(path, intervals, e, depth); - - // Cannot simplify c: recurse into branches with extended paths - // push_path and push_intervals will check subsumption/contradiction - auto [st, se] = simplify_ite_rec(path, intervals, c, t, e, depth); - return mk_ite(c, st, se); - } - } \ No newline at end of file diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index b2caa394b..d61a02ee8 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -29,6 +29,10 @@ Authors: #include 
"ast/array_decl_plugin.h" #include "ast/rewriter/bool_rewriter.h" #include "util/obj_pair_hashtable.h" +#include "util/obj_triple_hashtable.h" +#include + +class seq_rewriter; namespace seq { @@ -53,6 +57,7 @@ namespace seq { seq_util m_util; arith_util m_autil; bool_rewriter m_br; + seq_rewriter& m_re; // Cache: maps (ele, regex) pair to its derivative obj_pair_map m_cache; @@ -60,33 +65,52 @@ namespace seq { expr_ref_vector m_trail; // pin cached results // Op cache for ITE-hoisting operations (union, inter, concat, complement) - obj_pair_map m_union_cache; - obj_pair_map m_inter_cache; + // Path-aware caches: key is (a, b, path_expr) for binary ops, (a, path_expr) for complement + obj_triple_map m_union_cache; + obj_triple_map m_inter_cache; obj_pair_map m_concat_cache; - obj_map m_complement_cache; + obj_pair_map m_complement_cache; // Depth limiting unsigned m_depth { 0 }; static const unsigned m_max_depth = 512; - // Simplify ITE recursion depth limit - unsigned m_simp_depth { 0 }; - static const unsigned m_max_simp_depth = 8; - - // ITE combine depth limit (bounds exponential blowup in BDD merge) - unsigned m_inter_hoist_depth { 0 }; - static const unsigned m_max_inter_hoist_depth = 4; - - // Depth limit for one-sided union hoisting (global budget per derivative call) - unsigned m_union_hoist_budget { 0 }; - static const unsigned m_max_union_hoist_budget = 32; - seq_util::rex& re() { return m_util.re; } seq_util& u() { return m_util; } // The element (character) for the current derivative computation expr_ref m_ele; + // Path state for inline pruning during mk_inter/mk_union/mk_complement + using intervals_t = svector>; + + // Path: vector of signed atoms + svector> m_path; + // Intervals: feasible character ranges under current path + intervals_t m_intervals; + // Stack of saved states for push/pop + struct path_save { unsigned path_sz; intervals_t intervals; expr* path_expr; }; + svector m_path_stack; + // Boolean expression encoding of current path (for 
cache keys) + expr_ref m_path_expr; + + // Path interface + lbool push(expr* c, bool sign); // l_true: implied, l_undef: pushed (must pop), l_false: contradicts + void pop(); // restore state to matching push + expr* get_path_expr() { return m_path_expr; } + + // Hoist ITE: apply_op through ite(c, t, e) with path pruning + expr_ref apply_ite(expr* c, expr* t, expr* e, expr* r, std::function apply_op); + expr_ref apply_ite(expr* c, expr* t1, expr* e1, expr* t2, expr* e2, std::function apply_op); + expr_ref apply_ite(expr* c, expr* t, expr* e, std::function apply_op); + + // Evaluate a condition against the current path/intervals + lbool eval_path_cond(expr* c); + + // Internal helpers for push + lbool push_path_atoms(expr* c, bool sign); + lbool push_intervals_impl(expr* c, bool sign); + // Core derivative computation expr_ref derive_rec(expr* r); expr_ref derive_core(expr* r); @@ -99,8 +123,10 @@ namespace seq { // Nullable check: returns a Boolean expression expr_ref is_nullable(expr* r); - // Smart constructors with simplification and ACI canonicalization + // Smart constructors with path-aware simplification and ACI canonicalization expr_ref mk_union(expr* a, expr* b); + void flatten_union(expr* e, expr_ref_vector& args); + bool is_subset(expr* a, expr* b); expr_ref mk_union_core(expr* a, expr* b); expr_ref mk_inter(expr* a, expr* b); expr_ref mk_inter_core(expr* a, expr* b); @@ -109,12 +135,6 @@ namespace seq { expr_ref mk_complement_core(expr* a); expr_ref mk_ite(expr* c, expr* t, expr* e); - // Flatten and sort for ACI normal form - void flatten_union(expr* r, expr_ref_vector& args); - void flatten_inter(expr* r, expr_ref_vector& args); - expr_ref mk_union_from_sorted(expr_ref_vector& args); - expr_ref mk_inter_from_sorted(expr_ref_vector& args); - // Distribute concatenation through ITE/union in derivative expr_ref mk_deriv_concat(expr* d, expr* tail); expr_ref mk_deriv_concat_core(expr* d, expr* tail); @@ -122,28 +142,15 @@ namespace seq { // Extract head 
character and tail from a sequence expression bool get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl); - // Lightweight subsumption check: returns true if L(a) ⊆ L(b) - bool is_subset(expr* a, expr* b); - // Predicate implication for character range conditions. - // Returns true if condition a implies condition b. + bool pred_implies(bool sign_a, expr* a, bool sign_b, expr* b); bool pred_implies(expr* a, expr* b); bool extract_char_range(expr* cond, unsigned& lo, unsigned& hi); - bool is_char_cond(expr* c); // Normalize reverse(r) by pushing reverse inward expr_ref normalize_reverse(expr* r); - // Path of signed conditions for ITE simplification - using path_t = svector>; - using intervals_t = svector>; - - // Simplify ITE conditions w.r.t. m_ele and path knowledge - expr_ref simplify_ite(expr* d); - expr_ref simplify_ite_rec(path_t& path, intervals_t& intervals, expr* d, unsigned depth); - std::pair simplify_ite_rec(path_t& path, intervals_t& intervals, expr* c, expr* t, expr* e, unsigned depth); - lbool push_path(path_t& path, expr* c, bool sign); - lbool push_intervals(intervals_t& intervals, expr* c, bool sign); + // Condition evaluation helpers lbool eval_cond(expr* cond); lbool eval_range_cond(intervals_t const& intervals, expr* c); static void intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges); @@ -157,7 +164,7 @@ namespace seq { void reset_op_caches(); public: - derive(ast_manager& m); + derive(ast_manager& m, seq_rewriter& re); /** * Compute the derivative of regex r with respect to element ele. @@ -171,11 +178,6 @@ namespace seq { */ expr_ref operator()(expr* r); - /** - * Lightweight structural subsumption check: L(a) ⊆ L(b)? - * Returns true only when provable structurally. 
- */ - bool subsumes(expr* larger, expr* smaller) { return is_subset(smaller, larger); } }; } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 70b382457..9c3ba77d3 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -128,6 +128,8 @@ class seq_rewriter { void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r); }; + friend class seq::derive; + seq_util m_util; arith_util m_autil; bool_rewriter m_br; @@ -332,7 +334,7 @@ class seq_rewriter { public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): - m_util(m), m_autil(m), m_br(m, p), m_derive(m), // m_re2aut(m), + m_util(m), m_autil(m), m_br(m, p), m_derive(m, *this), // m_re2aut(m), m_op_cache(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { } From 143e5b9ffdb0326e6c6ec21791e8a5a2f1206fa3 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Mon, 8 Jun 2026 23:07:05 -0700 Subject: [PATCH 24/32] updates to derive --- src/ast/rewriter/seq_derive.cpp | 270 ++++++++++++++++---------------- src/ast/rewriter/seq_derive.h | 13 +- 2 files changed, 144 insertions(+), 139 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 3594c69f2..518f238dd 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -78,6 +78,7 @@ namespace seq { m_path.reset(); m_intervals.reset(); m_intervals.push_back(std::make_pair(0u, u().max_char())); + m_intervals_start = 0; m_path_expr = m.mk_true(); expr_ref result = derive_rec(r); m_ele = nullptr; @@ -664,6 +665,7 @@ namespace seq { bool derive::is_subset(expr* a, expr* b) { if (a == b) return true; if (re().is_empty(a)) return true; + if (re().is_full_seq(a)) return re().is_full_seq(b); if (re().is_full_seq(b)) return true; expr* b1 = nullptr; @@ -721,45 +723,14 @@ namespace seq { if (a->get_id() > b->get_id()) std::swap(a, b); - // Identity / annihilator - if (a == b) return expr_ref(a, m); - if (re().is_empty(a)) return 
expr_ref(b, m); - if (re().is_empty(b)) return expr_ref(a, m); - if (re().is_full_seq(a)) return expr_ref(a, m); - if (re().is_full_seq(b)) return expr_ref(b, m); - - // Complement absorption: r ∪ ~r = Σ* - expr* c = nullptr; - if (re().is_complement(a, c) && c == b) - return expr_ref(re().mk_full_seq(a->get_sort()), m); - if (re().is_complement(b, c) && c == a) - return expr_ref(re().mk_full_seq(a->get_sort()), m); + // Subsumption covers: a==b, empty(a), empty(b), full(a), full(b), complement absorption, etc. + if (is_subset(a, b)) return expr_ref(b, m); + if (is_subset(b, a)) return expr_ref(a, m); // ITE handling with path pruning - expr *c1, *t1, *e1, *c2, *t2, *e2; auto union_op = [&](expr* x, expr* y) { return mk_union(x, y); }; - - if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { - expr_ref r(m); - if (c1 == c2) - r = apply_ite(c1, t1, e1, t2, e2, union_op); - else - r = apply_ite(c1, t1, e1, b, union_op); - if (r) return r; - return expr_ref(re().mk_empty(a->get_sort()), m); - } - if (m_path_stack.size() < 8) { - if (m.is_ite(a, c1, t1, e1)) { - expr_ref r = apply_ite(c1, t1, e1, b, union_op); - if (r) return r; - return expr_ref(re().mk_empty(a->get_sort()), m); - } - if (m.is_ite(b, c2, t2, e2)) { - expr_ref r = apply_ite(c2, t2, e2, a, union_op); - if (r) return r; - return expr_ref(re().mk_empty(a->get_sort()), m); - } - } + expr_ref r = hoist_ite(a, b, union_op); + if (r) return r; // Prefix factoring: a·x ∪ a·y = a·(x ∪ y) expr *a1, *a2, *b1, *b2; @@ -768,24 +739,34 @@ namespace seq { return mk_deriv_concat(a1, tail); } - // star absorbs epsilon: r* ∪ ε = r* - if (re().is_star(a) && re().is_epsilon(b)) return expr_ref(a, m); - if (re().is_star(b) && re().is_epsilon(a)) return expr_ref(b, m); - - // Subsumption: a ∪ b = b if a ⊆ b, a ∪ b = a if b ⊆ a - if (is_subset(a, b)) return expr_ref(b, m); - if (is_subset(b, a)) return expr_ref(a, m); - - // ACI normalization: flatten, sort by id, deduplicate + // ACI normalization: flatten, sort by id, 
deduplicate/subsume expr_ref_vector args(m); flatten_union(a, args); flatten_union(b, args); std::sort(args.data(), args.data() + args.size(), [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); - expr_ref result(args.get(0), m); - for (unsigned i = 1; i < args.size(); ++i) { - if (args.get(i) != args.get(i - 1)) - result = expr_ref(re().mk_union(result, args.get(i)), m); + // Remove subsumed elements: if args[i] ⊆ args[j], drop args[i] + unsigned j = 0; + for (unsigned i = 0; i < args.size(); ++i) { + bool subsumed = false; + for (unsigned k = 0; k < j; ++k) { + if (is_subset(args.get(i), args.get(k))) { subsumed = true; break; } + } + if (!subsumed) { + // Check if new element subsumes any previously kept + unsigned new_j = 0; + for (unsigned k = 0; k < j; ++k) { + if (!is_subset(args.get(k), args.get(i))) + args[new_j++] = args.get(k); + } + args[new_j++] = args.get(i); + j = new_j; + } } + if (j == 0) + return expr_ref(re().mk_empty(a->get_sort()), m); + expr_ref result(args.get(0), m); + for (unsigned i = 1; i < j; ++i) + result = expr_ref(re().mk_union(result, args.get(i)), m); return result; } @@ -812,12 +793,9 @@ namespace seq { if (a->get_id() > b->get_id()) std::swap(a, b); - // Identity / annihilator - if (a == b) return expr_ref(a, m); - if (re().is_empty(a)) return expr_ref(a, m); - if (re().is_empty(b)) return expr_ref(b, m); - if (re().is_full_seq(a)) return expr_ref(b, m); - if (re().is_full_seq(b)) return expr_ref(a, m); + // Subsumption covers: a==b, empty(a), empty(b), full(a), full(b), etc. 
+ if (is_subset(a, b)) return expr_ref(a, m); + if (is_subset(b, a)) return expr_ref(b, m); // Complement absorption: r ∩ ~r = ∅ expr* c = nullptr; @@ -827,30 +805,9 @@ namespace seq { return expr_ref(re().mk_empty(a->get_sort()), m); // ITE handling with path pruning - expr *c1, *t1, *e1, *c2, *t2, *e2; auto inter_op = [&](expr* x, expr* y) { return mk_inter(x, y); }; - - if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { - expr_ref r(m); - if (c1 == c2) - r = apply_ite(c1, t1, e1, t2, e2, inter_op); - else - r = apply_ite(c1, t1, e1, b, inter_op); - if (r) return r; - return expr_ref(re().mk_empty(a->get_sort()), m); - } - if (m_path_stack.size() < 8) { - if (m.is_ite(a, c1, t1, e1)) { - expr_ref r = apply_ite(c1, t1, e1, b, inter_op); - if (r) return r; - return expr_ref(re().mk_empty(a->get_sort()), m); - } - if (m.is_ite(b, c2, t2, e2)) { - expr_ref r = apply_ite(c2, t2, e2, a, inter_op); - if (r) return r; - return expr_ref(re().mk_empty(a->get_sort()), m); - } - } + expr_ref r = hoist_ite(a, b, inter_op); + if (r) return r; // Base case: build raw intersection return expr_ref(re().mk_inter(a, b), m); @@ -1000,14 +957,16 @@ namespace seq { // Save current state unsigned saved_path_sz = m_path.size(); - intervals_t saved_intervals(m_intervals); + unsigned saved_intervals_sz = m_intervals.size(); + unsigned saved_intervals_start = m_intervals_start; expr* saved_path_expr = m_path_expr; // Push atoms onto path and check for contradiction or implication lbool atoms_result = push_path_atoms(c, sign); if (atoms_result == l_false) { m_path.shrink(saved_path_sz); - m_intervals = saved_intervals; + m_intervals.shrink(saved_intervals_sz); + m_intervals_start = saved_intervals_start; return l_false; } @@ -1015,14 +974,16 @@ namespace seq { lbool intervals_result = push_intervals_impl(c, sign); if (intervals_result == l_false) { m_path.shrink(saved_path_sz); - m_intervals = saved_intervals; + m_intervals.shrink(saved_intervals_sz); + m_intervals_start = 
saved_intervals_start; return l_false; } - // If both determined the atom is implied, no need to actually push - if (atoms_result == l_true && intervals_result == l_true) { + // If either determined the atom is implied, no need to actually push + if (atoms_result == l_true || intervals_result == l_true) { m_path.shrink(saved_path_sz); - m_intervals = saved_intervals; + m_intervals.shrink(saved_intervals_sz); + m_intervals_start = saved_intervals_start; return l_true; } @@ -1032,7 +993,7 @@ namespace seq { m_trail.push_back(m_path_expr); // Commit: save state for pop() - m_path_stack.push_back({ saved_path_sz, std::move(saved_intervals), saved_path_expr }); + m_path_stack.push_back({ saved_path_sz, saved_intervals_sz, saved_intervals_start, saved_path_expr }); return l_undef; } @@ -1040,7 +1001,8 @@ namespace seq { SASSERT(!m_path_stack.empty()); auto const& saved = m_path_stack.back(); m_path.shrink(saved.path_sz); - m_intervals = saved.intervals; + m_intervals.shrink(saved.intervals_sz); + m_intervals_start = saved.intervals_start; m_path_expr = saved.path_expr; m_path_stack.pop_back(); } @@ -1102,6 +1064,34 @@ namespace seq { return expr_ref(nullptr, m); } + // Common ITE dispatch for binary ops (union/inter). + // Returns nullptr if neither a nor b is ITE (or depth limit reached). 
+ expr_ref derive::hoist_ite(expr* a, expr* b, std::function apply_op) { + expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { + expr_ref r(m); + if (c1 == c2) + r = apply_ite(c1, t1, e1, t2, e2, apply_op); + else + r = apply_ite(c1, t1, e1, b, apply_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); + } + if (m_path_stack.size() < 8) { + if (m.is_ite(a, c1, t1, e1)) { + expr_ref r = apply_ite(c1, t1, e1, b, apply_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); + } + if (m.is_ite(b, c2, t2, e2)) { + expr_ref r = apply_ite(c2, t2, e2, a, apply_op); + if (r) return r; + return expr_ref(re().mk_empty(a->get_sort()), m); + } + } + return expr_ref(nullptr, m); + } + // Push signed atoms onto m_path. Returns l_true if implied, l_false if contradicted, l_undef if pushed. lbool derive::push_path_atoms(expr* c, bool sign) { // Check if (c, sign) is already determined by the path @@ -1118,19 +1108,10 @@ namespace seq { } // Composite: conjunction assumed true, or disjunction assumed false - if (!sign && m.is_and(c)) { + if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) { bool all_implied = true; for (expr* arg : *to_app(c)) { - lbool r = push_path_atoms(arg, false); - if (r == l_false) return l_false; - if (r == l_undef) all_implied = false; - } - return all_implied ? l_true : l_undef; - } - if (sign && m.is_or(c)) { - bool all_implied = true; - for (expr* arg : *to_app(c)) { - lbool r = push_path_atoms(arg, true); + lbool r = push_path_atoms(arg, sign); if (r == l_false) return l_false; if (r == l_undef) all_implied = false; } @@ -1143,6 +1124,8 @@ namespace seq { } // Update m_intervals based on the condition. Returns l_true if implied, l_false if inconsistent, l_undef if pushed. + // Operates on the active suffix m_intervals[m_intervals_start..end]. + // On modification, appends new intervals and updates m_intervals_start. 
lbool derive::push_intervals_impl(expr* c, bool sign) { unsigned lo = 0, hi = 0; bool negated = false; @@ -1152,43 +1135,38 @@ namespace seq { if (lo <= hi) { // Check if current intervals already imply [lo,hi] bool already_subset = true; - for (auto const& [ilo, ihi] : m_intervals) { - if (ilo < lo || ihi > hi) { already_subset = false; break; } + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + if (m_intervals[i].first < lo || m_intervals[i].second > hi) { already_subset = false; break; } } if (already_subset) return l_true; - intersect_intervals(lo, hi, m_intervals); + intersect_intervals(lo, hi); } else { - m_intervals.reset(); + // lo > hi means empty range — contradiction + return l_false; } } else { if (lo <= hi) { // Check if current intervals already exclude [lo,hi] bool already_excluded = true; - for (auto const& [ilo, ihi] : m_intervals) { - if (ilo <= hi && ihi >= lo) { already_excluded = false; break; } + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + if (m_intervals[i].first <= hi && m_intervals[i].second >= lo) { already_excluded = false; break; } } if (already_excluded) return l_true; - exclude_interval(lo, hi, m_intervals, u().max_char()); + exclude_interval(lo, hi); } } - } else if (!sign && m.is_and(c)) { + } else if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) { bool all_implied = true; for (expr* arg : *to_app(c)) { - lbool r = push_intervals_impl(arg, false); + lbool r = push_intervals_impl(arg, sign); if (r == l_false) return l_false; if (r == l_undef) all_implied = false; } - return all_implied ? l_true : (m_intervals.empty() ? l_false : l_undef); - } else if (sign && m.is_or(c)) { - bool all_implied = true; - for (expr* arg : *to_app(c)) { - lbool r = push_intervals_impl(arg, true); - if (r == l_false) return l_false; - if (r == l_undef) all_implied = false; - } - return all_implied ? l_true : (m_intervals.empty() ? 
l_false : l_undef); + unsigned n = m_intervals.size() - m_intervals_start; + return all_implied ? l_true : (n == 0 ? l_false : l_undef); } - return m_intervals.empty() ? l_false : l_undef; + unsigned n = m_intervals.size() - m_intervals_start; + return n == 0 ? l_false : l_undef; } // Evaluate a condition against the current path and intervals. @@ -1204,7 +1182,7 @@ namespace seq { } // Check against intervals - v = eval_range_cond(m_intervals, c); + v = eval_range_cond(c); if (v != l_undef) return v; // Check pred_implies from path atoms @@ -1280,8 +1258,9 @@ namespace seq { return l_undef; } - lbool derive::eval_range_cond(intervals_t const& intervals, expr* c) { - if (intervals.empty()) + lbool derive::eval_range_cond(expr* c) { + unsigned n = m_intervals.size() - m_intervals_start; + if (n == 0) return l_false; unsigned lo = 0, hi = 0; bool negated = false; @@ -1293,7 +1272,8 @@ namespace seq { // Check if [lo, hi] overlaps with intervals and/or contains all intervals bool any_overlap = false; bool all_contained = true; - for (auto const& [r_lo, r_hi] : intervals) { + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + auto [r_lo, r_hi] = m_intervals[i]; if (std::max(r_lo, lo) <= std::min(r_hi, hi)) any_overlap = true; if (r_lo < lo || r_hi > hi) @@ -1309,26 +1289,48 @@ namespace seq { return l_undef; } - void derive::intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges) { - unsigned j = 0; - for (unsigned i = 0; i < ranges.size(); ++i) { - auto [lo1, hi1] = ranges[i]; + // Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi] + void derive::intersect_intervals(unsigned lo, unsigned hi) { + // Copy active suffix to end, update start, then filter + unsigned old_start = m_intervals_start; + unsigned old_sz = m_intervals.size(); + for (unsigned i = old_start; i < old_sz; ++i) + m_intervals.push_back(m_intervals[i]); + m_intervals_start = old_sz; + // Filter in-place within new suffix + unsigned j = 
m_intervals_start; + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + auto [lo1, hi1] = m_intervals[i]; if (hi < lo1) break; if (hi1 >= lo) - ranges[j++] = std::make_pair(std::max(lo1, lo), std::min(hi1, hi)); + m_intervals[j++] = std::make_pair(std::max(lo1, lo), std::min(hi1, hi)); } - ranges.shrink(j); + m_intervals.shrink(j); } - void derive::exclude_interval(unsigned lo, unsigned hi, intervals_t& ranges, unsigned max_char) { - if (lo == 0 && hi >= max_char) { ranges.reset(); return; } - if (lo == 0) { intersect_intervals(hi + 1, max_char, ranges); return; } - if (hi >= max_char) { intersect_intervals(0, lo - 1, ranges); return; } - intervals_t right(ranges); - intersect_intervals(0, lo - 1, ranges); - intersect_intervals(hi + 1, max_char, right); - ranges.append(right); + // Exclude [lo, hi] from the active suffix m_intervals[m_intervals_start..end] + void derive::exclude_interval(unsigned lo, unsigned hi) { + unsigned max_char = u().max_char(); + if (lo == 0 && hi >= max_char) { m_intervals_start = m_intervals.size(); return; } + if (lo == 0) { intersect_intervals(hi + 1, max_char); return; } + if (hi >= max_char) { intersect_intervals(0, lo - 1); return; } + // Each interval [ilo, ihi] minus [lo, hi] → up to 2 pieces + // Append new results past the end, then move start + unsigned old_start = m_intervals_start; + unsigned old_sz = m_intervals.size(); + for (unsigned i = old_start; i < old_sz; ++i) { + auto [ilo, ihi] = m_intervals[i]; + if (ihi < lo || ilo > hi) { + m_intervals.push_back(m_intervals[i]); + } else { + if (ilo < lo) + m_intervals.push_back(std::make_pair(ilo, lo - 1)); + if (ihi > hi) + m_intervals.push_back(std::make_pair(hi + 1, ihi)); + } + } + m_intervals_start = old_sz; } } \ No newline at end of file diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index d61a02ee8..969c5fa16 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -86,10 +86,11 @@ namespace seq { // 
Path: vector of signed atoms svector> m_path; - // Intervals: feasible character ranges under current path + // Intervals: feasible character ranges under current path (append-only) intervals_t m_intervals; + unsigned m_intervals_start { 0 }; // Stack of saved states for push/pop - struct path_save { unsigned path_sz; intervals_t intervals; expr* path_expr; }; + struct path_save { unsigned path_sz; unsigned intervals_sz; unsigned intervals_start; expr* path_expr; }; svector m_path_stack; // Boolean expression encoding of current path (for cache keys) expr_ref m_path_expr; @@ -103,6 +104,8 @@ namespace seq { expr_ref apply_ite(expr* c, expr* t, expr* e, expr* r, std::function apply_op); expr_ref apply_ite(expr* c, expr* t1, expr* e1, expr* t2, expr* e2, std::function apply_op); expr_ref apply_ite(expr* c, expr* t, expr* e, std::function apply_op); + // Common ITE dispatch for binary ops (union/inter) + expr_ref hoist_ite(expr* a, expr* b, std::function apply_op); // Evaluate a condition against the current path/intervals lbool eval_path_cond(expr* c); @@ -152,9 +155,9 @@ namespace seq { // Condition evaluation helpers lbool eval_cond(expr* cond); - lbool eval_range_cond(intervals_t const& intervals, expr* c); - static void intersect_intervals(unsigned lo, unsigned hi, intervals_t& ranges); - static void exclude_interval(unsigned lo, unsigned hi, intervals_t& ranges, unsigned max_char); + lbool eval_range_cond(expr* c); + void intersect_intervals(unsigned lo, unsigned hi); + void exclude_interval(unsigned lo, unsigned hi); sort* re_sort(expr* r) { return r->get_sort(); } sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } From 758aff4f1e64e7a982f0b5996acc79ec122489d3 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 9 Jun 2026 11:10:07 -0700 Subject: [PATCH 25/32] tune and fix derive --- src/ast/rewriter/seq_derive.cpp | 164 ++++++++++++++++++++++---------- src/ast/rewriter/seq_derive.h | 4 +- 2 files changed, 119 insertions(+), 49 
deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 518f238dd..2101223b4 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -708,29 +708,116 @@ namespace seq { return false; } - void derive::flatten_union(expr* e, expr_ref_vector& args) { - expr* a, *b; - if (re().is_union(e, a, b)) { - flatten_union(a, args); - flatten_union(b, args); - } else { - args.push_back(e); + unsigned derive::union_id(expr* e) { + expr* c = nullptr; + return re().is_complement(e, c) ? c->get_id() : e->get_id(); + } + + bool derive::are_complements(expr* a, expr* b) { + expr* c = nullptr; + if (re().is_complement(a, c) && c == b) return true; + if (re().is_complement(b, c) && c == a) return true; + return false; + } + + // Merge two sorted right-associated union chains. + // Uses is_subset for pairwise subsumption during merge. + expr_ref derive::merge_union(expr* r1, expr* r2) { + expr_ref _r1(r1, m), _r2(r2, m); + + if (r1 == r2) return expr_ref(r1, m); + if (re().is_empty(r1)) return expr_ref(r2, m); + if (re().is_empty(r2)) return expr_ref(r1, m); + if (re().is_full_seq(r1)) return expr_ref(r1, m); + if (re().is_full_seq(r2)) return expr_ref(r2, m); + if (are_complements(r1, r2)) return expr_ref(re().mk_full_seq(r1->get_sort()), m); + + // Flatten both chains into a vector, merge-sort style + expr_ref_vector elems(m); + auto collect = [&](expr* r) { + expr* a, *b; + while (re().is_union(r, a, b)) { + elems.push_back(a); + r = b; + } + elems.push_back(r); + }; + unsigned split; + collect(r1); + split = elems.size(); + collect(r2); + + // Merge pass: produce sorted result with subsumption + expr_ref_vector result_elems(m); + unsigned i = 0, j = split; + while (i < split && j < elems.size()) { + expr* a = elems.get(i); + expr* b = elems.get(j); + if (a == b) { + result_elems.push_back(a); + ++i; ++j; + } else if (are_complements(a, b)) { + return expr_ref(re().mk_full_seq(r1->get_sort()), m); + } else { + 
unsigned aid = union_id(a), bid = union_id(b); + if (aid == bid) { + // Same union_id: check subsumption + if (is_subset(a, b)) + result_elems.push_back(b); + else if (is_subset(b, a)) + result_elems.push_back(a); + else { + result_elems.push_back(a); + result_elems.push_back(b); + } + ++i; ++j; + } else if (aid < bid) { + result_elems.push_back(a); + ++i; + } else { + result_elems.push_back(b); + ++j; + } + } } + while (i < split) result_elems.push_back(elems.get(i++)); + while (j < elems.size()) result_elems.push_back(elems.get(j++)); + + // Subsumption pass: check each element against its neighbors + // This catches cases like loop(0,k)·star ⊆ loop(0,k+1)·star + // which have different union_ids + unsigned n = result_elems.size(); + svector removed(n, false); + for (unsigned k = 0; k + 1 < n; ++k) { + if (removed[k]) continue; + if (is_subset(result_elems.get(k), result_elems.get(k + 1))) { + removed[k] = true; + } else if (is_subset(result_elems.get(k + 1), result_elems.get(k))) { + removed[k + 1] = true; + } + } + + // Build right-associated chain from result + expr_ref result(m); + for (unsigned k = n; k-- > 0; ) { + if (removed[k]) continue; + if (!result) + result = expr_ref(result_elems.get(k), m); + else + result = expr_ref(re().mk_union(result_elems.get(k), result.get()), m); + } + return result ? result : expr_ref(re().mk_empty(r1->get_sort()), m); } expr_ref derive::mk_union_core(expr* a, expr* b) { - // Canonical order: smaller id first - if (a->get_id() > b->get_id()) - std::swap(a, b); - - // Subsumption covers: a==b, empty(a), empty(b), full(a), full(b), complement absorption, etc. 
- if (is_subset(a, b)) return expr_ref(b, m); - if (is_subset(b, a)) return expr_ref(a, m); - - // ITE handling with path pruning - auto union_op = [&](expr* x, expr* y) { return mk_union(x, y); }; - expr_ref r = hoist_ite(a, b, union_op); - if (r) return r; + // ITE handling with path pruning (before merge, since ITEs aren't part of sorted chains) + expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) || m.is_ite(b, c2, t2, e2)) { + // Canonical order for non-ITE cases handled by merge below + auto union_op = [&](expr* x, expr* y) { return mk_union(x, y); }; + expr_ref r = hoist_ite(a, b, union_op); + if (r) return r; + } // Prefix factoring: a·x ∪ a·y = a·(x ∪ y) expr *a1, *a2, *b1, *b2; @@ -739,35 +826,8 @@ namespace seq { return mk_deriv_concat(a1, tail); } - // ACI normalization: flatten, sort by id, deduplicate/subsume - expr_ref_vector args(m); - flatten_union(a, args); - flatten_union(b, args); - std::sort(args.data(), args.data() + args.size(), [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); - // Remove subsumed elements: if args[i] ⊆ args[j], drop args[i] - unsigned j = 0; - for (unsigned i = 0; i < args.size(); ++i) { - bool subsumed = false; - for (unsigned k = 0; k < j; ++k) { - if (is_subset(args.get(i), args.get(k))) { subsumed = true; break; } - } - if (!subsumed) { - // Check if new element subsumes any previously kept - unsigned new_j = 0; - for (unsigned k = 0; k < j; ++k) { - if (!is_subset(args.get(k), args.get(i))) - args[new_j++] = args.get(k); - } - args[new_j++] = args.get(i); - j = new_j; - } - } - if (j == 0) - return expr_ref(re().mk_empty(a->get_sort()), m); - expr_ref result(args.get(0), m); - for (unsigned i = 1; i < j; ++i) - result = expr_ref(re().mk_union(result, args.get(i)), m); - return result; + // Merge-based normalization: merge two sorted right-associated union chains + return merge_union(a, b); } expr_ref derive::mk_inter(expr* a, expr* b) { @@ -809,6 +869,14 @@ namespace seq { expr_ref r = 
hoist_ite(a, b, inter_op); if (r) return r; + // TODO: Distribution of intersection over union + // Disabled pending performance analysis + // expr *u1, *u2; + // if (re().is_union(a, u1, u2)) + // return mk_union(mk_inter(u1, b), mk_inter(u2, b)); + // if (re().is_union(b, u1, u2)) + // return mk_union(mk_inter(a, u1), mk_inter(a, u2)); + // Base case: build raw intersection return expr_ref(re().mk_inter(a, b), m); } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 969c5fa16..4057e301c 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -128,7 +128,9 @@ namespace seq { // Smart constructors with path-aware simplification and ACI canonicalization expr_ref mk_union(expr* a, expr* b); - void flatten_union(expr* e, expr_ref_vector& args); + expr_ref merge_union(expr* a, expr* b); // merge two sorted right-associated union chains + bool are_complements(expr* a, expr* b); + unsigned union_id(expr* e); // complement-aware ID for sorting bool is_subset(expr* a, expr* b); expr_ref mk_union_core(expr* a, expr* b); expr_ref mk_inter(expr* a, expr* b); From 3c3abeeeb22967fc15cc53067a31c15af2bd0cc0 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 9 Jun 2026 13:36:13 -0700 Subject: [PATCH 26/32] Refactor merge_union and mk_union_core functions --- src/ast/rewriter/seq_derive.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 2101223b4..7b7d3fca9 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -725,11 +725,9 @@ namespace seq { expr_ref derive::merge_union(expr* r1, expr* r2) { expr_ref _r1(r1, m), _r2(r2, m); - if (r1 == r2) return expr_ref(r1, m); - if (re().is_empty(r1)) return expr_ref(r2, m); - if (re().is_empty(r2)) return expr_ref(r1, m); - if (re().is_full_seq(r1)) return expr_ref(r1, m); - if (re().is_full_seq(r2)) return expr_ref(r2, m); + if (is_subset(r1, 
r2)) return expr_ref(r2, m); + if (is_subset(r2, r1)) return expr_ref(r1, m); + if (are_complements(r1, r2)) return expr_ref(re().mk_full_seq(r1->get_sort()), m); // Flatten both chains into a vector, merge-sort style @@ -811,8 +809,7 @@ namespace seq { expr_ref derive::mk_union_core(expr* a, expr* b) { // ITE handling with path pruning (before merge, since ITEs aren't part of sorted chains) - expr *c1, *t1, *e1, *c2, *t2, *e2; - if (m.is_ite(a, c1, t1, e1) || m.is_ite(b, c2, t2, e2)) { + if (m.is_ite(a) || m.is_ite(b)) { // Canonical order for non-ITE cases handled by merge below auto union_op = [&](expr* x, expr* y) { return mk_union(x, y); }; expr_ref r = hoist_ite(a, b, union_op); @@ -849,7 +846,6 @@ namespace seq { } expr_ref derive::mk_inter_core(expr* a, expr* b) { - // Canonical order: smaller id first if (a->get_id() > b->get_id()) std::swap(a, b); @@ -1136,6 +1132,8 @@ namespace seq { // Returns nullptr if neither a nor b is ITE (or depth limit reached). expr_ref derive::hoist_ite(expr* a, expr* b, std::function apply_op) { expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1->get_id() > c2->get_id()) + std::swap(a, b); if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) { expr_ref r(m); if (c1 == c2) @@ -1401,4 +1399,4 @@ namespace seq { m_intervals_start = old_sz; } -} \ No newline at end of file +} From fc4d15e4f89095cb6d2fd24fbca983f40dba0e79 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 9 Jun 2026 15:19:15 -0700 Subject: [PATCH 27/32] reuse char extraction from seq_util --- src/ast/rewriter/seq_derive.cpp | 70 ++++----------------------------- src/ast/rewriter/seq_derive.h | 1 - 2 files changed, 7 insertions(+), 64 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 2101223b4..3c9d4b187 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -544,68 +544,8 @@ namespace seq { // char_le(lo_expr, ele) → range [lo, 
max_char] // char_le(ele, hi_expr) → range [0, hi] // Returns false if not a recognizable range condition. - bool derive::extract_char_range(expr* cond, unsigned& lo, unsigned& hi) { - expr* e1 = nullptr, *e2 = nullptr, *lhs = nullptr, *rhs = nullptr; - lo = 0; - hi = u().max_char(); - - // Negation: not handled here — handled via pred_implies logic - if (m.is_not(cond, e1)) - return false; - - // Equality: ele == c → range [c, c] - if (m.is_eq(cond, e1, e2)) { - unsigned v; - if (u().is_const_char(e1, v) && e2 == m_ele) { - lo = hi = v; - return true; - } - if (u().is_const_char(e2, v) && e1 == m_ele) { - lo = hi = v; - return true; - } - return false; - } - - // Conjunction: and(char_le(lo, x), char_le(x, hi)) - if (m.is_and(cond, e1, e2)) { - expr *a1, *a2, *b1, *b2; - unsigned v; - if (u().is_char_le(e1, a1, a2)) { - if (u().is_const_char(a1, v) && a2 == m_ele) - lo = std::max(lo, v); // v <= ele - else if (a1 == m_ele && u().is_const_char(a2, v)) - hi = std::min(hi, v); // ele <= v - } - if (u().is_char_le(e2, b1, b2)) { - unsigned v2; - if (u().is_const_char(b1, v2) && b2 == m_ele) - lo = std::max(lo, v2); // v2 <= ele - else if (b1 == m_ele && u().is_const_char(b2, v2)) - hi = std::min(hi, v2); // ele <= v2 - } - return lo <= hi; - } - - // Single char_le - if (u().is_char_le(cond, lhs, rhs)) { - unsigned v; - if (u().is_const_char(lhs, v) && rhs == m_ele) { - lo = v; // v <= ele - return true; - } - if (lhs == m_ele && u().is_const_char(rhs, v)) { - hi = v; // ele <= v - return true; - } - } - - return false; - } - // Predicate implication for character range conditions. // Returns true if: whenever cond_a is true, cond_b must also be true. - // Used for BDD-merge of derivative ITE trees. // pred_implies(sign_a, a, sign_b, b): does (sign_a ? ¬a : a) imply (sign_b ? ¬b : b)? 
bool derive::pred_implies(bool sign_a, expr* a, bool sign_b, expr* b) { // Same atom: check sign compatibility @@ -616,20 +556,24 @@ namespace seq { return pred_implies(false, b, false, a); unsigned lo_a, hi_a, lo_b, hi_b; + bool neg_a, neg_b; if (!sign_a && !sign_b) { // a → b: range_a ⊆ range_b - if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) + if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a && + u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b) return lo_b <= lo_a && hi_a <= hi_b; } else if (!sign_a && sign_b) { // a → ¬b: range_a ∩ range_b = ∅ - if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) + if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a && + u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b) return hi_a < lo_b || hi_b < lo_a; } else if (sign_a && !sign_b) { // ¬a → b: complement of range_a ⊆ range_b - if (extract_char_range(a, lo_a, hi_a) && extract_char_range(b, lo_b, hi_b)) + if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a && + u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b) return lo_b == 0 && hi_b >= u().max_char(); } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 4057e301c..17285962f 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -150,7 +150,6 @@ namespace seq { // Predicate implication for character range conditions. 
bool pred_implies(bool sign_a, expr* a, bool sign_b, expr* b); bool pred_implies(expr* a, expr* b); - bool extract_char_range(expr* cond, unsigned& lo, unsigned& hi); // Normalize reverse(r) by pushing reverse inward expr_ref normalize_reverse(expr* r); From 77ac58484f07926db07d58d7ee30eb2dd53c1384 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 9 Jun 2026 17:42:11 -0700 Subject: [PATCH 28/32] updates Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_derive.cpp | 145 ++---------------------------- src/ast/rewriter/seq_derive.h | 5 +- src/ast/rewriter/seq_rewriter.cpp | 9 +- src/ast/rewriter/seq_rewriter.h | 21 +++++ 4 files changed, 39 insertions(+), 141 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index dae3670a9..ea374d6c3 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -453,7 +453,7 @@ namespace seq { if (re().is_intersection(r, r1, r2)) { expr_ref a(re().mk_reverse(r1), m); expr_ref b(re().mk_reverse(r2), m); - return expr_ref(re().mk_inter(a, b), m); + return m_re.mk_inter(a, b); } // reverse(r1 \ r2) = reverse(r1) \ reverse(r2) @@ -607,49 +607,7 @@ namespace seq { // Lightweight structural subsumption: checks if L(a) ⊆ L(b) bool derive::is_subset(expr* a, expr* b) { - if (a == b) return true; - if (re().is_empty(a)) return true; - if (re().is_full_seq(a)) return re().is_full_seq(b); - if (re().is_full_seq(b)) return true; - - expr* b1 = nullptr; - if (re().is_plus(b, b1) && re().is_full_char(b1) && - re().get_info(a).nullable == l_false) - return true; - - if (re().is_star(b, b1) && a == b1) return true; - - expr* a1 = nullptr; - if (re().is_star(a, a1) && re().is_star(b, b1) && is_subset(a1, b1)) return true; - - if (re().is_union(b, b1, a1)) { - if (is_subset(a, b1) || is_subset(a, a1)) return true; - } - if (re().is_union(a, a1, b1)) { - if (is_subset(a1, b) && is_subset(b1, b)) return true; - } - if (re().is_intersection(a, a1, b1)) { - if (is_subset(a1, 
b) || is_subset(b1, b)) return true; - } - if (re().is_intersection(b, b1, a1)) { - if (is_subset(a, b1) && is_subset(a, a1)) return true; - } - - expr* a2 = nullptr, * b2 = nullptr; - if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && - is_subset(a1, b1) && is_subset(a2, b2)) - return true; - - // loop subsumption: r{la,ua} ⊆ r{lb,ub} when lb <= la and ua <= ub - unsigned la, ua, lb, ub; - if (re().is_loop(a, a1, la, ua) && re().is_loop(b, b1, lb, ub) && - a1 == b1 && lb <= la && ua <= ub) - return true; - - if (re().is_complement(a, a1) && re().is_complement(b, b1)) - return is_subset(b1, a1); - - return false; + return m_re.is_subset(a, b); } unsigned derive::union_id(expr* e) { @@ -662,94 +620,7 @@ namespace seq { if (re().is_complement(a, c) && c == b) return true; if (re().is_complement(b, c) && c == a) return true; return false; - } - - // Merge two sorted right-associated union chains. - // Uses is_subset for pairwise subsumption during merge. - expr_ref derive::merge_union(expr* r1, expr* r2) { - expr_ref _r1(r1, m), _r2(r2, m); - - if (is_subset(r1, r2)) return expr_ref(r2, m); - if (is_subset(r2, r1)) return expr_ref(r1, m); - - if (are_complements(r1, r2)) return expr_ref(re().mk_full_seq(r1->get_sort()), m); - - // Flatten both chains into a vector, merge-sort style - expr_ref_vector elems(m); - auto collect = [&](expr* r) { - expr* a, *b; - while (re().is_union(r, a, b)) { - elems.push_back(a); - r = b; - } - elems.push_back(r); - }; - unsigned split; - collect(r1); - split = elems.size(); - collect(r2); - - // Merge pass: produce sorted result with subsumption - expr_ref_vector result_elems(m); - unsigned i = 0, j = split; - while (i < split && j < elems.size()) { - expr* a = elems.get(i); - expr* b = elems.get(j); - if (a == b) { - result_elems.push_back(a); - ++i; ++j; - } else if (are_complements(a, b)) { - return expr_ref(re().mk_full_seq(r1->get_sort()), m); - } else { - unsigned aid = union_id(a), bid = union_id(b); - if (aid == bid) 
{ - // Same union_id: check subsumption - if (is_subset(a, b)) - result_elems.push_back(b); - else if (is_subset(b, a)) - result_elems.push_back(a); - else { - result_elems.push_back(a); - result_elems.push_back(b); - } - ++i; ++j; - } else if (aid < bid) { - result_elems.push_back(a); - ++i; - } else { - result_elems.push_back(b); - ++j; - } - } - } - while (i < split) result_elems.push_back(elems.get(i++)); - while (j < elems.size()) result_elems.push_back(elems.get(j++)); - - // Subsumption pass: check each element against its neighbors - // This catches cases like loop(0,k)·star ⊆ loop(0,k+1)·star - // which have different union_ids - unsigned n = result_elems.size(); - svector removed(n, false); - for (unsigned k = 0; k + 1 < n; ++k) { - if (removed[k]) continue; - if (is_subset(result_elems.get(k), result_elems.get(k + 1))) { - removed[k] = true; - } else if (is_subset(result_elems.get(k + 1), result_elems.get(k))) { - removed[k + 1] = true; - } - } - - // Build right-associated chain from result - expr_ref result(m); - for (unsigned k = n; k-- > 0; ) { - if (removed[k]) continue; - if (!result) - result = expr_ref(result_elems.get(k), m); - else - result = expr_ref(re().mk_union(result_elems.get(k), result.get()), m); - } - return result ? 
result : expr_ref(re().mk_empty(r1->get_sort()), m); - } + } expr_ref derive::mk_union_core(expr* a, expr* b) { // ITE handling with path pruning (before merge, since ITEs aren't part of sorted chains) @@ -767,8 +638,7 @@ namespace seq { return mk_deriv_concat(a1, tail); } - // Merge-based normalization: merge two sorted right-associated union chains - return merge_union(a, b); + return m_re.mk_union(a, b); } expr_ref derive::mk_inter(expr* a, expr* b) { @@ -798,11 +668,13 @@ namespace seq { if (is_subset(b, a)) return expr_ref(b, m); // Complement absorption: r ∩ ~r = ∅ - expr* c = nullptr; + expr *c = nullptr, *d = nullptr; if (re().is_complement(a, c) && c == b) return expr_ref(re().mk_empty(a->get_sort()), m); if (re().is_complement(b, c) && c == a) return expr_ref(re().mk_empty(a->get_sort()), m); + if (re().is_complement(a, c) && re().is_complement(b, d)) + return expr_ref(re().mk_complement(mk_union_core(c, d)), m); // ITE handling with path pruning auto inter_op = [&](expr* x, expr* y) { return mk_inter(x, y); }; @@ -818,9 +690,10 @@ namespace seq { // return mk_union(mk_inter(a, u1), mk_inter(a, u2)); // Base case: build raw intersection - return expr_ref(re().mk_inter(a, b), m); + return m_re.mk_inter(a, b); } + expr_ref derive::mk_concat(expr* a, expr* b) { if (re().is_empty(a)) return expr_ref(a, m); if (re().is_empty(b)) return expr_ref(b, m); diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 17285962f..0c981d52f 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -128,13 +128,12 @@ namespace seq { // Smart constructors with path-aware simplification and ACI canonicalization expr_ref mk_union(expr* a, expr* b); - expr_ref merge_union(expr* a, expr* b); // merge two sorted right-associated union chains bool are_complements(expr* a, expr* b); unsigned union_id(expr* e); // complement-aware ID for sorting bool is_subset(expr* a, expr* b); - expr_ref mk_union_core(expr* a, expr* b); + expr_ref 
mk_union_core(expr* a, expr* b); expr_ref mk_inter(expr* a, expr* b); - expr_ref mk_inter_core(expr* a, expr* b); + expr_ref mk_inter_core(expr* a, expr* b); expr_ref mk_concat(expr* a, expr* b); expr_ref mk_complement(expr* a); expr_ref mk_complement_core(expr* a); diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 8af435f09..08c7887ee 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2914,11 +2914,16 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) { #endif expr_ref seq_rewriter::mk_derivative(expr* r) { - return m_derive(r); + auto result = m_derive(r); + TRACE(seq, tout << "Derivative of " << mk_pp(r, m()) << "\nis\n" << result << std::endl;); + return result; } expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - return m_derive(ele, r); + auto result = m_derive(ele, r); + TRACE(seq, + tout << "Derivative of " << mk_pp(r, m()) << " w.r.t. " << mk_pp(ele, m()) << "\nis\n" << result << std::endl;); + return result; } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 33b1462e8..30134d1cd 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -365,6 +365,27 @@ public: return result; } + expr_ref mk_union(expr *a, expr *b) { + expr_ref result(m()); + if (mk_re_union(a, b, result) == BR_FAILED) + result = re().mk_union(a, b); + return result; + } + + expr_ref mk_inter(expr *a, expr *b) { + expr_ref result(m()); + if (mk_re_inter(a, b, result) == BR_FAILED) + result = re().mk_inter(a, b); + return result; + } + + expr_ref mk_complement(expr *a) { + expr_ref result(m()); + if (mk_re_complement(a, result) == BR_FAILED) + result = re().mk_complement(a); + return result; + } + /* * makes concat and simplifies */ From 00fcd3a36de6804568e2d9894325a86a765574c2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Jun 2026 18:18:46 +0000 Subject: 
[PATCH 29/32] Address PR feedback on derive, nullability, and requested reverts --- src/ast/rewriter/seq_derive.cpp | 233 ++++++++++++++++++------------ src/ast/rewriter/seq_derive.h | 6 +- src/ast/rewriter/seq_rewriter.cpp | 92 ------------ src/ast/rewriter/seq_rewriter.h | 3 - src/model/datatype_factory.cpp | 3 - src/model/datatype_factory.h | 2 - src/smt/seq_regex.cpp | 69 +-------- src/smt/seq_regex.h | 2 +- 8 files changed, 143 insertions(+), 267 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index ea374d6c3..ecc3df701 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -252,7 +252,7 @@ namespace seq { // δ(reverse(r1)) - normalize by pushing reverse inward, then derive if (re().is_reverse(r, r1)) { - expr_ref norm = normalize_reverse(r1); + expr_ref norm = mk_regex_reverse(r1); if (norm != r) return derive_rec(norm); return expr_ref(re().mk_derivative(m_ele, r), m); @@ -423,96 +423,54 @@ namespace seq { } // ------------------------------------------------------- - // Normalize reverse by pushing it inward + // Normalize reverse // ------------------------------------------------------- - expr_ref derive::normalize_reverse(expr* r) { - expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * p = nullptr; + expr_ref derive::mk_regex_reverse(expr* r) { + expr* r1 = nullptr, * r2 = nullptr, * c = nullptr; unsigned lo = 0, hi = 0; - zstring zs; - - // reverse(reverse(r1)) = r1 - if (re().is_reverse(r, r1)) - return expr_ref(r1, m); - - // reverse(r1 · r2) = reverse(r2) · reverse(r1) - if (re().is_concat(r, r1, r2)) { - expr_ref a(re().mk_reverse(r2), m); - expr_ref b(re().mk_reverse(r1), m); - return expr_ref(re().mk_concat(a, b), m); + expr_ref result(m); + if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) || + re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r)) + result = r; + else if (re().is_to_re(r)) + result = re().mk_reverse(r); + 
else if (re().is_reverse(r, r1)) + result = r1; + else if (re().is_concat(r, r1, r2)) + result = mk_concat(mk_regex_reverse(r2), mk_regex_reverse(r1)); + else if (m.is_ite(r, c, r1, r2)) + result = m.mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2)); + else if (re().is_union(r, r1, r2)) { + auto a1 = mk_regex_reverse(r1); + auto b1 = mk_regex_reverse(r2); + result = re().mk_union(a1, b1); } - - // reverse(r1 ∪ r2) = reverse(r1) ∪ reverse(r2) - if (re().is_union(r, r1, r2)) { - expr_ref a(re().mk_reverse(r1), m); - expr_ref b(re().mk_reverse(r2), m); - return expr_ref(re().mk_union(a, b), m); + else if (re().is_intersection(r, r1, r2)) { + auto a1 = mk_regex_reverse(r1); + auto b1 = mk_regex_reverse(r2); + result = re().mk_inter(a1, b1); } - - // reverse(r1 ∩ r2) = reverse(r1) ∩ reverse(r2) - if (re().is_intersection(r, r1, r2)) { - expr_ref a(re().mk_reverse(r1), m); - expr_ref b(re().mk_reverse(r2), m); - return m_re.mk_inter(a, b); + else if (re().is_diff(r, r1, r2)) { + auto a1 = mk_regex_reverse(r1); + auto b1 = mk_regex_reverse(r2); + result = re().mk_diff(a1, b1); } - - // reverse(r1 \ r2) = reverse(r1) \ reverse(r2) - if (re().is_diff(r, r1, r2)) { - expr_ref a(re().mk_reverse(r1), m); - expr_ref b(re().mk_reverse(r2), m); - return expr_ref(re().mk_diff(a, b), m); - } - - // reverse(ite(c, r1, r2)) = ite(c, reverse(r1), reverse(r2)) - if (m.is_ite(r, p, r1, r2)) - return expr_ref(m.mk_ite(p, re().mk_reverse(r1), re().mk_reverse(r2)), m); - - // reverse(r1?) = reverse(r1)? 
- if (re().is_opt(r, r1)) - return expr_ref(re().mk_opt(re().mk_reverse(r1)), m); - - // reverse(~r1) = ~reverse(r1) - if (re().is_complement(r, r1)) - return expr_ref(re().mk_complement(re().mk_reverse(r1)), m); - - // reverse(r1*) = reverse(r1)* - if (re().is_star(r, r1)) - return expr_ref(re().mk_star(re().mk_reverse(r1)), m); - - // reverse(r1+) = reverse(r1)+ - if (re().is_plus(r, r1)) - return expr_ref(re().mk_plus(re().mk_reverse(r1)), m); - - // reverse(r1{lo,}) = reverse(r1){lo,} - if (re().is_loop(r, r1, lo)) - return expr_ref(re().mk_loop(re().mk_reverse(r1), lo), m); - - // reverse(r1{lo,hi}) = reverse(r1){lo,hi} - if (re().is_loop(r, r1, lo, hi)) - return expr_ref(re().mk_loop_proper(re().mk_reverse(r1), lo, hi), m); - - // Symmetric: full_seq, empty, range, full_char, of_pred - if (re().is_full_seq(r) || re().is_empty(r) || re().is_range(r) || - re().is_full_char(r) || re().is_of_pred(r)) - return expr_ref(r, m); - - // reverse(to_re(s)) where s is a string literal - if (re().is_to_re(r, s) && u().str.is_string(s, zs)) - return expr_ref(re().mk_to_re(u().str.mk_string(zs.reverse())), m); - - // reverse(to_re(unit)) = to_re(unit) - if (re().is_to_re(r, s) && u().str.is_unit(s)) - return expr_ref(r, m); - - // reverse(to_re(s1 ++ s2)) = reverse(to_re(s2)) · reverse(to_re(s1)) - if (re().is_to_re(r, s) && u().str.is_concat(s, r1, r2)) { - expr_ref a(re().mk_reverse(re().mk_to_re(r2)), m); - expr_ref b(re().mk_reverse(re().mk_to_re(r1)), m); - return expr_ref(re().mk_concat(a, b), m); - } - - // Stuck — cannot normalize further - return expr_ref(re().mk_reverse(r), m); + else if (re().is_star(r, r1)) + result = re().mk_star(mk_regex_reverse(r1)); + else if (re().is_plus(r, r1)) + result = re().mk_plus(mk_regex_reverse(r1)); + else if (re().is_loop(r, r1, lo)) + result = re().mk_loop(mk_regex_reverse(r1), lo); + else if (re().is_loop(r, r1, lo, hi)) + result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi); + else if (re().is_opt(r, r1)) + result = 
re().mk_opt(mk_regex_reverse(r1)); + else if (re().is_complement(r, r1)) + result = re().mk_complement(mk_regex_reverse(r1)); + else + result = re().mk_reverse(r); + return result; } // ------------------------------------------------------- @@ -520,16 +478,92 @@ namespace seq { // ------------------------------------------------------- expr_ref derive::is_nullable(expr* r) { - // First, try the static info which handles ground/interpreted regex - lbool nb = re().get_info(r).nullable; - if (nb == l_true) - return expr_ref(m.mk_true(), m); - if (nb == l_false) - return expr_ref(m.mk_false(), m); - // For symbolic regexes, return a membership predicate - sort* s = nullptr; - VERIFY(m_util.is_re(r, s)); - return expr_ref(re().mk_in_re(u().str.mk_empty(s), r), m); + SASSERT(m_util.is_re(r) || m_util.is_seq(r)); + expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr; + sort* seq_sort = nullptr; + unsigned lo = 0, hi = 0; + zstring s1; + expr_ref result(m); + if (re().is_concat(r, r1, r2) || + re().is_intersection(r, r1, r2)) { + m_br.mk_and(is_nullable(r1), is_nullable(r2), result); + } + else if (re().is_union(r, r1, r2) || re().is_antimirov_union(r, r1, r2)) { + m_br.mk_or(is_nullable(r1), is_nullable(r2), result); + } + else if (re().is_diff(r, r1, r2)) { + m_br.mk_not(is_nullable(r2), result); + m_br.mk_and(result, is_nullable(r1), result); + } + else if (re().is_star(r) || + re().is_opt(r) || + re().is_full_seq(r) || + re().is_epsilon(r) || + (re().is_loop(r, r1, lo) && lo == 0) || + (re().is_loop(r, r1, lo, hi) && lo == 0)) { + result = m.mk_true(); + } + else if (re().is_full_char(r) || + re().is_empty(r) || + re().is_of_pred(r) || + re().is_range(r)) { + result = m.mk_false(); + } + else if (re().is_plus(r, r1) || + (re().is_loop(r, r1, lo) && lo > 0) || + (re().is_loop(r, r1, lo, hi) && lo > 0) || + (re().is_reverse(r, r1))) { + result = is_nullable(r1); + } + else if (re().is_complement(r, r1)) { + m_br.mk_not(is_nullable(r1), result); + } + else if 
(re().is_to_re(r, r1)) { + result = is_nullable(r1); + } + else if (m.is_ite(r, cond, r1, r2)) { + m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result); + } + else if (m_util.is_re(r, seq_sort)) { + result = is_nullable_symbolic_regex(r, seq_sort); + } + else if (u().str.is_concat(r, r1, r2)) { + m_br.mk_and(is_nullable(r1), is_nullable(r2), result); + } + else if (u().str.is_empty(r)) { + result = m.mk_true(); + } + else if (u().str.is_unit(r)) { + result = m.mk_false(); + } + else if (u().str.is_string(r, s1)) { + result = m.mk_bool_val(s1.length() == 0); + } + else { + SASSERT(m_util.is_seq(r)); + result = m.mk_eq(u().str.mk_empty(r->get_sort()), r); + } + return result; + } + + expr_ref derive::is_nullable_symbolic_regex(expr* r, sort* seq_sort) { + SASSERT(m_util.is_re(r)); + expr* elem = nullptr, * r1 = r, * r2 = nullptr, * s = nullptr; + expr_ref elems(u().str.mk_empty(seq_sort), m); + expr_ref result(m); + while (re().is_derivative(r1, elem, r2)) { + if (u().str.is_empty(elems)) + elems = u().str.mk_unit(elem); + else + elems = u().str.mk_concat(u().str.mk_unit(elem), elems); + r1 = r2; + } + if (re().is_to_re(r1, s)) { + result = m.mk_eq(elems, s); + return result; + } + result = re().mk_in_re(u().str.mk_empty(seq_sort), r); + return result; } // ------------------------------------------------------- @@ -695,10 +729,19 @@ namespace seq { expr_ref derive::mk_concat(expr* a, expr* b) { + sort* seq_s = nullptr, * ele_s = nullptr; + VERIFY(m_util.is_re(a, seq_s)); + VERIFY(u().is_seq(seq_s, ele_s)); if (re().is_empty(a)) return expr_ref(a, m); if (re().is_empty(b)) return expr_ref(b, m); if (re().is_epsilon(a)) return expr_ref(b, m); if (re().is_epsilon(b)) return expr_ref(a, m); + if (re().is_full_seq(a) && re().is_full_seq(b)) + return expr_ref(a, m); + if (re().is_full_char(a) && re().is_full_seq(b)) + return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m); + if (re().is_full_seq(a) && re().is_full_char(b)) + return 
expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m); // to_re(s1) · to_re(s2) → to_re(s1 ++ s2) expr* s1 = nullptr, * s2 = nullptr; diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 0c981d52f..69f38e72f 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -125,6 +125,7 @@ namespace seq { // Nullable check: returns a Boolean expression expr_ref is_nullable(expr* r); + expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort); // Smart constructors with path-aware simplification and ACI canonicalization expr_ref mk_union(expr* a, expr* b); @@ -150,8 +151,8 @@ namespace seq { bool pred_implies(bool sign_a, expr* a, bool sign_b, expr* b); bool pred_implies(expr* a, expr* b); - // Normalize reverse(r) by pushing reverse inward - expr_ref normalize_reverse(expr* r); + // Normalize reverse(r) + expr_ref mk_regex_reverse(expr* r); // Condition evaluation helpers lbool eval_cond(expr* cond); @@ -184,4 +185,3 @@ namespace seq { }; } - diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 08c7887ee..fa200f849 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3080,98 +3080,6 @@ expr_ref seq_rewriter::merge_regex_sets(expr* r1, expr* r2, expr* unit, } } -expr_ref seq_rewriter::mk_regex_reverse(expr* r) { - expr* r1 = nullptr, * r2 = nullptr, * c = nullptr; - unsigned lo = 0, hi = 0; - expr_ref result(m()); - if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) || - re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r)) - result = r; - else if (re().is_to_re(r)) - result = re().mk_reverse(r); - else if (re().is_reverse(r, r1)) - result = r1; - else if (re().is_concat(r, r1, r2)) - result = mk_regex_concat(mk_regex_reverse(r2), mk_regex_reverse(r1)); - else if (m().is_ite(r, c, r1, r2)) - result = m().mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2)); - else if (re().is_union(r, r1, r2)) { - // 
enforce deterministic evaluation order - auto a1 = mk_regex_reverse(r1); - auto b1 = mk_regex_reverse(r2); - result = re().mk_union(a1, b1); - } - else if (re().is_intersection(r, r1, r2)) { - auto a1 = mk_regex_reverse(r1); - auto b1 = mk_regex_reverse(r2); - result = re().mk_inter(a1, b1); - } - else if (re().is_diff(r, r1, r2)) { - auto a1 = mk_regex_reverse(r1); - auto b1 = mk_regex_reverse(r2); - result = re().mk_diff(a1, b1); - } - else if (re().is_star(r, r1)) - result = re().mk_star(mk_regex_reverse(r1)); - else if (re().is_plus(r, r1)) - result = re().mk_plus(mk_regex_reverse(r1)); - else if (re().is_loop(r, r1, lo)) - result = re().mk_loop(mk_regex_reverse(r1), lo); - else if (re().is_loop(r, r1, lo, hi)) - result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi); - else if (re().is_opt(r, r1)) - result = re().mk_opt(mk_regex_reverse(r1)); - else if (re().is_complement(r, r1)) - result = re().mk_complement(mk_regex_reverse(r1)); - else - //stuck cases: such as r being a regex variable - //observe that re().mk_reverse(to_re(s)) is not a stuck case - result = re().mk_reverse(r); - return result; -} - -expr_ref seq_rewriter::mk_regex_concat(expr* r, expr* s) { - sort* seq_sort = nullptr, * ele_sort = nullptr; - VERIFY(m_util.is_re(r, seq_sort)); - VERIFY(u().is_seq(seq_sort, ele_sort)); - SASSERT(r->get_sort() == s->get_sort()); - expr_ref result(m()); - expr* r1, * r2; - if (re().is_epsilon(r) || re().is_empty(s)) - result = s; - else if (re().is_epsilon(s) || re().is_empty(r)) - result = r; - else if (re().is_full_seq(r) && re().is_full_seq(s)) - result = r; - else if (re().is_full_char(r) && re().is_full_seq(s)) - // ..* = .+ - result = re().mk_plus(re().mk_full_char(ele_sort)); - else if (re().is_full_seq(r) && re().is_full_char(s)) - // .*. 
= .+ - result = re().mk_plus(re().mk_full_char(ele_sort)); - else if (re().is_concat(r, r1, r2)) - // create the resulting concatenation in right-associative form except for the following case - // TODO: maintain the following invariant for A ++ B{m,n} + C - // concat(concat(A, B{m,n}), C) (if A != () and C != ()) - // concat(B{m,n}, C) (if A == () and C != ()) - // where A, B, C are regexes - // Using & below for Intersection and | for Union - // In other words, do not make A ++ B{m,n} into right-assoc form, but keep B{m,n} at the top - // This will help to identify this situation in the merge routine: - // concat(concat(A, B{0,m}), C) | concat(concat(A, B{0,n}), C) - // simplifies to - // concat(concat(A, B{0,max(m,n)}), C) - // analogously: - // concat(concat(A, B{0,m}), C) & concat(concat(A, B{0,n}), C) - // simplifies to - // concat(concat(A, B{0,min(m,n)}), C) - result = mk_regex_concat(r1, mk_regex_concat(r2, s)); - else { - result = re().mk_concat(r, s); - } - return result; -} - /* * calls elim_condition */ diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 30134d1cd..8b4bfd5b7 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -195,9 +195,6 @@ class seq_rewriter { bool check_deriv_normal_form(expr* r, int level = 3); #endif - expr_ref mk_regex_reverse(expr* r); - expr_ref mk_regex_concat(expr* r1, expr* r2); - expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function& decompose, std::function& compose); // elem is (:var 0) and path a condition that may have (:var 0) as a free variable diff --git a/src/model/datatype_factory.cpp b/src/model/datatype_factory.cpp index fac858ccc..b93703acd 100644 --- a/src/model/datatype_factory.cpp +++ b/src/model/datatype_factory.cpp @@ -139,9 +139,6 @@ expr * datatype_factory::get_almost_fresh_value(sort * s) { expr * datatype_factory::get_fresh_value(sort * s) { if (!m_util.is_datatype(s)) return m_model.get_fresh_value(s); - if 
(m_fresh_depth >= m_max_fresh_depth) - return get_last_fresh_value(s); - struct depth_guard { unsigned& d; depth_guard(unsigned& d) : d(d) { ++d; } ~depth_guard() { --d; } } _dg(m_fresh_depth); TRACE(datatype, tout << "generating fresh value for: " << s->get_name() << "\n";); auto& [set, values] = get_value_set(s); // Approach 0) diff --git a/src/model/datatype_factory.h b/src/model/datatype_factory.h index 2d8f216b4..b2a6b75d3 100644 --- a/src/model/datatype_factory.h +++ b/src/model/datatype_factory.h @@ -24,8 +24,6 @@ Revision History: class datatype_factory : public struct_factory { datatype_util m_util; obj_map m_last_fresh_value; - unsigned m_fresh_depth = 0; - static const unsigned m_max_fresh_depth = 512; expr * get_last_fresh_value(sort * s); expr * get_almost_fresh_value(sort * s); diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index a3c56159d..64487a21e 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -22,7 +22,6 @@ Author: #include "ast/ast_util.h" #include "ast/for_each_expr.h" #include -#include namespace smt { @@ -224,40 +223,6 @@ namespace smt { th.add_axiom(~lit); return true; } - // Second pass: deeper exploration for intersection/complement/diff regexes - // These are candidates for dead state detection (the result may be empty) - // For these, do unlimited depth exploration with a time budget - unsigned r_id = get_state_id(r); - expr* r1 = nullptr, *r2 = nullptr; - if (!m_state_graph.is_dead(r_id) && !m_state_graph.is_live(r_id) && - (re().is_intersection(r, r1, r2) || re().is_complement(r, r1) || re().is_diff(r, r1, r2))) { - // Collect all unexplored states and explore them iteratively - // with a time budget - auto pass2_start = std::chrono::steady_clock::now(); - bool changed = true; - while (changed && !m_state_graph.is_dead(r_id)) { - auto elapsed = std::chrono::duration_cast( - std::chrono::steady_clock::now() - pass2_start).count(); - if (elapsed > 100) break; - changed = false; - for (unsigned i = 0; i < 
m_state_to_expr.size() && !m_state_graph.is_dead(r_id); ++i) { - unsigned st_id = i + 1; - if (m_state_graph.is_done(st_id) || m_state_graph.is_live(st_id) || m_state_graph.is_dead(st_id)) - continue; - // This is an unexplored state — explore it - expr* st = m_state_to_expr.get(i); - if (re().get_info(st).nullable == l_true) - continue; - if (update_state_graph(st, 1)) - changed = true; - } - } - if (m_state_graph.is_dead(r_id)) { - STRACE(seq_regex_brief, tout << "(dead2) ";); - th.add_axiom(~lit); - return true; - } - } } return false; } @@ -851,7 +816,7 @@ namespace smt { /* Update the state graph with expression r and all its derivatives. */ - bool seq_regex::update_state_graph(expr* r, unsigned depth) { + bool seq_regex::update_state_graph(expr* r) { unsigned r_id = get_state_id(r); if (m_state_graph.is_done(r_id)) return false; if (m_state_graph.get_size() >= m_max_state_graph_size) { @@ -894,38 +859,6 @@ namespace smt { m_state_graph.add_edge(r_id, dr_id, maybecycle); } m_state_graph.mark_done(r_id); - // Explore direct targets for dead state detection (depth 1 only) - // This compensates for less-canonical derivative representations - if (depth < 1) { - for (auto const& dr: derivatives) { - unsigned dr_id = get_state_id(dr); - if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) - continue; - if (re().get_info(dr).nullable == l_true) - continue; - update_state_graph(dr, depth + 1); - } - } - else if (depth == 1) { - // At depth 1, do lightweight exploration: compute derivatives - // of this state's targets but only to check if they're all dead. - // Don't add complex states to the graph — just mark them dead if - // their get_info says min_length == UINT_MAX or is_empty. 
- for (auto const& dr: derivatives) { - unsigned dr_id = get_state_id(dr); - if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) - continue; - auto dr_info = re().get_info(dr); - if (dr_info.nullable == l_true) { - m_state_graph.add_state(dr_id); - m_state_graph.mark_live(dr_id); - } - else if (re().is_empty(dr) || dr_info.min_length == UINT_MAX) { - m_state_graph.add_state(dr_id); - m_state_graph.mark_done(dr_id); - } - } - } } STRACE(seq_regex, m_state_graph.display(tout);); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index af03b3c50..5c3fddd25 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -124,7 +124,7 @@ namespace smt { // Note: Doesn't need to be sound or complete (doesn't affect soundness) bool can_be_in_cycle(expr* r1, expr* r2); // Update the graph - bool update_state_graph(expr* r, unsigned depth = 0); + bool update_state_graph(expr* r); // Printing expressions for seq_regex_brief std::string state_str(expr* e); From 4cd4d16868b545becac4242f834aec6072c0e69d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Jun 2026 18:20:49 +0000 Subject: [PATCH 30/32] Apply follow-up derive validation fixes --- src/ast/rewriter/seq_derive.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index ecc3df701..36d1ab560 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -438,7 +438,7 @@ namespace seq { else if (re().is_reverse(r, r1)) result = r1; else if (re().is_concat(r, r1, r2)) - result = mk_concat(mk_regex_reverse(r2), mk_regex_reverse(r1)); + result = re().mk_concat(mk_regex_reverse(r2), mk_regex_reverse(r1)); else if (m.is_ite(r, c, r1, r2)) result = m.mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2)); else if (re().is_union(r, r1, r2)) { @@ -519,7 +519,8 @@ namespace seq { m_br.mk_not(is_nullable(r1), result); } else if (re().is_to_re(r, 
r1)) { - result = is_nullable(r1); + SASSERT(u().is_seq(r1->get_sort())); + result = m.mk_eq(u().str.mk_empty(r1->get_sort()), r1); } else if (m.is_ite(r, cond, r1, r2)) { m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result); From b6a29b800baa73660fa53fdc0f789283ed8ec87a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Jun 2026 18:53:55 +0000 Subject: [PATCH 31/32] Remove is_nullable_rec from seq_rewriter, delegate to derive::nullable --- src/ast/rewriter/seq_derive.cpp | 3 +- src/ast/rewriter/seq_derive.h | 5 ++ src/ast/rewriter/seq_rewriter.cpp | 96 +------------------------------ src/ast/rewriter/seq_rewriter.h | 3 - 4 files changed, 7 insertions(+), 100 deletions(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 36d1ab560..0cb766d99 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -519,8 +519,7 @@ namespace seq { m_br.mk_not(is_nullable(r1), result); } else if (re().is_to_re(r, r1)) { - SASSERT(u().is_seq(r1->get_sort())); - result = m.mk_eq(u().str.mk_empty(r1->get_sort()), r1); + result = is_nullable(r1); } else if (m.is_ite(r, cond, r1, r2)) { m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result); diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 69f38e72f..676f67f81 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -182,6 +182,11 @@ namespace seq { */ expr_ref operator()(expr* r); + /** + * Nullable check: returns a Boolean expression that is true iff r accepts the empty string. 
+ */ + expr_ref nullable(expr* r) { return is_nullable(r); } + }; } diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index fa200f849..3ea2b630c 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2596,7 +2596,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) { << mk_pp(r, m()) << std::endl;); expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr, nullptr), m()); if (!result) { - result = is_nullable_rec(r); + result = m_derive.nullable(r); m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, nullptr, result); } STRACE(seq_verbose, tout << "is_nullable result: " @@ -2604,100 +2604,6 @@ expr_ref seq_rewriter::is_nullable(expr* r) { return result; } -expr_ref seq_rewriter::is_nullable_rec(expr* r) { - SASSERT(m_util.is_re(r) || m_util.is_seq(r)); - expr* r1 = nullptr, *r2 = nullptr, *cond = nullptr; - sort* seq_sort = nullptr; - unsigned lo = 0, hi = 0; - zstring s1; - expr_ref result(m()); - if (re().is_concat(r, r1, r2) || - re().is_intersection(r, r1, r2)) { - m_br.mk_and(is_nullable(r1), is_nullable(r2), result); - } - else if (re().is_union(r, r1, r2) || re().is_antimirov_union(r, r1, r2)) { - m_br.mk_or(is_nullable(r1), is_nullable(r2), result); - } - else if (re().is_diff(r, r1, r2)) { - m_br.mk_not(is_nullable(r2), result); - m_br.mk_and(result, is_nullable(r1), result); - } - else if (re().is_star(r) || - re().is_opt(r) || - re().is_full_seq(r) || - re().is_epsilon(r) || - (re().is_loop(r, r1, lo) && lo == 0) || - (re().is_loop(r, r1, lo, hi) && lo == 0)) { - result = m().mk_true(); - } - else if (re().is_full_char(r) || - re().is_empty(r) || - re().is_of_pred(r) || - re().is_range(r)) { - result = m().mk_false(); - } - else if (re().is_plus(r, r1) || - (re().is_loop(r, r1, lo) && lo > 0) || - (re().is_loop(r, r1, lo, hi) && lo > 0) || - (re().is_reverse(r, r1))) { - result = is_nullable(r1); - } - else if (re().is_complement(r, r1)) { - m_br.mk_not(is_nullable(r1), result); - } - 
else if (re().is_to_re(r, r1)) { - result = is_nullable(r1); - } - else if (m().is_ite(r, cond, r1, r2)) { - m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result); - } - else if (m_util.is_re(r, seq_sort)) { - result = is_nullable_symbolic_regex(r, seq_sort); - } - else if (str().is_concat(r, r1, r2)) { - m_br.mk_and(is_nullable(r1), is_nullable(r2), result); - } - else if (str().is_empty(r)) { - result = m().mk_true(); - } - else if (str().is_unit(r)) { - result = m().mk_false(); - } - else if (str().is_string(r, s1)) { - result = m().mk_bool_val(s1.length() == 0); - } - else { - SASSERT(m_util.is_seq(r)); - result = m().mk_eq(str().mk_empty(r->get_sort()), r); - } - return result; -} - -expr_ref seq_rewriter::is_nullable_symbolic_regex(expr* r, sort* seq_sort) { - SASSERT(m_util.is_re(r)); - expr* elem = nullptr, *r1 = r, * r2 = nullptr, * s = nullptr; - expr_ref elems(str().mk_empty(seq_sort), m()); - expr_ref result(m()); - while (re().is_derivative(r1, elem, r2)) { - if (str().is_empty(elems)) - elems = str().mk_unit(elem); - else - elems = str().mk_concat(str().mk_unit(elem), elems); - r1 = r2; - } - if (re().is_to_re(r1, s)) { - // r is nullable - // iff after taking the derivatives the remaining sequence is empty - // iff the inner sequence equals to the sequence of derivative elements in reverse - result = m().mk_eq(elems, s); - return result; - } - // the default case when either r is not a derivative - // or when the nested derivatives are not applied to a sequence - result = re().mk_in_re(str().mk_empty(seq_sort), r); - return result; -} - /* Push reverse inwards (whenever possible). 
*/ diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 8b4bfd5b7..2e05bf3c7 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -179,7 +179,6 @@ class seq_rewriter { void replace_all_subvectors(expr_ref_vector const& as, expr_ref_vector const& bs, expr* c, expr_ref_vector& result); // Calculate derivative, memoized and enforcing a normal form - expr_ref is_nullable_rec(expr* r); expr_ref mk_der_op(decl_kind k, expr* a, expr* b); expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); expr_ref mk_der_concat(expr* a, expr* b); @@ -189,8 +188,6 @@ class seq_rewriter { expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort); expr_ref mk_der_antimirov_union(expr* r1, expr* r2); bool ite_bdds_compatible(expr* a, expr* b); - /* if r has the form deriv(en..deriv(e1,to_re(s))..) returns 's = [e1..en]' else returns '() in r'*/ - expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort); #ifdef Z3DEBUG bool check_deriv_normal_form(expr* r, int level = 3); #endif From b5afa9200e22209daec4b6d64831f89b8b1dc822 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:02:49 -0700 Subject: [PATCH 32/32] [code-simplifier] simplify seq_subset dead checks and clean profiling artifact (#9811) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change simplifies `seq_subset` by removing redundant/dead subset checks introduced in recent regex-subset refactoring, while preserving behavior. It also removes an accidentally committed profiling output and prevents recurrence. - **`seq_subset` dead-code reduction** - Kept the explicit `e ⊆ a*` fast path. - Removed the duplicate `R ⊆ R*` branch that repeated the same star-subset recursion with a stricter depth budget. - Clarified the remaining `a ⊆ a*` comment and cleaned trailing whitespace. - **Repository hygiene** - Deleted committed runtime artifact: `gmon.out`. 
- Added `gmon.out` to `.gitignore` to avoid future accidental commits. Example of the simplification in `seq_subset.cpp`: ```cpp // kept // e ⊆ a* if (m_re.is_epsilon(a) && m_re.is_star(b, b1)) return true; // kept // a ⊆ a*: if b = b1* and a ⊆ b1, then a ⊆ b1* if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth)) return true; // removed redundant case: // - duplicate star-subset recursion at depth + 1 ``` --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- .gitignore | 1 + gmon.out | Bin 14458562 -> 0 bytes src/ast/rewriter/seq_subset.cpp | 12 ++++-------- 3 files changed, 5 insertions(+), 8 deletions(-) delete mode 100644 gmon.out diff --git a/.gitignore b/.gitignore index df4e3266d..8e5bd7294 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ crashes/ *.pyo # Ignore callgrind files callgrind.out.* +gmon.out # .hpp files are automatically generated *.hpp .env diff --git a/gmon.out b/gmon.out deleted file mode 100644 index cba10a19b2a99a2517d591f2bf1b928974a77eb5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14458562 zcmeFtu?>JA5Cu?Nz$Jm$C=@KfPLAMW7!&B+;9YZX&b6Hv7{Tx6L3X3p{#SjJsL5m zd(GtLw`#oTtt7o_pGJ8u8-On`dkeH0$ki8}ZVQom%I<7l-H7 z(`+Wky*TT|!ROS?&;8;0^?2=_>>@twef6>_x6$~{?PQ7TyVRJs=CzKSFFv=%cYj@_ zD{*z#cJrS*)RWmg+s(^mv*^WDFAl!2v-vdQU;fG|x>;{>6YyNov)((-Y+m9#;vIkO^wE2h%QLYc4<1&R zl(*)cj!T++d5w8DyzVj1Bjycvi(_2&;wEBV3}tfo@b*9@t{zci?z1duc0rA)RO0TD zHQx3c>(}4@w8PO?@y)Bh^i(TUCGtDOAxQ}067>`fove`$x_(iY3=CYZ@)5EK;b?W&j(_eK_2S|;>cp@9T)hSA(IfumTTgAi|IPYMb1jYE zR^z?ja%yt@SM{4K?@`xT_Bs0N8oxSrQ{p1x<*~F9cM+Qd%|1{!fB84nuh%~aoa`P@ zuk|bcXN?79-;0|st&@40WpW$w^Lo|@c6d{eBqOisS3bD;e%)RKE<9L~s2dZb(i_=~l#jEo_{Yc$+d6rp4%$FQDKQ4az zA9W&MgI(_!M}zumGS^w|T(grJb4ev`uGLP~bH8>Q%WJT~b=u8K97oIp70=mC>SV67 zJfSQi-ZJ)Gg>HM7cJng1ymvcU_PMxEjd>fDqpu_8fyzF2_pOu7ZM6J~ z8q>4vbM;m2K1-Zk*qIzWu@h%csWFdUHlMF*%%hh$d`)L^5i!rGJOr&GHk(g=vTmM} zMQL{?mUAEdZ0A7ppX~A|4!|WeB 
zaq!PIzB%3_md(d;&I$6l$D?P=XN3At8Bch5mnoCGX?+%KmbCtg8uQ%C=7Z7Q)H%@h#hp0&o=)7otj1h?^?Z3} za(!86a`3&K$@wcflatFkllxwre}6k!ly(unZ&v@X#NjL31C==VvCih>GqFDP^x|Yy zH^1!F^=p}&yuP!~eJ{>`xidNWwHjadi}h>0-6N)aiL+ju_u`@#*AeqtD*If%sUG#b zSZ|4=-)PraZuj9|wDAVzUK}ykS>pbxcCy|P{<4kbX<{ESH!pGczuU=j^!Z7B>zj)& zanXybGx5`p>s&|7&+AIu^d?8~9yxXmpB1?mrxDM& zVKw-!^A2aUJQF{1*#2Y`6Zs+Dvu<;Ui(VX@+j@?UO52FJq!On$s+0L4-qQ~^=gH~; zHRdDj8xObK$?_rXKFb-czpTcW#unw}>*UEb=39(%g)CoC<1PQJeyy9o@KeMKzu=1x z%RU_W_`gkWa@&i$Uff5_w?bu~gBR7K-}E_Oc+G(_-s_7_9X;cNA8{&yj?I>+wlEu8?uWyIob^ zQB~xK&prKPoq2C^*^BF5-1g!wV)OC-KH}B)J#|Kdm)6BMaU8L^;ieJu4S2m6UQsuH z@*mZ&<<7N;nAcLh$MxcFTQ_Sa_q{lHSDnm{Xv#jfan!rVUA)Gp)YE&(L+@2?juJ<4 zr)sX`j1Rt1oh)aR@tilEiqp6*8<4?!>w%iavOWfuBEp z^jKe`G&>XP&C!dCGqK(rXJXlW6J+_a@Jo(Y^FCrOet3+Fht>5x;PlT8ml1y~Zh^AT z?W5{sF22OUqub5Pl7*l>UiQD_; z4?i_I?ZsKdKYm1=EbE*{Y)*6en|0!Sr}sJd?M~c9yyvY>9ev-6)8DBR&6T|9#bqz9 zA~sJb8Jj1R_3w5LwCTm|nOH#fpDMod*H4}1;4Pg44I?)9_ly_CHB}C@Ju_Ku;*7ug zWv37H_WDh}Vkwh@chuNiORKm~Hdo#F_4SV0#91#cdvV>1!(XoZeE;vATIV!gYJBwc z^U*wFbAe6bWx+q*wLWtdkZCW@dvVc=%U)caiRGpH`b@mx@sVM3Cf3`i7k9n5zjr;8 zJk7HC;EOwPcqZ0&6K7)CeB6tZUYy1&s>XBHi}N$F9DQ*nmZQ)A>Hg*Mz4?G>(Tmd$ z)yeb!wtnhfuFqM-X7l~8Ov*kV5d9{j}j-b&pdjG(_Wl+vtFF{;<6W4y}0ScZ7=S7 zad3mqX^whv+>6s*ob}?O7ni-b?!`?n?s{?Gi^Fqzr`d~>UYz#gycZX}xa!4qFK&Br z*NcN2cFt(ni_06;$w!~muQxkBSgm?-bF(^;k6`6qyp7oWKEw9rb@Fxbmc2~wA~x?L zXSb-6&CieKy}0Pb@vZCTm)^TR^^`~N<@q({GgtZCW*6~-+tfcSaew!AvOFg5zqrQB z|ENCvmdU|=+R6G^(4@v@@rz#E^y1)woy{k`xah@gFAg8n+2^zum%X@+m>-Un8*cF6 z&ORr-xah@AFAg8l+2^zum%X@sMBO~!T9$PVAK8h6M|I+=7x%q5{OY=SbItD`S7UR# zk1y)P{nzz2e@cx{|NMH<-nZQ2zNyCdd|Qq6&K0pa&_3ctZ#^}c@!h|FD$bwU+2=B1 zv(I(JW}g|GeNLZN_j$Kxo;uJhVzbX>#2%SIsULM_{GZpVyD6u+@9lH=pX)@k z&vC?NpBbBdE@QG;=c>2QU2mV0Z|YZ1$P4+2=AQn{!|H_POir zGh?&Q$+z~-Jz{h2i-^rSGdBC&_4YaVw%)l%Z1y>h*z9u|u|4Fsmh+h@jRpVMb_);W*ZtaBN$S!c#( zpM&r0>~j>c+2E|^j&ZB$%{LS zpGIspUq);eKX^%J^I^nh^J&Cp^L20YO>gtTcX!Tx9@>VcYlE+aPkTt{s7xr^AI zX2j;vX7ch5-85pe&t=4BpM%Rfn-3#4n@=OQo4>kle#P_7FYkKpeSGILjQD`(*LeKt 
z%*nVHhd#N20D(jsDhMqNHnp8aUM&k`3AbDt$HBjy>EIQp@=d7e>; zgCDQ4dH+6Jb>cc=exy*=x&GxknTx;84G$pu->$K_7zV%7iNjtT_2Re}C%riB#aS=T zdvVc=%U)dd;_xl)X&!&xcruE3^Y5ycO^N$IsgunI8pA(rH!pAh#}S(kA2K!{K1^cr zrE#;=kA2=>_u1UUyNG$imC3>Xt&>e0MZEM6>mQcMj32yDJ);sQXErYv*fe5Z$z^hW z|3@8MuK8I{iQ_M+F+Y~9`|QP4#9U`N&^lssp#1|n2in}YE;@gLbEm^fPIeJ<^YRds zF@JVemNfXRy7?Qgx=T5U&7qzQ&&0!T=N;m$>tyb;-VM*j!w(KmCK2=8KkvBtv=?^~ z^VWR9@#yp0)B|NK>m1&p#;aa-epzSvO_EVBE+T&5=5>p^9gn^{6U#m)cd8RFzJ2|A z!Qpe>lZ^R;{<5S`#A*Faq7t`fHZS|!_2Rx4$9Haz{=UOw(8;71r=MFV-*UV9b$Fx0 zWX7jIzAou8$J1P$Unlaz#<$=5T8E#6MoiD+MRYQ{d!5W%vpfK9B7X31FFMQ&2KV^9 zlLOtE$%h~Q;A9hbl|0Z79X{4PS>LZc`VEhhgZsDfTS|OHjqkrp{kk6?4z!7w`~1w~ zIy3@@&esZ?&wZNzgP zSO2iY{j7e|Ja-Jgq20XPI~Ea}=ZC&q5dK3BcTb#HRho80y$cfHAdZ*usAy7*>E z8JoqAdy^TP$!TvgV>3DLO=fH+m%Yi1&Ez^Jo4Dz1zU@u!dXuv!Ri$R1^N0=I?i<@H zr2hQq$(<`?^zCi@vg4&VjriPnbbQz`&f_Eaygom0&LJ)%=H$B$4~r**XSMq*7ue|8 zH8#&OY-t=(X!QfWMqfegS?z3Fp%ZPbLs4olSWkEjt-rz8LvUp+L zJY#ue*hG9-9I*UO{^0xSo*?i$s%I@#^BfA^!T_GF<%~(hqa8adVSrziFuk2 zJ)Y4%HqTfl2j5>eJ3o&8x|<&kG>n+PPFCXRzt+io3@Oh?(};QWs}GdaM_`zO$%wKmlKj*%ul=A53f6mu^ z@;a}2)K`CUY=W_y?}KH$-s8UJllS?~Cw=WF#|K}lv!qLJc?voA`R$jS zy5TZj9T!-+i8H?YoX+O=`iD+@?a8SdZhd9FF7t*f`^FGzsS#O_%b?4~QUYteD+r1oUd1kVl`zB&@-`w`%t{3<5 zESQ_uCzRi=>&$1a8yt>wGK=`m->QFD;{Gk|W1od|T{uT)%$bg@?&e#C+Np0U|{(c3&@ zvw6m5^G$5t#BFb%8Jm3$K3>ng*?btW0m;~GKIv^f?QNd1*?bX`c{eOK$2#Kq?>%)B zXS`dyPb!o9m~1}g&G<+EUeB!F#8=mEngfj^Hs?O+#c40jB3^vcQwPe}taBcd&Cyq% zDxM$Dn`QAi+3a)M+dN~ldB$e*;eT{MHW8c6Gd7!VW3rLY*vRjDlNp=I<)Gf)?~K0` zp*)?fA~ydS;5K4nv&2i^J>I$!4A7h|NA{pWa#LB4hl;Y1K30WkFct^2v4ce54&6ALldXLwfma zZ5@+&yC45UnUl>^>gG8)K7J^gG51+sNM!s-d_G?u(lfsCN%h5EiPLXtmsFm$_YrgR zvZRc;c@uL<-!?ov@#W;X-(UA#U$LAy&{rKqPKHmd6M33f-T3&t8S$bYt$$eltBs6# znk8<|Y+hshnq$5asIP4z{@HWt-Lb^=i|YaMsk}}`%(wp~ZsSbyNc^kzxxh|SSA5u2m$A~r`Kzow-tm*OH~ZeHRlVs2iq61P9m?z4Ps zzK@vuEOGdgbu#x^;wWP7v&3b*PP*s|PCfNZ-dZ<5FJ73J%X=F!U;UO#ao3Cci1~WH zOir$<`^<~BJX|ay=Ji=0Ln7unOI*cMc%Ejt;WiO-^Csqx^Xly$ley0l_Yq$nge4Al zb)8L|MEtWKuYXu3XT7-S#dXHGZHF$rz6CC~`>Yq&@!3jFR?joB 
zUQ5@npC{&ByG$-_P-CsNH*win0C&g8xq2jAbB9Q@Z#9KNa(m#?ldPqRK)_2Q-%x4pRdk-E>k80wYW zi>qE-_u{4(x4pRQ#eFXhezddBVK2^K+fF|Bcrh#@=KHu3m%YhV#Qbls%4EiN^PjE< z%9mT^{=STO@Azn;#8t%RcX)K!%ay`%j1#na!fF@NWzT;7X_`Jmdw zd{8ZMACq~Y5(n?B`^x!os!P~+8auV2gLEMoJ^Z1Xdd^|FcBTubYS`IuZLhueC9 zW}l;odGsgZ(TQUZsK*s z=JFohrZYL~#Z@ovdU15y-adPAe7icCckQCI?ZwfkGdZ|@jm;S??^$Esn*ZnUAab&f zn73w$yNG#n{OoaZA2H=i9DHHjXFjZz&4&?l^Abn#ldybTs(09k&CM~5m=9KEGGjhi zmAHItJ$i0__~HNIr@#@PaEJOTsKiP9=%x9ATE^xFYP*=s(<}$t_u^n*kKRlUdvVl@ z<6fNf;;a|vXJWbLmuF&G{HhnnuY7E|Q(g6y^(5*mz=;3)yYlPh);fM z{fStK)4#9#%$uVeeb$TfUR?CzvKLpqxbDSGFK&Br*NgjJ9DJa2n!{ck_u`}%r@grT zKkb3)8}NwvG*ROGU+ND~|M;WzYq=OQKK7qa#YIf!MO!bMUL3@}@-d`LPJ40qY4r!a z&E&Wj=Xb7?Z~vnDwfqXu;{5u}Me&E_z04|NuCrWV+cU9D?$2yq);avb&XN`xuT%f9 zOs?P-gTYNtCJaTeSEN5KflKOMbq-8 zX}W0lS?`7~>%`sLYi#au`-pjklqC(~R~DKV5*eEp664-v#%6LFlg-g*5u2mWBj)Sm zvZUef*Tpwa(9=I`&*)A^rCG$>XNmKO7sPAuCSHABeFQ6U5tDiJ632gD5A?0^k5QB* zEh6TXT;eExlc2fXcVG84fhT=miy5xVuF;o8hmY0YOauVFK&8q+l$l3)~B3Qsz-lf zjd}EXYsRl@G{0}NjMz-BdT|-gZMpdJ``)XFc`Y?DUviWP#+sarB|iOCr~~N9EaH{7sIP*)<8U#3BEIR~^#$ZJPRHE5+#LJ(m9KmRE0^Nn z*7nC z#$>)qFL4$zU!|8gKBqm~)cnK36zoSm7x z>2aSKbF#$Ar0(;)FRfo6_=xKq_W8j2O)mb&Z*us7+TuYq=97N8LRJxTvfjiGjeS0> zeq9`|x?RMayw`C_lP{|id4ZL*Pfbsb34-{!F~Xai61yx059<&&1-n z>BYe#+RblsIHi+OFD}l+vd`%Sb@OxLroQuW^I62a$2BqUnj-yH2z;uOl|E05dk5?{e~QPJQS!y|A81DwPi#ml4ym9w_3k z#w}3drZ>6kP40V>qpzt)Z}vHk*c^SdXvlElY@&p^23OY{J1wc=}pdilZ)QusyDgmP40S=``+a6 z>pN#Oj@X>hv^P2HO)h(rtKQ_MH@WRi4rZNojv}_}jCjHIPW?{)tT(yrO|E;Bn~2Tz zx$8|1zoD+PIr=DKv(8y>a^9O<^(HsH$$f8f@PxXg=IG;y%{r&O$$4*b*_&MVCbzxG zeZY5A18_g-mKIu(Pdy})? 
z_LO>gsUZ*te0-1jC2Kh~ieeX7{3a}twHoc1=K^(N=N$whB+*_&K{ zs@N=P+uP@^xB0#|Ik=*uG>q6BXw;jWe5%-NzKY4bYuC5c5%bPfuH^BL*CjP~u1UmZ zpVNrVy<^s!T=(L>w|U0qK*Lo}DPl9ZiI}HZ&S>+Rx_R@{>RrTU^TAKl$>yik8JnM0 zk7BZU_#H>g4|B@Vr@hJ1Pu2syHvaHC?sojI$s*=HOC0`GoxC8{dyn!-RK#ZS%ZT}d zm2&h|#O9OdUBm`t@Y;HyW}l;o4M@fYWFC{b_;U2YPuG3swNxIx=MnShxg;uCg#yg-2O~GQ0}uFeIGIRS>o_#>tyb;iMh`b7yqqJ=Dp(?hk7#ng*KMS@s%~^ zFJqL+X~g_xj1m_SQ|b7b%*irhUN$AJBj$&qW%JFM%}d<9p&mUy!2I~<9ImC+Z`GI= z*cHdiW*adNRO04$>*U3;n-X^s^NdOyzoky*8I?GETYL0(9G>-0hVN>R{+45$yt|#e z)A8K5@2m0Rx7M$pbBz0ldGr#;e^V#&=p`=S-){ck<88E#n46cli9;iq#BEIGI?InCGUhtVFNJ^E#F?Vo!*(Ckbsk2af(aoODB z@CbOakC=4z=>JsrnTsz!8QDk7#Wyh*U*h0Hb@N%eYy|}n;{b(lNbKU8%{mCj~ZeHH;&ErFy=G@m2bDcLX7wxCledcShvibN1HRe9c zlBT^l`;1u9uzoF%46Ae7_=dwy5C7#tjd}EP?xP#mmyco6-FNuAYxS7_;RJ!N!cHcg>#=JSo?LK{cjrrem zmCXki*ZAi9)O8+S9UOkT8ZrMtlQKD;wUfVeT+;9x+W0HSxQm!)R5suD;`9lfB@Lh0 z#%CVxoF~gC)tIMw*W)@zm$b25V9SVkn&%xS2jAH4vn*-ei<@5D_2THsvCq|G4u=>F z%0534<8L!4ahj8-Vk+I}7^hFIlTBQ8 z<2TpI=CT=lODE3fH70Voz-HgpncPNfjy`@yXL8ny^JjG?$KO?Bg1r3j%y+Wr#Z|;q zDmTYE;`wohsJGFg-DkP#CNHQl7hkTr!He3-*B*~PiFjV@yDVuQ@o90#EeG11*}Pn| zoA0jsY~;r;tMOegtY6Dy#@EGe>NOve`Ge{Dz!EWkk*~yc#5>3D?lm!gC|}|>Ci6XP z6Z1i}#9d708`~0BFRzPkCf5<0eQtV_+ur2$!^`T?n*(KRC%?B&rhHlaycf5< zIDSRvG$*|{?ZsKd8{fI^rXKwt>Ng+!i@P5mR}S~}+g{xF z;$UBw)EsEoi{oCLM7;ccr_N~Ai-Ujc9B9;w<6fNh;w)lw^v#Fsfj;nrBU(uE_-p^i`!lt{A+JXy*TN`c`q(|aovl%UR-{p zb8D`9ao>yMkJrsFic7NG5i(wRx6i%ihFhQe`0`Eu%jWgXUBvvC%}d-x%zxRu#KDc~ z<{$XWdTW+Air8#E>BU)Z^LfPlH|fegr#I=Wa~`o-(jsDW^i^+i(~G;_=7XEoqc;Z{ zMr;l=?!{?u^LfPPG&42_ntx{J+?NrX$yG0IdU4l_`(7O0s&nq+UYz#gEMlXy=uIvo zHXyr*_lldPyk1)0p#!ps*nsRJHb>w0;_!}jpUvbrVzbXlZ*ms#?eRceF4{$J^Hndd zd;8q>;yz+?j~m{pF20H5i0zUhHmAAj#dXBy=GZ>2BfpQ>C=D;_dG_KoVzc?I7Z<&_ z>cvgO<}`P`IQY8GI!C=YiP#{|dXtNY%^9tFaovmC-ahvc^MypYz=n_SEPmXJ(_WnQ z;-VK~O$^JT<_ zZXNM=o^@(+7qNLZ+(&Et|Q+3g{R^s zVtSU%cMiroo_mR-C)9oA=H=Wc5gYkw#N20@ocH3QxB03UH@&#)?Q`(Ny3S@v zqlnFt#=XgDFV1?KFM5-!UR?L$wioxkICxU;jCyerv3XS7M7;X%>*sal=GaC2(T{cF 
zYfnx+2<&^48Jo$$C3T(oiCH<&x)(RSxQp0aVEc&8tvUF{dZ4^M%YAbfj~&hCgJ<{X zBIf30oulsL^Xul#KF7T{jo7Sn){Bc?T=n9*7q=0c#b<0T?_FI{`ZBO%f$2r+YOpp=>uV~M`#8EF!dU4u|^Ilv=OuBNQ zb#HRhi@RPNT;5syuouU@IEk3IQCZUPC+j|+{+W*~Z*a@!^ZSTTyIGC5J9XOdHaw(2_aomfOUfkWh9wR3>M=IPb+pFHUY*5A@te)N?Nf8s4fs_Yz0FIDUSe{JWP|!r^i8vxvFQ z(J>A#t&{ir#X4C$rxA1W@))v=m>~6mC1U>9_hs|R3+g`er-3CduBb78wWf(Leae42 zEGGVfnpI5ZI?Lp`7l*%C_j${&Ii)m-c`;^86a^>s2AU-$VdoptUa=H0Na z^S{^0W=YFl9R6D8K)ZTN9vfO8hi(Xtue9uSgZpu36Cw21#shjuWvKLpqxbDSGFYY5=`nAxOFn+;&b5r# z+zm6{^7Q8Gn9MV(i;wuQ*{Qz~Wq8AS^w<5wsjF^zqZ$*rY(BqPJ6Rs6ceiXOOWfbO z#=L9a<^X*%xJ`{$4C?J(_Bo5#Os*onX*fIHM1!*V`nGlRyyi>XMa*Z|@?<)`U7gG) zz8Xh0=5t&V^O>u}NpCV^GdYdPtKyDZHeW={OR=nT*^8@QT=(Mi_AOoc6^muW1gST1 z#JnGsIJiUIytx=g5zmdM%KB{Bi<>*v&3`XmhnC6Zoof8RmG#yvaUJoFx32fNaxu)l zxK5t?X?3#1Rm3afN-1&rkUE)kEp6N6Z_p#6>R-UsMm&KCnd0&CAWPiP+pmyWZr!7e_Ct2g-GpeU2kG z2b%QaEaGRyE0?;Yh|RcURO~zA8k8JTD+^qW=YFl9Q;FP^ZAEs%mbDC z(K_O#zfk|M#7)FU|HP@di z6L%4tCGC50aQnJ>Gdb+V)m`i4`F~rNR8KQvv-#%p>tu6B$oR?!*W0Qres)zxhVj|al-oLK%2DhzWuX8-j;n&n}E|0(0=mw|b zrCrRyx)D|fX98VdvWu%bs~Q)vuwV3LL18~mesRs zyxT8N%aY1RvfJmx0pg6x-y1V|L5=w=QywZa=Ce$R!xz@chsEY4j(Tw#@qxFmTa?M^ zch_$&f7a>vWA&Rnqw;(Hi&c#uep~%o(^@?l~xh+HfrKyWAhSsF`4gr z%8~}Jt^0ax{PP<1G$ZCdOWej$Zxz^OpZkc9jeo=7HJxO9=YvkgaZKhu3th&1%U0s*?RB$!1S@g+jvDjRm=c%2Ut|6q zSvNggVJE|9JgF=x9|Z1kyg5b@^R8Xuq!(wsxah@IFRpuW+l%{N9DZkAe6!ARFHU=L z-iwP~Tt>_Xma24zOA1{+O_a|nM~|$}8I9*8VzbU^FV1^$(Tl5y`S@9uw29apecRi7 z7ctN1cxjw$Z};SKMmPWYQ_rx&Q72A%aThT!#cLh+Ik|n^{LOzoF8eNVarYYYCl0qb zPA((n4;Px4KZYoAf6i0NzHa}a3$EGc?Bb`EZ}RB3Ec@)m)vQkb$cyUN@?WS;o>0H} zxfj;2W%JFG+R5ed4m-T0#{9M6`yUT9kC=DZZ#l+o#AZo@Z>*a)ar*RjpOx-AJCpl} zdGz}p4>XQ5dFTu37PmUa$&2eZc}Bl^jLUz2+TqJ!kiQ}Pna8+^n3qk7+lYD5mbi%eI*lfOw_=fnwT3vj^{BcQ%!@sBpYR-KcvDxP;Vm^YE&DRl|7rL8>%{q4xn-{wK zi22**bxBv%CFS3n-Nc8++kz6uG5M0X&`O-Xw{G6ZXKZhyk37BHw0Z9+o6jQVx!2EW zZ*aVeUh|K+l$QsKm(>Tp=1N{gY_8;W#O6vK#5NmIjyYcZ>8Qk``&h;B=e9!H^^_sYkc=f;5 zGrGZXa{8NfvUzVf>&5AB^(K39`sUtbFK*vnC$AUSKs=Fv;sMto?z+%5;&N6dYeIKF+|=fA(I-c}_J?^I*Hkf@Upo5^X! 
ztN#7e9btIuZ!33|e1BA4`OhNe8=RsvyiJ|Vj}*#ZR5FQ}pBj}oJu_K67ZIBe?dPNR z=;c~kMSSHsr=E8AXC}*%Mz^n<=VXcVh!(JGyVuFff3HrK2mfWne2yz|9Wh_2mbi(SuT-0uuT)E%j_ZLw`u(R4w2GJy0%daa z1$8nXlS`aLY$m7ot&{mauAJulel<3e!~55m*HZcKk22;v(-J2!nertrA5k~I=zHtp zA9~!s zr@DD_ml;LOAFNcmi22u;H!=Sj^Ae{q+1&1nh|Q(Air6e^6S29Hx4q5x5t}P{@Y(f@ znthHUHv61JY;L$|FU}%1i(f~~A1ak|-`}|&{o|+a9fP~n*j!*4n+q&s^T0BU&6{g@o_GE=Pg0|Z4|x8mIPJw{#PcR~Hw9!9v3a=2*gW{}dy^TP$>H7V0rF|L z-bN9d+h`dvUn`c$UBu=AaQJz3pUpZmHb)=DRmyHoc>7dvVu``-m^S@uN>&VB;#Eub0Xzt5L*! zy;R~nV)II18L@p_irAdyD)$+$RLjMXvAMsmW3rLo^y1YoBj(YYn8;1c zqc<_1&PrUzK66PW&f}hzFC^;b5%bMmiL2h^x)=8mn|FN6hu70=?lO}{v^QMw%$PS^ ziGxSh$!ErHN*q0^v(H(?=5Cm=xf?EW@|9nC+}+_5xWoT4Uh?Y4Tob3udZ0@#s9($E z{>}B9%YLhVz0bLazbtO~+x44w-o3s`f8dj@cleud&cyPIeCyw7H!nZ1SpRMtue{T> z51+#I;^eLEWc@(nOgw%doqY9^>ypZ?IeSODdHF5bMK4bOpq>2kZ8ouazFjT1(d;kkKJ#bL*f!*VZDjJ`EY@rO#iCJd{lh%;WL^~#JqErxa>`4Y$k{A>+CaQGr2f3 z`5VWhXUxeG2Y*-hc~zW5c?&$e-*+6Y-N7Ii-^AR!#C1$wao@T{iQ9;8``fzj636$i z6F>jIYAo{8h|Og)iQ-zN8)~Z})Qa#U(Z7ZB+KTenyQ~eqp`g zo_xIFGCuUW$6tvY3?6d$-^nH>Gfq#(-29T`=Hu_I`+WPi)~`=G#%08OTqz;L+sT_8<1S(z zy*^YvzfS&p+{Md2w-NIL%o11MT_=;SJh05ar^cMDw^1*yBIbiYnVi0&Zl2GZcRj9i z){FCAT=e4b^19FTtow|Z=U(FE2kT^BwB^xe8u8qCtSfOAG385K{7~IIK}N^LXUuD< z#8pgoV{TrrVCk`7h#?OEKe{;$Or& z=k#R$`&{)(j?F)K`rj)by{+yum2P?5JYy=AIDJQ*%+G#n{7}1jIr=nWZeHT-)1GnI z^)-*l7akuO_7N|Nf4{o?Yt$L9yncOPssC>Eb?fH&pxVTIVOHWSCi5fiuRI*|WOHWo zn;nmyF*h%9dcAs}{GsPpA17yL;-MXGj))0TCI_EhH_w*`MLuJ`JScG(lX*rZj;>!f z&&5CJpnUju18d9!z3MX#aq$^7<_%YnGdA*@+qIMB9+xpEOPq{4lNp=I)wE9L$5RDl z^c6Mc-LS-QFHWCaC-WhwOs*ro{Xf+|EOGLb`c3}R5+zQ*sdJ!NFV1^$(TmGoT=nAe zh4tu9yX2Y23wkg*Jf@%QUsPk>aP>!8FK%PG!){+vW8RweVt84NSH{<#nI=E0kq2 zWBv+diKCdz1C==L#p!>k`%I+*vW%D)?Te2~T1Wi*cwt`RE@IwB4?3RFa9Q`6N3TDV z`o1=nTXPvP&%MOqE9+#&2OW<-?!{TeL@tx_h~r>$oym;N@ee${RUiCR-TamDDE^qki=s<|=6i(0wQ#r;&%|j1GA@b|HxVy?*Qo>TB0lOLcWbbabT zgE!P~n#p0ryg3fWha0XpIgXfD$p6RJ*~fiS&-;Ham1H|>Qt4*1rjpdw)g**i6XFo- zOJ&uvCM31GOeaZg)>N{pV@=WNX0t|Q%W93^Ig~bQ(k;uH3Pr3*x?T26=&Z#0eZTM5 
z>)kc)tM{Wn>if8!`+DDe-k+QK%*}Vc+ew~;ny2hPD-Sk=dipQQ`WmF9i@yx-?0XI@ z2(^nvS$eQ0)SqpY7AZ8>f}-mXXnKFbCWLF*n-VBq1*P_;z=UuP&pVL3DmiQfc7L(v zvQJhxK5^Kvl0jhL^3!^-{wvIN9_=xzeDD~ccANx?2#qrNKPZ3op5rADcE5z{`24TH zf>66yVCBJLqm2$Fp+*;2d$94~;BU5>@ae(uaVQtheu=b5N*5FA=0srdcPJN5$A*02 zsB{&fmQMDc3Uzh}Y^c;0$yn+L^-{bj6{=ET@C5VZMSFqKKWsCjQA#PHrZl||H6?-d zKVg3B53mHuUV@g*G2W{m;`V=&hv!syxav6fJaEKhNl z&w%n*$DJ~Y7wkP!MwV<1bDJDFISBnK^ zgjx`Rg$J7#!RL9e$EWmW=fP-CD0I+@GA7hO0uv7oJeYbg*$;vAtVhOtN~nPZ7KEC- zz>?7HtZL7^^jd^uE%ykiveTYI`1O#Ru zfzrVC1VCLGMA=>esN1dQ$!ha;Ks{;;jIIRKJPp)51=gNYp;2}}f=`Wp#3*{c#s-qD zkwP^WSo{P^%~Q<7pE~9Wjd`^JN^=RZCDcoZvW5))7fO9DNRC_?p*|NRF!x|bXf7cZ zolRcMtG%!x(<1LWDu_Z&USQ)X6&huZa3=9hEON$wj-F%fl^1ZC^HYn11QZ? zi231w^WTC`*^oDcdVn@i572nD=3Ow;haBX|hlWsn%GN!c3+0>-q2Fa$YYCT}hq1Kx zs72wsq15v@G0zC6^S+$iq-zMb=bi9yWS$)f^9S$68MHh!5*!6MpNC&L!e)eez3!;d zb;U}Ryk7T?u_0^-XLE6twXP$ab|I2KWE4G^hd_&Yi&*XgH^&0%u#pyZ9|qKZ7Z@FH zmD@`f6RJ{RF-5uCNLlflKU?tzr4%H{;8PzC6qpd|pdGey*i1tG;XhobC7k`LIYUhl z2y6*;ga{1zO%?r#K-oangz8fQDO8_`PNn)382ks4SD!L$!YNRc<$pq{Et2l23H9qs z=%dd;sZ|mf5o(nHzW}A0OEY6a{i2h=giyZ{C9o#cZ&V4)zX+daZT6DMdKMlmJy?0L z_F&_|ZVHq>3NLh0iaZ#5F!A8PgQ*8I59S^$JXm?K_F&_|)`Q8nQ4oEdi){9)?*Z!D zOAOSvml(L&x8E;a{JP=)$JBh{GC*196>ga8LB3^Wi@v$uOf-S?=GP7*Wp+LCeA5w! 
zOZ2}E7rU{7@Yp{dF$o*OwS4o0{K|9oeVA|gEqs3J8AJ1MHU60R9Cj>nIa=YSul8U` zrJC`ZB{^`f%Nuy_XEA`TR8wocVhPZkXo4b$=Za)^184{#}$NHWGTp?Ml+w7evJ4OW#++_a3wbsqKt1~k$I_8U`hCr zzrRg<4u=093vYsQ!*U#Ya4aUAbp-zLQ~M5;IpMm^d5{{uG3`k>ljk(U9vm=b`XUH{!5WzB>m3aI zHs6XZFr-r7x?te0d?$^-h)R9Ig20$?559^+U_$uVI_wz)4hWCos}2OFgyxCX%!4_h zdCavW)VJ1%&&GqDXCD3pRWd#!4<;T=J(zp2@L)x_=I;HZi)DN&)L%J~7S&Yh)4VSn z>c_I5A$hZnQK&C2kiCt<8y4Xk(*lDA=K7KWOqnTgyHTKG3Y48y?ZL)_;jb8I{@&8e zmyUeKgt~>3<7R#@lsb~-oU0_%gFa5g2-RF*@_U%;+e-uv2z4R|%n0?BCIU-B<1@Gq zKJ}F*q6`W3l_mlsLX&4qsFO>S38B8yL|{s&lS^PmsP8!um=o$NO#~K%tJmXrB(NgX z?UulXP`6tG<3FI8I=}=D2#tA4s2ej;Dl|T$`{C0RBquaoToCTh2M@((OE{MYQ-K}f zvB%@>c><#c5Xj^i6PlD#LetEQ(B?^KMo6VT`Nk2XU+> zv!habbDz<|lK&CSTyrZHRaxr_LY>+ID?;rfft?4VKf$L_mV^g#DH8K=1C+Y_N}dXJ z`4yN@sXhf}9;`eV{VxJ(bohJ}P+w{C-BCdXe+E45`50gV3&KY(!#@h_{$kBv{`OHl zAG7A;$!PW72&m?N-gBfR)Yqh}8y+Xdy3G+)m2x->wg6n+f2B$2!IknhQ(`wIFhgD+x7E*}~O?nx~v2bW?naGJF;S>AO=#UkV#j zxb~84*bzY>Fqxthp9)neFnTt8>iPTT!!6)gLa2FW!(UAp8xU&S1?Gg>c7Y9{=DB!y zJ!MRxMi*G~?JC;#bB7HWQ+OIjpTJ;i=Y|mU=AQ9Z(;~{r8c1^dhU2t6MRlmQG+w zcpWDK0ukzRA)AwSPwVrt;c4Vpu{WSrNtPgmS|x$e$Dq{5U}RZ~33UY%I3U!0oxtE^ znCrqKF!W&T!J5!S4?k{GLY`9~^7LTt!R!<#&)kEh2Wvu;XEOy#p6wJUd3J=hbf2)z z6lLbYobbT8*s02*s!)9j3{Qnmy-F-TBM)|j`jS9V2B*PXeF}^{m=J0~BzihUDX`(@ z{*;s8TYRRU!XIYXWFD-pfKpEfWXvnPlP@F_*igCjw^-H$c7!|dy=w-Z!&hw?c+XCF zF_pk}IikH?@Jhn>qB{Vu0@UNXz=+W78PfZp)GeGW)D@xbKe5ge>NFIX=cA|J zfOE;9|aav9(z#_mY#V>xbgI!GW#O_(A7p3|Js8Iqw1wjQ5J-1E-96SFQXfQ zm8YydWyF@4w#ObUE=KZ3S$Z(~3Y0n&C8gMd$#N)lYb46tgOvxP`KL?AX+bcH3Du{- z+JnLSU~ZHRq461~jxzIL<-y`;m>+!%J|!iEyMDL_J1Vqvq6|L-g@LgLEBZ3Z#)F*) zt49z>yI7(tJd2G$Jw0VjXq2s|4Avu%zS$DZ^k7M7T2yr6>hg=|gg0~M2^M2`q{1rwNh zu=HT~5yxjtc+n;JNAWozG=WMg3~W6ZUJ74E+5C2q^x~YoJ~i2!9ibLP`n`S#%2_+* zqLh?cLhW~f$-__@SbA{qh@&h#*m|&F()v?|G86}Uqa_Mu!;|d;s4c?%&HVs1PdSqc zW2=Nuwpp)P8K@f`ftjZ)2{n)?gPl=Fy;5qRUQ-nqQmGSGU_xl46PiE`pj1KCuX& z61^fcdDeuhPw6QKpF3Z=SUXMzLr%EqkU8>?r;V;eRa4+=p_dFqx#e(jj&+2` z>;&^)4xNsLOAtuA1Az!NkX)XMJ`bf{k&u2*303*>;Sx5M7mhNy08lS+%Uai73OJMh 
zC@@_LxO$(-flhfW(AZ_9NmL0p-DddrsGrL#?yP| z1(i4R7rCXGrDt9fZhvOayz!LTN<`P~miWvGR~*$dFV{e+V_y2GCe*b~U_-c=YaV*@ zCysgM!ODXTp|11dv-&A~>X;XpuXO^&cLC~FQ)Wj(sM|t;142Ei7npf4C)C~)Wq7v} zC?Yh0VnP!rAv8YYU%{uD9d!$+i~Y99oEZL^z9Up1_#U%=ZDI znZ@O2E|lqKQmQ;yd$4ig))$FSv;S;87;g)u+1F(rEIpXK5aye1K0yM$dHDaa=0$)y zz@&?VJps)`C|(PwH$hO(1aQ%1_!Jl%Y?Xf;9T7T0y+-|-QS|(EP)_?MK1JEkmnuIw z%xSFr5)@DDj!#KGEAfXWzxBvx^>sj#e7D>xpFL7$jH=0ha&(Xi{~wfErN52t4%F8G z>g9b&UZGyz7nuFXi%z)aX8fa!&v>6phDl8Ru!WSaB-A~vY+u8Dq14$S%5(xy2Q8Kh z!VREka?k}mqC!twb8bUn@m0{BoZul*35n?QT0Y1&j5E7b| zVL-U{mzcO>uJFOT9Jpm4W;9>*<5MJgF)s*B@+F~3srF#&!H!T@Fe!3-F`B8}AsyFE zfpUftUjpTlH)1UjbA`*caNydrdoZE7PD2^<1H#oWx?I{WFjdHHjKGpm2ZO-oDx{=a zBPm^ZBcPc_gCAKa%9L>N%jQm2J|jG198ktmPN*YUU~o5lYKz2YwbiBKON)%5NVtGE zlLdy)hSKDj5}wBcwJ0;fnS8)UU>?GJ8FzvL8^YBbiUuxt3`0?1y$#HlT!ltJNw{vM z1DA48h_Y4lzx9*~m)+Kbo%+112b1R_x|U9SR@*u$)r9N1i>0o}lW?3X1X3b2(OWg| zx2RLtPf6jDev6Xrkb+eo>4etYgXtMgpxlGmS)S5^-5;PlbbP7w zk)#yf@4(~%3#A~b2b;e^xq`k$sc_T(;vZ4uC#+JUcZ8}GnEVq;g}V&19m@#UM@U{^ z^j!S0@_Javd0ch?;LSVtnwb+GasWC`0u5dYg@MV-99R-+$BB9U0VrqQ@ouT7oPcK^ z1k@dijL+&AK(m51ggQH9Rm?s_`rxHbeaPP9;~K73>y!|3$0I4W*)4jK*_V60>wN$-SL@tF!NyaNtmB` z9X`cp`WgJOE_dMg*M2bB#Tk{_o04boS(uHV(KA>0dRj=Jl1gJ6x^^z0VE4gJa|w*i`S?E&GsoAHklDETq|FwLyi0-BU+h5eKog?v1}3?6pOV-Kbt3^rPG*%Fsi zpsZjGp+1m+WsUHL2QQoK_m*&*=K@Y2jZcN=uf6uE%9T4pDa)@a^~6C6qVU9p*FV*K z?#(w(!tin1BH2+#Oy%HRuM?$IUZF;pwkv!rLP`c|i;U0nc$#H=u9&#|sezV%ebrN; zHdE@U(Y2WdYLRywIvFe3B0V=0rNZ$SpqWxSg%d2YeC0&pqCfO1ukd{O#$ezd3x3nv zM5%Juz0gb(XeIkddQ+8i?nfXgT}z+S`*|u{($7=jvTb_Zq43Uro(fm~x|gTI`}%n* zT-Q%2-4+4XGSGITA}gG}6aG<3r|=!LkSZnHK{1EsQV@j`GzVn!4(N~hgnxWtSGW;n zv@`zLc}1@`70$Wdf#bX1A_L5r-?Qj?fzlmn{@5RS(=cRIo!W9gE+W)X2cLvG<1pq4 z?K~pXukaY3I_8bJj(G!hxiCKm1S$!&$g{P8h8-O9vo|@)xjZULGc~0>`bSuW+i-&>%3>T7LHA2C z;}mMY%L1ry*ZvZua6!`>^9uDcuL-nZZSR1h%A05PW?Z-*QZmsK!VfTCnQ;owe<>^^ z(11#9ktE+xp-|3H6sk{|2nsd2_*8gw|8k1L+xsI~;bzQ2@>IC$gT2|I&?Mh7Pfbap zD^#V-IE5xpg-4q`1AQL+ie8mEuOI~#>QiQ%LiH)Iq*CjNx%qTZlb04%RBB4{%7~h9 
z`s;c#PT|B09k}e)-b7I4d=IYm;A#&}zs>PE(}Uw4oZde}(UfL-@aWHCGlc=>l`c9K z9X9ut-jq?eEstO_7!+>W0TwtO(fo-EdZSL2$DMY)tTsley(uZFa@GUAsjYC)cY3S0 z!j&KD4Y1$@{4w(bmr5X+aS7q3Z}wJig%>{DGgo-?i+d}BLT$U`sZgV1&%i);acP(F zIrsqrteo`{NolL$|Hr}~0-8OTLbC^psGM^Tc0saS1UJHb{%*ZIBSPIkqMoYkZ_H{! zJ#86&<79Y9y9qv5Z(KUE#4%3cCJ)a3o1r)Rk6XM5Wnk=7f5~;vd7McC5VvDLr^IKE=HFCH`3Sek_#&v%3K| z^kL59x>=|dF28Wz$P&v%L8Vzk6q>=H&pzWu z6qpn0&dNaDRSGPr)XlSj`|o@CsD}fA4V7E+7L0*QKh;yVUi9wiph+p)4!-mYL=q_X zU_oe-SGdjEbTavpN*&4KGuj)0HtpG)2o>9GeAXTe_k+24WF?Fp7&jmd;bQgqo80>^xZWYNoDB zl4tx;r~j0yGC76KDOy=5RUCDd=h2n;`S zxin87u@D#$Y8MNP33dM|FeO~MfA4f4BQ$x|gjzZ=PZnP;(GAQ#3#d0iL|GAD#AQQZ zM`-d?Xj&A00X~b1J`bUKHLIgUN&zgxcBRugr=S>dU!4**I)RxQp*-l>*#j%U< z`HbOTV>zMexRP+ySv_Ut!N!BF2RjdjJ0pMz6nQZA;J|~a2XhY=9;`fAd$9FjFvDq4 z=)u^71>tJ`{M^h@A1U1M$~n@30xK%@<cn_4C5-zxLTxuX0pXsNpPk{}`rRMp< zQJ~~(tK579qt5_po&w9yI?C)EM;ToJxa!^blq0OdyA}dUJ(CNqQu0)&N`Wnv)971Z z@Lw=DZI1|5xw*9HA}ICM7GfR|>I+c?W`x>yf#Ic2putkWL)rflC?zz}gUg^)pQ0Ri zFeTJ&j3_e~ei=SzGN35C8^1AZZ4l@PLFI1()GEmWSn}?#zU@t#sZe(pa=EXf^1R>R z;Z+0miB^HFr&MT^!Ow7&(D+nnlp&S+9v%s#P@j?&Sl~JNZAF$_S0mj&RLAz0w5_!Cd=Y z%oS?C3rswvLZcjbN`*$5QK@4I$rI{W5*V&Wo@Ovagk~@(G=rg}(!kn-(H{}WjL+(S z0Zlz?4|X0*JI6dHG#%G?%H&a)n~qBfO+9lDmL9ALO_4i7-6u=sTTVYyy7>Q)l7Yz- zC=;Qbfw6wZQG3gnm&<4!;mN$lCNSLv$`iT32#jU|&Zh-_7W0i3%1Rr(32-52vn)aJ zzXLw}4{UTrS-u%?-TQm{Ie~7c(CE^TRj;;^7~#V9cH??^~HTa}Sn$sKtzs?3)O*X5Q+_V@FQ-sx$GA;kmuQ0JxY=pG0TdGIm=~W4wT}emRN4_jsHf^kp3wAra3!K^N}>!2H6?*Dp{8V@ zrX(<-(tJIn{RRS=wXWigUNe@W??P!>lo0AyVUcnaU@DzG4&|DmOmeH2^^pXTDj zfY6Mk^g1XvUWHGYuodrTYw56oB-DZ!sFykgHdJbN2y8v`gpa@IP!y#?6G)*6l+)bo z(iQ6Xl;^DCPhKz8(IQKr^y#2!drqZkQOD=i4m|_D#b?Gpw2K7}&V)j{LtsIu-660c zT+H{_$vJFGxS9{l2rSQnPaR+av$L)Hm6}=Z#ME#Dl?2Q0n-EIiXpXB0{q*ObE?l zm3lBE)OjR@ zlol0y2F}3RgDs(6JrMKGgT>F#N1Bo-D?&Aw@fq9>rQT2zW$3|@qfPHgiL&)z_$&A{ z%IZ0{$O`s%j{WhW{aCjR;AVXoZ4bC)0X}8MB|GDfXIJq3AH(d&QbP49uqM=(mkMkN zHITq~27JD%@3Y>;fi0na4|?BG@(T5P&;o;*FxMOXvN9Be+9H927g%LGq+@XesNVq> zpB3S%?eLGHY+r~!bfK2+2=)Lp?`NoA1ZckdpU(m`c?SCc>aP+@o*ALOlGMO8&xeJ9 
zD|YR{bYJ|T>%1uQ{Q%AP4GTg&ph%0t7)reXBru%q)I{1}cYOfEipZp;h9bN|%~6zvFey~8Nc6&mw!XDGEGq8t#Kw&#Se zW|c%4L@+nXh!4J-Krx}ov+|U+r)>BvzVX=-8lTA*ek{Fc4!hkYfO?2T@|QX=Uh0^K zUjj4}HYPMFC4{E(3iWQaL{F(STeyr+e>6dq3hlmb8B)^Q;-ZWRjn9P8n5QMoC;DIR z%L(Jy?3M_F(71Xr-6uj{tRUUO7A!VNqAOe`dNZ>oJ^O?(glPkCeP*0Xoc@LI%K&hv| zqHH}F-svdo3lP=3CZ#3R7D+SnFF>jNE-?KfpnmU6U`A-nOTy*9J6GntC@W7{6VAP$ zw=y)IvL!TM(N%cBc9%^CQfR)n8(fOy^@t!Tg@iMwqgnz>!mBt-1P;Cmb3Kof$`^#j zXUeWMFe99~q}R;WQ-)U}(4wO+oovy7aQ)+eGV0o^pxkhKukGnK0L@p86yDYULaRdk zX16p`p?-yY`e?!`)Ni1RGV=l@D-hibhVEN{=9>#)1*qTG7M}|BtJxA=p?-;5lrhcq zYyJWgLKCPaG$|=OXi=}o^)*PzbX-HIOQi(r2p4|i_Q^g9u64{qLSr6#N`=Pfz%$Q0 zbA`sd@XSj>qpUoiwP)UX=ACCAd=CXNZI1{|^qA08KJm;`&ph+YbI-i=VCBKugN+AU z4|X05u5$_!dNA@}?7_r?0}rMi%sp6mu=HT%!N!BF2Rjc2tGp_CFd?+VhR_VK)HBaL zSa`7XVCBKugN+AU4|X05uJ;P!!N`NL2NMqtJeYbg_h8|{(u1`J8xOV~>^vBJA5}8_ z9uk@n5)qnpDe>UIgQ*8I59S^$JXm_L@?h=3hS0RVB{XgCJo9k1Gr%Gb#vV*OI3P4B zrGzG>%rh@M^U^b~JoDC5Dl{ntKXBR}5*lShXp)aT^UPB!G(HQ@ye2ft#`D>F=3(tj zgow~4PiQ8>z%x%h^UO0ZJoC~suRQb4GY@V+i%jw%p-Da=G^HDO=Ba01dP;@HXYHAH zghm7&UPC@J&zL@T??hcca2V0JHnM8xNvl~ zhU=dSXa2l*QB>jhEq6^uSGe{j2Ts2W#l~qM11-AQQy#Z$GJ5zu3 zq{{hJ%F{jy^%9L-TT}R*iC)_kPT#SYr^2}(c9bi3aFpvjIN{~F==qNMp2s?YHtpgl z4}Q2eLNw3WUZ6S0J4OD=(RWVvrkYRplArCt2@h`C*~xR0*UU|x`N}(cqwdL+ycSKg zPQS19M#zTWIm(ItNY+3%c=cT3Dc5?*|HVsbnU~Tc&*!cmar$VE7ihj$kd2k_oC1A;0CXmi#+9)A8{6zC0_IiFZq*R;Iw^%7if1+ zxx`bh_TYFwrR0Y=A}l%|pK?ISZp0rOE}b{If+^g%%i&LjJAC}8r@|S@Tb~M#JbKO~ zEN?;}J;KTeiEakegQ?sz?QXS7z}p;U_9Ls57S%rkG?&d=LX9rU=r>TBXAfe+hhN*< z9wdZ^vRY!E5}G?=8KDW36CUuqUX=<$vtKF+O(2C2CcTtuFOb4jyeBDnHdKCKY41(T ztrxu`G=ak3qDu34s}Q9lG|h|&wMEj!3h%tB*Y*LGCQwQ^&Ks2yDDwj4g!^~Bp;!=V zbTKapO`yv2*?KTp=M1ot&;+UoO~)xT(HknS;oV^L5#e91!(Bmvtrtk)#E!kWsnB#s z=b4AULyJtF3Qe97l_pPxCeN5k6Fni^|5?3-Wk6_3mlB$k6q=NBD!2M}uS$jIQ{h>` zk&{c1LX&6dnb(9SdPAtU+R;aZ#%D*k#c$?LMh{w4-n2a=G=U;Q<1_Zm6&~=&5tD%u zPdV@cWrX+5e9xr0LK8jre3qWiitrph0U=eY360N2;Tw8YY6(r1Izm&W;9h5LhJ?pH zh<}vm5utfezO{m}f zKerDL>~Js@*iw1!Vz?2QZ?HbM8ipHd3H6z%b4EVXjaDgD$_Uqg9{(t?_`6lgDR4=+ 
zxQnGl0xQCg@aZ{$t!ExQ0iOr(n2ghE!etNld`5)!5sOVWCCM|Nj$YJy0^o|`0m)+MJp9-HivUk*0xaS9Z`?`dI z9{Ybd1(s1aAYA)OWFfF1)D}td9pU(HZq7{%^{Dy^=bq`nYu5DE5LM3m zTCeR2AAV`C?Fwi7thY5%IB{IR?FXYJx-FFCgMSD7%k!`ZNHfE?0GjRyJ`QLeW(x_; z_{<4^_u2k5B;0W69;0!9p;!=(^HC)kAqtoAQ6&Rs?9hL#$cx?)uJ{Y4p_qrKAbFFg z!g+t|DPt;iWf1d(@QJ^!l`a;T66$;xm=Ui3d(UT1_}?$+J#(V)jQ{H?3o5_QRZ)B@ zJdrDez>>-vzTYcydj^`R;}hqwX9BL^^VDLV5FWG$|0u8_Jnv0C*b-j!J_oLt(}NwA zJ8&5mpYf-mSiMW{`cguud5W?-8%h_xX@}lHD*6nRi{acg zU$oO+lk=z|G|Itkjxr-O$wxnO@=Sm1z;KNN8xN*Gag?P82S0U`gSCL>^1i|a*EExD zPpMpS(6>i@hE-AFmS69Ud4x>(_I-t)n*@lwhNH-5YqsBDn@TE2^0%xgkZ z`RoBGpSTa7qRa``@A-?#W-459O>eOZ9)j7byk|am7=M_#sc`zXy=_eTh@-4LSbMPX zVC%tPy%Q+(;NXvrGWB5P!P4SdOgQc0-h3|z zO~=)qc{2sdK`PsTJk6+Uw#2aB`7^!cx0?cG8%9$5IDKI6}g<4=nsAnhwb3#2C z5g7a%d>+JA9IH3sB4!~lCtUMKM8^qE2=ldD<5R8y2Cu~*W*)_yy#@{l%~@pd-!Ru- z1H`8EI>7mR_GdBSIq&Piig5N`Jy;X!?*_^&E^mg<*)N1r0#yo++*_u$z?yI+qZ_#9 zT(}W9xCMWhoqtAX+TJ}5r8W~O{T*=LQN84o&G5(iD|-86h59x^F;A&nK;HtZZD77` zw_Y(uzUlPd#!R94E?bf!x)xcgq)>}&pcWbRVy zB2=aL9C%8F=CGT2<{^*Pnv(d832h)k<1_WlGta#6%uCO_^2{60y!FgG&pcX=dYa^8 zLeuty(6l}C%)=LBub{Kz8n9y{(bLl^&e%;J61jZ0-5<;DiUqJ-A_-=hK4| z9vt`JhG%;|JviaPaSv|T%Jb>L2@j5YaKp1apB|j>;J61jZ0Y&*;DiUqJ-FeSo=*=> zcyQc<8@BL#dT^o-+bLsy_tBUy_p%dVbetV)o(|gIDx%W-)RCH-pE^=#ezsNN%KlHm zDqQBl)gD~yLG$yn>T?5?vfWZRr~i{o3g>%p!h?%EXnwd#ed-Ta;n=~_ZQAqQqlFtM zVyc|e|2Zm!=GUwgPI$^i9$ezVWgcAVLGue)8hx#&+~C1Y9yC9&r9RCMY$-H9u%&P| ze|5{$bB+g>d=BsDkVB-Jo8KB%xc2JpC%Yrt2eZSTS`zB9Lwpth&mN7EoKO#ZvdK?hVrQ|yhESCfD1VvtDVL6GLRHGwX{(o8p8}%;0ac0QUuk_} zr}`?Z6!YL9$7e=pe3t+G7aR@HunM*UZBeJS$pPmMRA_vbo_S4Zl#S=J^~}S~y=D>`pE03n`@mDCp3lrP zFFfA4p$p>3FgSNfFX?yTPM;Q_tpAn%M-mzys@XS-sJoC)+^lmx6>)uB0 zR+oe(dUXPnvyS_fSjd*6CNvKZC|q$9ER1qZ@aswQ!AGGmfl|VIuIOE4R=9vKd=j4u zpSY)YFN(U+Y*E^UAANLdih{$vl99;^uUby#9vdoVbg_1t>j$s#9&YAz`aJeYk3 z=9_-ryHS-BPX7gGuJ(N&+)N?bgy^P9?FPVKaFWThaKZmNFd@_# zC(5pKl<}j0dZS8|384uzAk??Hin1UyL$USDgFhqCU*3mLF%Jo^I2^-9U_|(qqk77m z@QHW#;DE;=eZ#i&QAT*i&b{TgAiQ(X8=n=SIrI#U$Fa@8V1WZG4;CMR(rjZ&57r)R zJlJ}$^I&j-6Fu@^?7_r?0}rMi%sg0lu=HT%!PcPx| 
zxd#gmmL9A<*m$t@VCTW$qfV6~55^u$JUH-R>cQNDg$GLyR)pVP^6trF#~?$C^c`-p zGNgp&MR~agOG2}!RcOqUl?Y^(%9`-zclMUG=37vj19Vtloq(n1z8Ct$>BVf=XRfajkNSPl4?1xAG0c7cfpb3%PDu_zU4iv*Tbp2LUh1=gO=hS0q5 zyd}JWJ8Chneu<>l^0Y;+&nukxS?^q?_!X48A(tYDzX#NF8R;X1%c9#SOPBuv%IV($ zl#BMo{eU`Xrw#RE3eWpAb}Zsk;ig}}LX^P+)_izkI-J@M0&ez?{Uo};giv?X0&7Bz zE-5vH=ik?ByTYUI2b7e`hY)Dynx0bO64uiw=k)7Y(|l!L8H{1?rRUmG;J4Xi!bc(5frd26^4pHYZE6rx3h8c2qB>%ndt zo02Gl=Q=Rl)`5`+8xOV~>^vB3=L8Bp7tl>TEvFH#Br;iRO$rSkI zQJ~C&EumifMaL29wO@hBVFA?xl ze1n(#%zjD*yF_zQ{kMA4xIQJpo}_I&i|65T=uvF@4D83GymR$rI%-# zLZQh^A7z9a-`O+I3H9fw(3^y((6_+KgSBU_(3m%B-v13sg(jue^QqAI?9^vJ&){e@ z&nROL4m_B8F!x~L!P0}Z2OAG|9t_^^v?%gmOlW#jp(#k>nWvt)LSvqJN`*$5Q)z}` zL8uF$46x{fXr?AFYu(@&K)XGd0%daLAA(YM7cybfk6MWJlTdvMOfo2!J$tS!)G#NU zKdlFI!aFBmAyTFd{>@?AEAE+uK@R1C%X=vmgfpId`J}QTeB$^|PQv&M zm|w)-h>+wZ>T*&sn8R*zN+W#9`vU01vwY zDaqp{>BWExd5Dx#jN($jCoaS%#{5$Jp?M0-2+iy$zXYWYT8XZ3$)l%BJq4y;hH}|I z0A+TRggOlc)`VZ>QY5e;)OAT{2r7#xg=0YID@OWnAe2Hyz$KA>)>~V52hY$xFTpuaxl&RW_=2*2~~;nxUXXj>8T~w zy5)d+yF_42sOADw4^|$`uXF+xgeFi$XacoY!F&aX73ChRJQ#lyzE-gY zXy!h@m4#2EOBWBu9atRzsJ$slg|j||e-xPK_`^J%(wqgj>v270I|WKg-4rN3+fO?_ zJ3^CE_gN=U_<2Cx>`9>9gJA)sDM;eM=6oopFSt)iC<83G0C0R>4@QKu`^x%a8%WHn zO8~V+0^3WiQVJ3^fLHt!pQuvQN-A>>Lv#a|y!Q9-c$2@g8y!O1?# z2+anv;5=V+F#1R?GRG(54}Dg1*P)ZK>=X;7Ma|WK%Rh$C=Z%yd;hXNqrDK8dN+=90 z2sMx>TS8ra1xDY3xh{Zmh#U}_F`xXvD&_8KN~p@sMtN2@LAmt8`zNbZ6Y7kU@!1is zds%O>3U7w_vOcT`HISHxw?KL0A5aj1?X7^??*a$60Z#i@FQuH&m=!r5G#r90}~q14Jt^z^lW^SF(bHKZV1 zz|;h$36!%+d~QCfXGN$!1$Kl>Z|Et5qwt5WHnPzTJs7{wDkb^EgDIhod1+DO!PbNE zsR*PinA9^~2xyK71H#o0{a%&}>5i08yTia=zZU&2F!PiJ;Tz7`chYC+!QgbHq(=mF z+?h^vBL z!D(jX!Nh~X7oCEHQ=lXtc`zX~1sQlS_sk0qR-SpxbJrQg?_|oz2q_758e-I4WLqRF zSWam2RH%bNl#OTJdgcm^d2q4Q9iayk4-PyST?(Ju@;U?N5us^O;lYaV-Xq~gmfzY_ zDl{{$^_1PyL1P{)^$J2bkF!}?6cU3IYdqL`=A8%g z%g}andQ}jblyV+f^>`!)(}Hl_Ik?~^u=HT`6$COvvE<&%Z1x&Lvx%y?_cF?k&?w_; z5J;7>F-r-Jvbz?_`@f4%IZU+N7MNy+KZL?GGxA_^6O=j_P^I7jSrKNRf=?Ms9se*X zrJsO8lYh?W5LuoIxb$`SlomCF)|?j^&9%zhg9V|vfLVL6@nGk{@ME~dX|8|99vpZu 
zBV2!P?~&<_&^&P+os2-{iED-CIqKL`Dm2Q3N*$k=?;l4X10xS6gr=SY4`v=LJXm?K zAv8s9J!No;(;Xq9x$+!&aIj?iQ4Q?NxKhG5Z*{4RXsJ@kx2Kt*7@jgi%$II`n7xG$|=G$+uLRJd*-} zHvG0XVKWaFgl1hT3C)zL2utId7dbS2+I4p3mTWP;Ll%N`?tcMuQ=}QlRg!0zqI!=CDC>8X_NyGR;!@= z&c>s9J_$|qa5a>AjYUdV`~dKnGxn0^8hGm`9C*q(Jy?0>3XQo!W8Qe?3XQo!W8Tty z!_6&Bk4qq)gbq4BBE_*7_o1`VQ~SR#-t)S(9>LJfo^-Gj|(&zC~n zzW|@2YzepIr@)SX=sGVjSO_x%lPOSkR)Z-}%ySPGUxiOyKSddS&4B|CRvs*`aLkjh zJ8Z zH(YbZqS;UypDCe<9{&PLHGlrdJS9{mPHf*FN%G@g`kE|j7$F`EJ_w~Q{^(7@Q)l$d zBTpH7%EVI+o(`Hospm8E%yUm!c*@dKR!;{_L2A!uV^Owv=2S2-{uoZVL@gqtqznU|h&@R40dzJv9*ph^;`^k6;#<)Xjzlr`a@JM>_u@bf24 zwx~E6<}(-IQv%hWz#sZtx(u6O3E(lyUnHshZrJf-3UzPuhY`#kh0=6KPH4KLAT-?( z{u$<`ixWbfT#{$z!F1YglHbxdeq5R7eo1JP$t2k0L2t{9&9Q$u#V!)E{FN80-M^>HE!-f(VQV^{hu=LukG^ z+s=f!seJJQ2PTmNI}g_TK&k5|Qrg!s561zuAkyy@p-Cw?z?!4-9;^tby#NIfW$;p% z>oqHZG2ycR!s)8O!4#$370L)TB~g~IgwJ^&#lj+2p0nA2x_*kXCe+FcYzVdO0$UGu z9?TL1(jv<&4iB~vDNTW*EM5oY5yxYIiL!jXHJ6mCHv;MjxR}>(vP$@TGoTh(`n@L9 zB4a3e%GQJFTj0}-`I2z@HYZPx`ReUZYLO*S@D9M;cj;Y`$leKf#Ia~Qk|#9HtO&I? 
z(RRW^SK+8FuqB*+Ce~zu@nHzG>EH1wDJ6tvZkB|mbPb^?UF#`34Io)T*E z0&_w=lq1h0V7`Q_j=*>>pf11gNvN{}K3#Yu%yrOWz7y(1kh7k_QBWG6N$Qv&H$7aRaDMhj8P{(YeF3%0z1M@??Or_-TR$9 z6G9Cn<_b+p11hJLy*yJjXY%L{!Wpak$rEakMOhQBKNU(0Z-t-g!Sn-2e%>2;i>ktl zj_tvU%2})ds-&>rM-Aa@(?=gfpbh_lMODn>V*uxVs26BJxNcexW`x@=eTQrl#JnQZ z$tAEQ)X60<_z(gaW$3|}P?rlaPYBIMHz(AU0VzGH?B&@Jn&{E7NXg_G6B=dW!IaSC znGu>iYeF5~G8h^{9o_<~4} zb`MsldL#PW2%ojkZM+AY{M*#Jm%mQ;leJ8WP(t|ajseI$XmT)eIIeeag z=;nCT5$ake%J3>E%|3Z>6QCAEx+C>q?!n^e;DK}Zo;-1g-+&Wut*0#1wFlETTBV%W zrtbsP3&N6TmjY@t1x80Zuprdv@=R3cDbx4c=%TDX?D&k2b71=s2WBT&C~Yr1m`qqD zmR}EcQ=r^c4?k+n1(qIcJXmEg*TH}#eG0@I4o|d7X;CxA379_&k{b z1?GgBlE7jLl<4_Fn7@N{l{`B_?RRNWbUKteY>@mXy%sHUlwl4y`*?i*e7Msc8=URH z<}-k+PQ~Zm!$@PSFayk77GF~Pk#iTOE2ib;&HD^ ze|KQ*!NC)bGXE!__PdPaickj_X2%vU9DP3dm;u^OxCc{1+jp``F^_fyT=G183hbT^ z?s{YIZeY9{%+8B?=Ed&#!{pf!ew)4#i11dr5g5D><~y*t0<#wZ-bo9Xf1FYIj{>7p z0T0!G92RFR`xKz5XP#RqRVqAKda(9jGX+XYtq0@9h;FKsoQC_^cfF~1BQ+&l)PMU^ zMYv?wJx4JDSrrxT@ane;MD$DGQ{N6TzQwR03Gcry+!NoZ66N6O;M#ullFAiFy>}FR zc>Qm*SP{-mdobc1dOa)u(ny&T>T?3m=|g=^z$m9bjyv_(gVB8Y4!CIl*^zlm`10vk zSj4BoMX$gU1>#fT65jn6p9(iTuje!RGD>$M4;Ob07tpbiaNYL38|W3`^s_K=cO4Zu z_zuj^_|aVH4p~$q560I*x%O9>T(VFrJnU;d*xg}!QwD8(C!oHmN#KC+m_6{yM}aw^ z4hD>Q!b4uvQ+897Na^Ryv*@KXAUuMvGD0B2&G8#@ za|7?>h!z+=0Oh*XsJy^@8m>)mn1vPzEC|P@_d2e8A(Unswl4x)yLWFMb%f{bj(-%N z*`D}g_H%naD;Mqs<)R1^cGTFhickZ|jg|BzP_DikMHbk-+y)w5j~W{sXrUB&@Jc|_ zqWTcPNB;-alA+iT>aTknxNs%v3Ha|Y*Y#5ZH44Add-9oxaTrd z{)?j`t54n2{-F=gTZWWQ>PKI>H+&kn`YX?rg5ZW4lfUM|g9W~!?{m>*h<@8}$~_r< z>FvFTTQsGWtBxER>gI$#&%b@&$;xMhrsMKMQMwiED=BhG_}KW|NuLTI`%PHmpE5ZYK$}X_>l+nPpU7`o? za9}tEiqFV{i3d}{`M2S~ckb}M_?SYi(yOHHo_XQH^68+tB2jzFmhiuK#3CRq>ZT~A zAmKZmfA`ji+Fiw}FMa{1|w`Mhyl*)!%_RF2PiyF1WyoVMux_q_J0(a&Du zz^|OrJ3vQ=IUN^!a4-c*$0;-^rJgeLVBx{igWVJ;r3(&6@@5GNJ(v>eJd)%yLc4}| zu=HT%!Ito@{_ zgHj(Koi*A>6olF$93qdm(FG=iCQ#wQU;)f^=P!Yh3BXy$;!_r@=+pQ^&t>i&?L)G& z0X3!FM@3FP3#iehkAiam^(E2r9$zUeG%}yfAkJ#4dJ3=v4KQN9?bs>=6WV2 zgCV>Ka3KTkAw!XH*#pDj&TXdoY0! 
z;a67nU`jai1HEIh!aYyy9g7vt_)hOwtZ?00T;Y*5MBznI@35=z$VI(%N#Sv)CZiOw z6HvI*KYNErg`3{$S>C%K_(qA2+P_AMF)e!UKK-^A)3}Mpr_4(Qc^He~zxtD_r|x2Tq)TX38c? zmCJYn{)Umyf<9F#DJj%I25KOI&5w|hUZ%jH{V||9hiwV332@m@l-;kO)TI&^?SBoZ z*KM#S6Y6pyFecRNHUe|P6_?<0ih+N*tOv_`5NQ1;kdm}0{TtvtbR#g>3J2zE*j#}j z;kwg$gCU#-fWP?3Rk4PF|Tl4WaH>MA;H9nATH<&vDvb5}Iaqgr=DaO*0jmW-2tz3`0a;_bhx$o)Mw$6$Ms= z&+m6fwGGWHtTr<08bbXsRt!bLP3w9WbK>X1T)(6s$^qg0Phu>C@aE8 zcjz5qv+bc=z#A-35*nZ3^Pn^^Av~I2rV;bpQ-(V@<_)3gj(A5$S$i6a~I5jPkZN% z(DYF~1E!7JcXpCvZ%_BtC)4$O|& zQ!2duhMuyaQr8gi8NL#M42(QDAk>SZvIHqS{uCS$#Aiz7N&ok*$viW{=|919)S}D@ zbxjsn5E`E~p(%*M+ke<=X6q>xp7Gt@b;aOSX#4VGKRKCaNO*Gp#|T8((K}b(3}E43BP~Bq_Xq^DSXG)7fmWFDoy2U zLi0;Wji>B9n7$fqH$L0h4(vP_yw*{Mgl5=8o-!sh!zS^Rod=`Di5`0}@nA=&kABHW z4i1J-12aN1;|jtP`@d#Y5^6nh&?h{jf6>13eAa~Ku&dBsW4Qo%F8B?e>XbkUq50}o z>cPx|xd#gmmL9A;SbMPbVCTW$3uvZkQRuJeU)jFSse(p5K0x?ohbw z-rng|LG$THVGR*wJtdHw4k$doe-5i~-fw#+p$gye>^F|w;e1o!x?NF18QuzS81ydl zD%|F*-hN);*j^6&%9W1!uFve9?W+0Fn>o-7T2*ejZ|`ik*#`SLokw3AUF8UPSHSE9 zVh`4YYA$VWJQ(keKzi9n?l885dXYq6wg;3N{qj*tHKAT#dhV$D8E;`OVR0xVC*d}$@sIh)XZ%b2p-*7S6^Q}iS1-ekT3|)Et^t&>l-%W*r-Zv3 zb3!$j)4=F%C(rO#)~6&N5#C3i0%Jnc%#=_ALAf42S3HXA403C@`75CIk=$}k9s|_M z%j1qMq2B11Tf@OdC=HAVFXAHsxQR@tfdsaMHqa*cJoI%qZIL|7f7z5|QyK=i%%{Uf znwb)+Pf^BWP#Rbe8uQ>8)~AfRl2C8wJ!6;A_$1VcAj)(zYc45OgeH3DDU;2eJZnOe zXSju<%n6OMnWB_D>t|YXz%4D5^)q~yg;LLCD?k&dAk=2c59Vgic9e|=vuTbp*xG@y z2M5!kJnuIcHlrVrA1eqCx*fwuV693n1_JZ#V7}X0C}nE5gwwu-f0VX&ggTN17TZH{ z-}1haaM^cpI`Ga>N*$rzaS)j82s2fR&zf*4Zz9VyjCO)jpLPg z1V+y%WZSWV%>+D%&%g>y31>al^H~#WbV(_EflX<+cn`086RzabUgEQipwtl}=J5-e z5<702k$J9A7i@uhK&gEs%E5~PwR8eg50-?Qyc9Xu3+4*Zc0#QuQkrG+6lJ5ZU(aqI zC`~=XeF059GeQ%+_Fym$b5qa3et=p}S#9bVQ0J!1;`SgwbC~En7`)n1h91la&*eaq z=n8-QGPn_#drE~yS$ax^Mp=1Eg+|%E-L^;$<=p`Ak=^kry{Yi1*YsfV4*c;JcD0x* zT=y3IBlZmM#2;E@z{3DFkR0>}gc=Atfy13Z5upiG5NaSPa(N8Qr*nCedr}JDG9Uja zF!+#F$_P=YN`dKzp}c1|tcuvJ5zg5W&y>D*G&eKCnS9XHC@1)!={H9EI#tdY!*ivJ_hsSxW7X)3HA0Us&q1x3MG1U3ZU-m-Y}$N6``hd_b7RV>(9q&i*#JZ 
zSAgBKdGFSS!Z~O5&UzF+!PF#rM{_+#xPA1zVtE=;(sz#SB&8$Nw|@Pg4=?&SEWX}{ zn!G&upgtEbf>NfT!k68JF)s%xg_}-=Ql_@T4gIHN6>8~>xfaAgEr@}-b(eyu&n>vG zN0k;L745julaOP<>44hr0wY4bS1uE_Ak-p{-d;G?dNBSZe6Hc)LMB2=xQ>e}3PL!W z3!%V*#nsYDpqfyhiU&Lsfezx>?}qkc@maP-cqo)m+b*ynG%X4i!~C8H(3{(h#!_(( zpx(9IS_T;5MW4Yx3JlM+N@-C{s7iqep{~gW>YyFQ89p>Zr4CwwIpOM+2q@+Sp>8z= z2IoOB;}6)F35*GKSBdC^`pk*I{6Ar?OC^>yLbC*g=R;|lSzG|9kF1E#Cm~BHu=+13 zwLA746?t$mpmxU_WYk>>c+f@o6c{hH<~TDY)OuntEQ3;KoSa%FgiE$XzXN^?O0B2t z00-AtD35KlQ=lxW@r_WfJ)?KOIPqY*2}-@#CLUVsJJ!EtQ3IY>o@J7J#J8&6I%-f>@&3&%u{eUKVxaBPIX<$S+Yo~k^ zkN0H=jn9P8+fc)=uC#K4EBbZxd{>ynv@jIzIt?y&!HGm`KrHSL`ySc&s^c_f7iRI6#gfo+dzcN z_W#smpoGvAdEl9+9?U%R+*1}FEIl~jlLU5vc`)Hat7o6kd-h;JXxiR*u;h0z_MhE* zAfX~O=A8!vu1AZ*p1DF}9#Uyw!KWxpGfNM~|G%&MkNc%8>jXY0R2I?W*OJmu8bg;h z+J{k8MC>^Ipn{W!Q7}vtClr5lwx=mI)Z@}r8g8sO{z?!#JJ}KndK$zcMR8o#s4yI% zG?#tRD30J>-tbt9o{i%=?ifGGm_5N5BLAzegf&M%w9%*x2My0roF;ZVg5ay9^Y2m zN1+l(T^baQ3MYlDM~GqO3JZlLF#2g1x|PBj3+twzNcQ3eNF!?R4|m`IV;inRt&k}gogOY=4qas`2O*qe6p%M5K zxFR4t=I&&Y3w-?$7g2UP8x$_v(AwFi#Qt$sWg@|IqQi_a%>l1YUSmg87vmNmuCwFw(7hV5A$i zDl#x4edcfAF4=r`z=$-QKF(eG{Gkiw9f9%M!xtS2-O-O6dFi2Jb7)#L#bqc3j zJTg9B*?orU(~S0QOZFMZsD9LX>Iz`Y5@C$Ua6n}2hKBQPT*y9J=6~%Nowu=9f8$8U zrS7vF=^O4Vj`55x;okrBd^;N&uKt%tUVeV^{d)nV^uX^+(+5Px^C8~m|MoTsbBT9( zPudo*rUsfbsBi+(acNe#`hGFYLSd;edxEF$IVC-v0XI`D6;=vc zg`L7);ixb>(fdhVS}DvG4o~s)jdVoSNdo7m3M@|+I6cjg&hLSAc~GC?k+c*Wg{{I~ z;h=C-I4R7YF1oZ*m@6z5RzO;`wZdMd4+=+x-H(cyUx0L$S?v;$fpqE_5J}sp_(@NH z`Ik47i@j3?b_xfDv%>7BM4P$7LSdz_RoE%)6^;rgg|os8@3G>kQ>{sf;_!|Z4LKj>_@0`kcec<}{iY@Oo@;QO)Pdz&xuHfhbT6gI&4X2jB4;FC zst}O6)C8u5I^cb?f9GR`df@wRy=AM=0Hha@1=5SiMnpaVZz=Q60}p&9DKr7;udU5M z`fF?12Yi_H#{_dAMHWCBd;bwnkJq5t&4<8v4cfccvmY3*L9=h}zJuv&&?dAQZ}GMC z!1!}%h6^I&%S6KXa+Wauju_!}Px8;3_VsR2 z6B1t{TcLg%+So^6+%+9H*`OIU_yBZs~zBONOhDVUqFK zhHZ|(DD8CIxymLvTRrBm1|92M3t@HXR%6)TjT>$vw|QoR83Sw`U?@rB;z$(R@`RM(F2 zGgTkvn;of8tFZ1w8( zj|{ZP>E|6|LJWuJiOBXG5m`K+BW=Sqz_@Ae{d|E(UbO3p>jj-XWPO&YfZuws|H%e> 
z(y0f=h}O4>cFe$=@f4M{*`MoeQsnA|jxlxCW)6(0^XII9@pzuK*#M6?!ndL4r|%leMEFv%Yl?$04co$#>)_FJ{KSz>vFsyl-kUH z*Q>{K0yfF>Wsb4K+K94uFT=|ElJW<5<^38c+Y0cmU2z<3RX6>5O-)SO`lq(VKAHgOOxj#PwY3j;f_Xxtg6<8?De?vreK>D;B7(X_* zsT)*epo~0tRx+XruhxnKpl>7Ki{6|>b}DjIkrlqupv}<$X>+tdS~i_Z?^XJ!A}1BO z0ORdCHq7i#eRwp<<^3V0&xoXDQ+(Ll{P2az&%G-kMK&MtNZK4VekD#jY^$(SxS%S{ zeD>?!6`J`HNL}g`4y~tCWR36P2wNak?|@Xj2hw&QfbrTH+eRahE_4?lRj*#-U%3&E zc;hPVo0G!ftDa7gqrwSDhsyeEo*vKF`F^CZQ`iG<+IRFiGqdyoNb56@cJ1OHz52~h z@mEhcIWYy0};G6%GnV;1}?sQ7hDb)4Rlr0mzF1NQ+?v(jGSf zX+mb;zKfC(6?c2}_$#H>rAA?|F#ndPpS(Y96G+u-Ae~TpAT7n!J>Di^3B3G!imjOs zyy!6rR)~!E8rg_yAa!Ylmo?BaByiVt>08iMWT~)L*eL82_6i4ulfqeHw&Ax;d6)`w zg{8tuVWY5B*a0`symM=7_9}8zI4PVJE(%vChzZFR779y+wZcYWtFTu%C>#~e3KxZ| zLXBNvp|Da|D{K{Z3I~Ow!b#zxFnfX;rouvDrLcIiw;A_X>^5(uuzQL}#)o?Q8mpx|LVOSTt#KM{G;83|Trb0JHw6*j=Q!ETYQikyKlOMWtao)+AH9BKe%VBP!6QCb^)Z$SHOc0 z-IH_)uLZySlXq^lIRPm>f1P*f;&&zKg~C!{t*`-Jmma8F;GPr*g`>h5NS$ARM_%;B ztzj1b86(20r+w7{_r1uUDK%`~cC@9_y5tW1F!xIX4W~ZY#{Pgq|9{8o?P%*< zACc``H84hGkv%X*WH`UW(_@k?eZk)Q)^mI<*>NfVZyp&3D~qiDzvBiXZATb@Z~3)k zDUQH61X<*yu=u*SdDA^VvNg;K82gcRegHG@AWR( z*r#pi2Z3*Tq2I*29}HK(uO99DyA{f}q21N&flr-$vWeB8zYFFb!+q#b15; z)-KZkW0Ebh1;&KCfcbDTXtKc3_m-y_&*A~3#3AdK%346CPNtz#wI z9@hc6`Zl!4;bc#b!>?iWG)GDwfpmslRAhdNr_=ic0^^Hmn~=cxVwy0%nD$A2t+yGw zjP}whVlK5e|+OrmTuLDKj%o>i)Uc;(>8JWS04E){EuD;NRizw9vO?)(g%g3 z!tB#r$adHUNQJV=Bd@#1j}TU<|BS%;Hb+WdecthofAn-4QMV1P&F&75j3bzjeH+^4 zR{M7%a@dA;WEgPm70cVsMtd7 zkXFcy$XFq^7^ZJ~=XXE-i(7?eAa%aZ{Hm9*RXA)zTZ-d0wCc0MZigr|D4Y~73iI#s zHe)GT^+I6z&BvHzCwUhJ%5BKR2}6QyP>6bK-%uZ_ln37 z7*_(8z5ppbd!$HT0V%yySSy@>$3Ojp*dg+Ae5x?)fpOP}Fzy-| z4v4(*AN=+{;RWCDoy%}WBn>lrw~zf8OtwX?fE1Z4tQ0n?&30R)ZOsn&U|b$}=YhvQ z+GpO@XTG1eksX2xg(HwI@@L?+XZUr9T`v{?1&aaq!3-;4jNPzN*aA1+@6Gr%7%)B? 
z62>MrT=kxI^bxJdp( zHvmt>M`0_JAMollpUnpyV|iO-cCBL^E)4rS9Qo#$!tC!n@{Y~qG|?-}XAznIJqo?n zPyUA;n(7?|@Oa#Su?4nz<{9gMvkUP_#c_{gyzI!*2jJd^`EiNxv||$-5qaRb-g%2G z&+s-M|Ej_Y8aBX~CBs(X1l-YlX=~Zcz*}DIg{<@av$^wz3vlmYJ~4*bnI3sLwt(RZ z7-KigfiWVF{5em*{!8Abb-u^D?CA@&QQ@R;z>j?Rya}W)d?z5?I$pinyY%&!CclEI zfz)OLq&6%3Vb|Nw_hxLEGmt*EWY>5E;RuYaY3bRu9(ni^pRl#z0`J_vv4tzVh@Yk| zmw3GYs`;2}PFUCQ`|~ya1niOcAHDO1N7BXKqHxA9rD>KHAk9*ClDEk-52WX;a^Mkr z{cT0or3Of6?MY#dKUY8#QYx$zHVRvXox(xksBl)eC|teXr;bK6<1e1lm-2!1oqiyF zr=MNyZSH=bzfs>Nxd7gQf`%24zUU9!pGNj7eR!)Ey5&oLN7rvt055vk8Mc!B1bw+j zo_*E@h7Vb<(YbewZwIVP<;NW3YRz85yWD{E^lVuFjpLd4A1%H9tRpS&{%=Kk{&|7T zw;ZFo-DBy1u|9o7_ju%;cl*CSpAzpCg<6HH`?$JY8s*;>>BC|E2HWVTKN${;ep;7W zyz4e1?H+Xxj7a;qx&Y&=jej^a&K;`6E5xp8v+sAj`Y+ErG(TD9m)XGB5ey69wIB8EZ&<3xT193@d!cxO-O_tt?68Ef z!x|2VJQ9O2995g`W4zE+Z}fiJ*oSRs^En;kk+GHx%O7xzwPaW+EOL*GS+d9)7_(&9 z19$&+@|u(ZNGIsPy_Y1Bql%n>N5AT^>l%KBoq?A>+Ph}e7vSkQiW_E+^$5beaEukQ zzJT9R0Aqz1mcSUhVGWG28#XGkQ<259z50IqlJ2?d>g8FEu^(-&u>!`s6f80@b}ozT z5P3A(w#We(Z-6JD% z$aBzN!vYu+Vz>a~_Z^1yTRc6+Zu@2fjIkRQZ^tmvd0VtYAUf~M8yG(*FkHRE)1&i- z^`(vv|81WT!xp$3y)f*7ci~7zNITbr$QZjtE|+_uIM&&SR__*A0%LhwdIgM4Y*?$v zMn(3(ST>fP|DIRBC!NnOFrCjCNb{NP_jH=i>h~RK>N?A38&ey-l* zZC;IIv*8GgYX-ya3XhCo+9XfFC`5P*Hj6K?1D<|S<5z%&B`}6**Z||OX4nFERaZoj!fscPgb?E33f1Gjf$m5Ule;>R)u=K!4H(Xug5%;_)*^6u7sSkR-wP`z7 z2aI(4w7US~kZ$MA)d#&z!X6mu*5;saR5&TDKIDa7bE%)stor<6#~8cKQvMOgxDxP1 z3ydoP!vYxnG@Neq^m8$VKA->HF;<90mY;Ntsk2!s|IBeOnlY?_QHbzSd;KS7hQ&>u zeisTl{;lJ|U-bp{*+YZhQ39h)!xp&f#s2xouIIaL>C-xWxZT^l<#)VI!x^|2Yv1Sd ze|qHY&q~@Xf$<62ehyXvqo4jmJcXUY{);?JZ}T4=&ndm1KJyRrMG!M@*Z@y^!>`&( zHe9Vd^7xPW`=|{IU@Th08W=mQVF!#I)^JvltA~3biY$P!YkPHI94-ti6Sr~^~kf)jJ>))kj`;| zdye&VD-?M0>AsClU3V$)n0F)_Zu&v){KbbpqU@M~`%dr)@vF*XJ@URc`U-i|I6R|*S-rNTyG3yhh!LY;~nRpg{_2EG<|B&^Ns$vz^w|Cs~n zR%Z#k@UncJiF@h5T`9J}`z}o)J0NZM9!SF+wxO-M5lEe%wnf^AW)->IA5!(~DZJ*b z%@vTU=PI(eKcw^$ku)KJ6j`Y@YlV%%78tL@w-E(W^-iT{C#eYmQu+!=ozH=EFFlZd zR;eNjAVqdS+HgIPx-_WBNks#||K-zFKke1D&(zBDj(=?(S__lq?hyp3HP?62kMVsA^3Y-+q 
z3Kt*^Gu!2D2HIgQ2hzS7$el+dVFjc!Y#>E;Dt%RZm*Rt%EruLOUu70R`YN*m(ywug z^SsT|zM9++u7UIscvM)v$kQouQdquNL^cXXh1L0AdJm*74M6JB2&68xFZV88b4>CzZU>|n7XJR!ue8fGwzf=V!d#u%S81OFHfh%PyuN%)Id4}^+1}C>|%c*JK_8R$Lrsl zysbU(>sKYXAd-%s*#VCrtp4Kc^&DhT=xH|0Rv^83b@iJb$;Txi zMb^NmZiTu?Pv1}52d3}59f0v?2Q9sQz368Lr0N5Z`Wd+Ee{gOOG!yVTPO$Ni3! z{vD(ZcLgFpopfmc?n_5B0%&XOZ~XhD&A?mk^zRL=OFbgb zJ?~xX;$D3O?mIuh>VsZ=-!~GRfVAEFYdw-T9FR8gq%i-Gryn~e^VtJG_j&&%vfa=K z-17w=k>70m6HmWyXEIAI@ZcAdu~(n)Na}nby|p{5-(fRO8!iXZ!KwgW_~)KslUxDm zWLm2>8zAj*qlygNchzUMHr%Yz^IuS%2U2=602e@eDd;Q5z)B-utPasXca ziBE1tj=*<(BWZI2(!N>0*yoe>xE@HS@zf^GAOrnWY>Udz>AYifw4SVYvioxLeOo(t!tdCFv!QKG@E{Sl-t8`BYD%LuLEZjuCn2 z8?8g1CV&xXr^xX%5xF{@+w_y^ZpQ;Z?*BUeEJxbd$LBfHFel(av~7{gy_c{5&91&N zIlpKB?06v#nHISMKI_h8C09U7uYrf(okRxSfhn{$2bDeocVbakE*Y*^9R%PKuWI=NuBS36gdFzo02Y# zDm}~I{jazC3V2JJo&%|R1KgiR22z(=l|BLYyf#@Ofs{V0^kRp1{+Kj9kkU&;Qs+D1 zfqhB!KuYfsdGIw!WFTGe42XQ}tCGl3RiA)#J_@9B$Bali@deUun0=Q|$PM4{HD$X@ z4!keL61eM|Nn{PYFvTX?OtFhLQyikryOTDjXfwqn+DvhEn0NlZ6m#INZzXM(zzb8X zfj6XBKGfT!KZ=y^6xb?U6c!H?>5alc;i9lusWuf33P*+6!$q5=!d7AS-JZ@@0zkSF zD1dw=pdvfqDfmN1wmxTIoFWZ}@8RlphFu-y7=`?r;rlq!(rX}pXae4TFu5bqtH?n` zE-Es6q*tdlbKot9J#p*QlRw%c={lqa(%47f-Me41l|BRcIs|yf&9B~y%pSwDWD^qj z*HU;q1^D^z;$$?b8ROyvUuT}b#U1P(%@ZMza2)yy@&)?ccfnP{($<9bW_D_@a zz=KapK2`_rfBe7OYI9N51F!vl5lPjv9o~5=6i9_uh@^fNz}tT+DO4(~6xIq`g`L7) z;izy@I4fKfuD;7>iH4agEEJXsD~0tov;%ds4Q)T_6!zQD(uZwm>7&9~;i51*Oiaj1 zVWF^8SShR(HVRvXox)z>5SZ>0bI>jUD=sg4J=)HQnhzz{^gumH3R2GP&^a@CkwQ93b>77dNRr;XPXB8Pp zU0PIn{-Zv2iY$OMbtRC-UaRy*rMD`5P?3SurBS6XK#I(EsjUg5&KE#RFI9S_(pwc7 zNNsj1eFReEq}rTSdiD&l;Z{J}a5<31Ua81H>Qb%JJCz4Qq2fD}2aHW!t?D#h4y zAdS5M(%5Si8Ax4fReBGk$U(I^s`Oc%iNRf?dvsLN6N*`4EsL~e|8Ax5q zPE|WBkRnSUZTCv0H!3oa+U!*N0HnxKwK=KuMWtsyCAQ`YNE@yIQuR`$S1P?#k%81^ zr_x6tMNX>CS*2$`t#()-ZMYmrW3N5YnPRhykkA5{9N(kGRk zoh~+9Aa#C)NZN2EkRmJ9X06g&mENiJUZqbeGLX77tMt`wvBTy--kLxfd!^EAmENfI zPNnxMeNgF>N}pBwqSEu9727?KrmjRJjlBj^WTV<_ReG<|2bDgm^hHGmQkSx4svQU^)#M<7K` zs?AxYug(yQHjvsZ5J_XNfD~D)HXD`Rsq|i@4=R0Dk%82uMWyGJ*zN_8wtESrvDYfS 
zQR%HpA5>%@b$(Ro3y>nSXQ{0Tq|O&WN-tG7N|oNI$X2!4sq{gmk1Bmq>DhC{z8OfJUm=plUIHny zQf<~My;bR*O7B(rq#^^UOS4K}{hV5~K;D`_8hfSEYn9%p^j<{5WQnRr;VJ1F1`sN?(8!nLSsmNBmAke5Kl~fwx@o z*IQp@Ho)VbntZ?60_is+fz+i=6&h5VfmD4&&t)&7;oiA0PKq^!r@{Ug@4+Gb#P@@X9Kx(rCZl;|Nq^avwp;5INNW+{E zxhpLcNQD+vD0_i#4r+4+r1U@1Pk&-u5iP^Sv@Kx%UU(!>9OH1<&ynpK;DG@lD1k4b+G5lDqrFBID-2U42_kkSKb zm?a|Vw-}WwRI5UbDilbCT0~x$4zp8*dR1ruQkx_2K-y*?jeSyu7S(1T&1Y8o%-@j~ z3Zz0gBIyuR0IAIqNa=wz%nFh8E0|gpYE+?C6$+$69U^z7Key>sp+OZIfz;*{k$8cD zU55nH*k@HJ`vsqQYBP}LbA?FyQBoikDiBHAs031*6_C;csY^8?ccnjxYE+?C73x%> zKq}NDl7FS83XQ7J1f(|SX!8}xPh$dU?29V2I!|n)K$_1Skvr30A_r2T5|OlxDj>C4 z11UX_hS?x;S6aPQg*sKJSA_zp(16Izw9u#uO{&lgq&64ef%F$ufi(8)Md~yGq&9OP z&1V5Tm=+48LX|31t2P@Tr3X@%T10NpFTYfwUKJWtp+G7$B62e=G^s+fDzpHp&FsZK z^YpW|KpOiBk+i%EAhj7t+o(k39ckwSsZgy7H9%^!1yXt-b*V$-&etV}i(VBPRH0E7 z3Zz04A~(}QvnsTxLfQH1xCGplwz&e**mEFlqf)gQNb^}Cl75*ONQD|zs0C7+9gxxk zY3h1J(ho2PRcKU&CRHeq3eAY5ABrxjQ1%kv95kX8klM_F^i$2iW7D5J2hxNTNT&@~ zsX~F&r5cgcX0O^Dfw!NS+$0J7>W6%|cEK~NHUl>wm_+6;_5YyG7eH#W1XA@HNSzNn zkQVAyp;5INNY!UmDDYrfD1Vt4Q3<3rDuxbdK5mKq=pvj@`DjX)|i0jbao+)N8C zKq{2KT#X1wg(@Hws(}a6LJg1#4L}|dkP1yeDl`KRriB(D6)JvFjR;7EDj*fAfg6XX z+XzU7dLWMoNQEXK6`FyYX`uy3g^ImuL_jK30jW?8JdhS@fK;dl@`!*`XaZ898F(-) zv;e74@gLNPfK;dgQlSP&Z3b@qn`FZcs?em`45aFdDipYx7Ak&8jHm)qn>CO}1U!(o z*#c>p1CX}S1f)VUkP0opgK42`(>qUv3Lq7#fK;dkQlSR8aYV8?S|Ak~fmCP$QlS}0 zg%;puS}6NvF`^Pkg=!$R8A#Pz)n?#2h-{mBB@XVq|OIYp-vSF+&D7XaFZ&u0IALF6`n@bSHR7*%^XNmR{?oM zKq}M%sZa+zkQVBJRA>THp#?~VvRA4R0S~5yav&9|fmEmgQlS<|BkEO~fg2A=w$ZE# zWv>#Q52WfjBB}F%n`xmMku;(fNNskidJjC1wmAT)`V6E(*{}JCsL%>XQ+$s0C7?4oHQ1;KoCf%`pI}&;q1F*#&AuKq{02H`77|kP09iCBH`8SkxS@~^0uf1vpuo-a za1pqnkdE^aNvEE`&Gd{AxS^2F?-5B?7lE7Ul|bNzLb}+CNV?Pv+)OVE12+`XMSeum zM~uMD^y5I_hC=#q7m@VQGH^5f7#O&rkUqahB;7p-+)O{|*T3~ad)DOKuOu%O3B2f& z$s0TZ|KR;1@`hgEQSVK#L7`K2B(L!ayy4y?vO{FN^u}IT5qNG|C~)_alYRys`+t9E z>p_%WwHdfS9ed!F2a+z;hhJ;6wE4qH=Ud=iH{QBcr~_XAIDxm}{gqBUw)y#K9T#ob6e?w2T~ljRkt<+&%q0I45w|8*3ZDtrELcO z%B?4?GyP4=`4K*K^bVrHH~!N4Z+OK!nGyL{Z}TN-)yoHa`YV1UxnCD}%0FyP^1lFt 
C(qiub diff --git a/src/ast/rewriter/seq_subset.cpp b/src/ast/rewriter/seq_subset.cpp index 2fc4d1f71..f12bc32b1 100644 --- a/src/ast/rewriter/seq_subset.cpp +++ b/src/ast/rewriter/seq_subset.cpp @@ -19,7 +19,7 @@ Author: bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const { while (true) { - + if (a == b) return true; if (m_re.is_empty(a)) @@ -30,7 +30,7 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const { return true; if (depth >= m_max_depth) - return false; + return false; expr* a1 = nullptr, * a2 = nullptr, * b1 = nullptr, * b2 = nullptr; unsigned la, ua, lb, ub; @@ -39,16 +39,12 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const { if (m_re.is_dot_plus(b) && m_re.get_info(a).nullable == l_false) return true; - // a ⊆ a* - if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth)) - return true; - // e ⊆ a* if (m_re.is_epsilon(a) && m_re.is_star(b, b1)) return true; - // R ⊆ R* - if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth + 1)) + // a ⊆ a*: if b = b1* and a ⊆ b1, then a ⊆ b1* + if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth)) return true; // R1* ⊆ R2* if R1 ⊆ R2