diff --git a/.gitignore b/.gitignore index 2d268c988..df4e3266d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ *~ rebase.cmd +reports/ +crashes/ *.pyc *.pyo # Ignore callgrind files diff --git a/src/ast/CMakeLists.txt b/src/ast/CMakeLists.txt index 6a50c3b05..dd1db135c 100644 --- a/src/ast/CMakeLists.txt +++ b/src/ast/CMakeLists.txt @@ -46,6 +46,7 @@ z3_add_component(ast recfun_decl_plugin.cpp reg_decl_plugins.cpp seq_decl_plugin.cpp + seq_derive.cpp shared_occs.cpp special_relations_decl_plugin.cpp static_features.cpp diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 4453c94a7..b6e472b1a 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2914,15 +2914,11 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) { #endif expr_ref seq_rewriter::mk_derivative(expr* r) { - sort* seq_sort = nullptr, * ele_sort = nullptr; - VERIFY(m_util.is_re(r, seq_sort)); - VERIFY(m_util.is_seq(seq_sort, ele_sort)); - expr_ref v(m().mk_var(0, ele_sort), m()); - return mk_antimirov_deriv(v, r, m().mk_true()); + return m_derive(r); } expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - return mk_antimirov_deriv(ele, r, m().mk_true()); + return m_derive(ele, r); } expr_ref seq_rewriter::mk_antimirov_deriv(expr* e, expr* r, expr* path) { @@ -3138,13 +3134,18 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* VERIFY(m_util.is_seq(seq_sort, ele_sort)); expr_ref result(m()); expr* c, * a, * b; - if (re().is_empty(d1)) + if (m_re_deriv_depth >= m_max_re_deriv_depth) { + // Depth limit reached: construct intersection without further decomposition + result = mk_regex_inter_normalize(d1, d2); + } + else if (re().is_empty(d1)) result = d1; else if (re().is_empty(d2)) result = d2; else if (m().is_ite(d1, c, a, b)) { expr_ref path_and_c(simplify_path(e, m().mk_and(path, c)), m()); expr_ref path_and_notc(simplify_path(e, m().mk_and(path, m().mk_not(c))), m()); + ++m_re_deriv_depth; if (m().is_false(path_and_c)) result = mk_antimirov_deriv_intersection(e, b, d2, path); else if (m().is_false(path_and_notc)) @@ -3152,22 +3153,32 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* else result = m().mk_ite(c, mk_antimirov_deriv_intersection(e, a, d2, path_and_c), mk_antimirov_deriv_intersection(e, b, d2, path_and_notc)); + --m_re_deriv_depth; } - else if (m().is_ite(d2)) + else if (m().is_ite(d2)) { // swap d1 and d2 + ++m_re_deriv_depth; result = mk_antimirov_deriv_intersection(e, d2, d1, path); + --m_re_deriv_depth; + } else if (d1 == d2 || re().is_full_seq(d2)) result = mk_antimirov_deriv_restrict(e, d1, path); else if (re().is_full_seq(d1)) result = mk_antimirov_deriv_restrict(e, d2, path); - else if (re().is_union(d1, a, b)) + else if (re().is_union(d1, a, b)) { // distribute intersection over the union in d1 + ++m_re_deriv_depth; result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, a, d2, path), mk_antimirov_deriv_intersection(e, b, d2, path)); - else if (re().is_union(d2, a, b)) + --m_re_deriv_depth; + } + else if (re().is_union(d2, a, b)) { // distribute intersection over the union in d2 + ++m_re_deriv_depth; result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, d1, a, path), mk_antimirov_deriv_intersection(e, d1, b, path)); + --m_re_deriv_depth; + } else result = mk_regex_inter_normalize(d1, d2); return result; @@ -3177,13 +3188,22 @@ expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) { expr_ref result(m()); expr_ref _r(r, m()), _d(d, m()); expr* c, * t, * e; - if (m().is_ite(d, c, t, e)) { + if (m_re_deriv_depth >= m_max_re_deriv_depth) { + // Depth limit reached: construct concat without further decomposition + result = mk_re_append(d, r); + } + else if (m().is_ite(d, c, t, e)) { + ++m_re_deriv_depth; auto r2 = mk_antimirov_deriv_concat(e, r); auto r1 = mk_antimirov_deriv_concat(t, r); + --m_re_deriv_depth; result = m().mk_ite(c, r1, r2); } - else if (re().is_union(d, t, e)) + else if (re().is_union(d, t, e)) { + ++m_re_deriv_depth; result = mk_antimirov_deriv_union(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); + --m_re_deriv_depth; + } else result = mk_re_append(d, r); SASSERT(result.get()); @@ -3199,7 +3219,11 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { auto dotplus = [&]() { return expr_ref(re().mk_plus(re().mk_full_char(d->get_sort())), m()); }; expr_ref result(m()); expr* c, * t, * e; - if (re().is_empty(d)) + if (m_re_deriv_depth >= m_max_re_deriv_depth) { + // Depth limit reached: construct complement without further decomposition + result = re().mk_complement(d); + } + else if (re().is_empty(d)) result = dotstar(); else if (re().is_epsilon(d)) result = dotplus(); @@ -3207,12 +3231,21 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { result = nothing(); else if (re().is_dot_plus(d)) result = epsilon(); - else if (m().is_ite(d, c, t, e)) + else if (m().is_ite(d, c, t, e)) { + ++m_re_deriv_depth; result = m().mk_ite(c, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); - else if (re().is_union(d, t, e)) + --m_re_deriv_depth; + } + else if (re().is_union(d, t, e)) { + ++m_re_deriv_depth; result = mk_antimirov_deriv_intersection(elem, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e), m().mk_true()); - else if (re().is_intersection(d, t, e)) + --m_re_deriv_depth; + } + else if (re().is_intersection(d, t, e)) { + ++m_re_deriv_depth; result = mk_antimirov_deriv_union(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); + --m_re_deriv_depth; + } else if (re().is_complement(d, t)) result = t; else @@ -3253,15 +3286,21 @@ expr_ref seq_rewriter::mk_antimirov_deriv_restrict(expr* e, expr* d, expr* cond) result = re().mk_empty(d->get_sort()); else if (re().is_empty(d) || m().is_true(cond)) result = d; + else if (m_re_deriv_depth >= m_max_re_deriv_depth) + result = d; else if (m().is_ite(d, c, a, b)) { expr_ref path_and_c(simplify_path(e, m().mk_and(cond, c)), m()); expr_ref path_and_notc(simplify_path(e, m().mk_and(cond, m().mk_not(c))), m()); + ++m_re_deriv_depth; result = re().mk_ite_simplify(c, mk_antimirov_deriv_restrict(e, a, path_and_c), mk_antimirov_deriv_restrict(e, b, path_and_notc)); + --m_re_deriv_depth; } else if (re().is_union(d, a, b)) { + ++m_re_deriv_depth; expr_ref a1(mk_antimirov_deriv_restrict(e, a, cond), m()); expr_ref b1(mk_antimirov_deriv_restrict(e, b, cond), m()); + --m_re_deriv_depth; result = mk_antimirov_deriv_union(a1, b1); } return result; @@ -3909,221 +3948,6 @@ expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) { return result; } -expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { - expr_ref result(m()); - sort* seq_sort = nullptr, *ele_sort = nullptr; - VERIFY(m_util.is_re(r, seq_sort)); - VERIFY(m_util.is_seq(seq_sort, ele_sort)); - SASSERT(ele_sort == ele->get_sort()); - expr* r1 = nullptr, *r2 = nullptr, *p = nullptr; - auto mk_empty = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m()); }; - unsigned lo = 0, hi = 0; - if (re().is_concat(r, r1, r2)) { - expr_ref is_n = is_nullable(r1); - expr_ref dr1 = mk_derivative(ele, r1); - result = mk_der_concat(dr1, r2); - if (m().is_false(is_n)) { - return result; - } - expr_ref dr2 = mk_derivative(ele, r2); - is_n = re_predicate(is_n, seq_sort); - if (re().is_empty(dr2)) { - //do not concatenate [], it is a deade-end - return result; - } - else { - // Instead of mk_der_union here, we use mk_der_antimirov_union to - // force the two cases to be considered separately and lifted to - // the top level. This avoids blowup in cases where determinization - // is expensive. - return mk_der_antimirov_union(result, mk_der_concat(is_n, dr2)); - } - } - else if (re().is_star(r, r1)) { - return mk_der_concat(mk_derivative(ele, r1), r); - } - else if (re().is_plus(r, r1)) { - expr_ref star(re().mk_star(r1), m()); - return mk_derivative(ele, star); - } - else if (re().is_union(r, r1, r2)) { - return mk_der_union(mk_derivative(ele, r1), mk_derivative(ele, r2)); - } - else if (re().is_intersection(r, r1, r2)) { - return mk_der_inter(mk_derivative(ele, r1), mk_derivative(ele, r2)); - } - else if (re().is_diff(r, r1, r2)) { - return mk_der_inter(mk_derivative(ele, r1), mk_der_compl(mk_derivative(ele, r2))); - } - else if (m().is_ite(r, p, r1, r2)) { - // there is no BDD normalization here - result = m().mk_ite(p, mk_derivative(ele, r1), mk_derivative(ele, r2)); - return result; - } - else if (re().is_opt(r, r1)) { - return mk_derivative(ele, r1); - } - else if (re().is_complement(r, r1)) { - return mk_der_compl(mk_derivative(ele, r1)); - } - else if (re().is_loop(r, r1, lo)) { - if (lo > 0) { - lo--; - } - result = mk_derivative(ele, r1); - //do not concatenate with [] (emptyset) - if (re().is_empty(result)) { - return result; - } - else { - //do not create loop r1{0,}, instead create r1* - return mk_der_concat(result, (lo == 0 ? re().mk_star(r1) : re().mk_loop(r1, lo))); - } - } - else if (re().is_loop(r, r1, lo, hi)) { - if (hi == 0) { - return mk_empty(); - } - hi--; - if (lo > 0) { - lo--; - } - result = mk_derivative(ele, r1); - //do not concatenate with [] (emptyset) or handle the rest of the loop if no more iterations remain - if (re().is_empty(result) || hi == 0) { - return result; - } - else { - return mk_der_concat(result, re().mk_loop_proper(r1, lo, hi)); - } - } - else if (re().is_full_seq(r) || - re().is_empty(r)) { - return expr_ref(r, m()); - } - else if (re().is_to_re(r, r1)) { - // r1 is a string here (not a regexp) - expr_ref hd(m()), tl(m()); - if (get_head_tail(r1, hd, tl)) { - // head must be equal; if so, derivative is tail - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv to_re" << std::endl;); - result = m().mk_eq(ele, hd); - result = mk_der_cond(result, ele, seq_sort); - expr_ref r1(re().mk_to_re(tl), m()); - result = mk_der_concat(result, r1); - return result; - } - else if (str().is_empty(r1)) { - //observe: str().is_empty(r1) checks that r = () = epsilon - //while mk_empty() = [], because deriv(epsilon) = [] = nothing - return mk_empty(); - } - else if (str().is_itos(r1)) { - // - // here r1 = (str.from_int r2) and r2 is non-ground - // or else the expression would have been simplified earlier - // so r1 must be nonempty and must consists of decimal digits - // '0' <= elem <= '9' - // if ((isdigit ele) and (ele = (hd r1))) then (to_re (tl r1)) else [] - // - hd = mk_seq_first(r1); - // isolate nested conjunction for deterministic evaluation - auto a0 = u().mk_le(m_util.mk_char('0'), ele); - auto a1 = u().mk_le(ele, m_util.mk_char('9')); - auto a2 = m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))); - auto a3 = m().mk_eq(hd, ele); - auto inner = m().mk_and(a2, a3); - m_br.mk_and(a0, a1, inner, result); - tl = re().mk_to_re(mk_seq_rest(r1)); - return re_and(result, tl); - } - else { - // recall: [] denotes the empty language (nothing) regex, () denotes epsilon or empty sequence - // construct the term (if (r1 != () and (ele = (first r1)) then (to_re (rest r1)) else [])) - hd = mk_seq_first(r1); - m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result); - tl = re().mk_to_re(mk_seq_rest(r1)); - return re_and(result, tl); - } - } - else if (re().is_reverse(r, r1)) { - if (re().is_to_re(r1, r2)) { - // First try to extract hd and tl such that r = hd ++ tl and |tl|=1 - expr_ref hd(m()), tl(m()); - if (get_head_tail_reversed(r2, hd, tl)) { - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv reverse to_re" << std::endl;); - result = m().mk_eq(ele, tl); - result = mk_der_cond(result, ele, seq_sort); - result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd))); - return result; - } - else if (str().is_empty(r2)) { - return mk_empty(); - } - else { - // construct the term (if (r2 != () and (ele = (last r2)) then reverse(to_re (butlast r2)) else [])) - // hd = first of reverse(r2) i.e. last of r2 - // tl = rest of reverse(r2) i.e. butlast of r2 - //hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), one())); - hd = mk_seq_last(r2); - // factor nested constructor calls to enforce deterministic argument evaluation order - auto a_non_empty = m().mk_not(m().mk_eq(r2, str().mk_empty(seq_sort))); - auto a_eq = m().mk_eq(hd, ele); - m_br.mk_and(a_non_empty, a_eq, result); - tl = re().mk_to_re(mk_seq_butlast(r2)); - return re_and(result, re().mk_reverse(tl)); - } - } - } - else if (re().is_range(r, r1, r2)) { - // r1, r2 are sequences. - zstring s1, s2; - if (str().is_string(r1, s1) && str().is_string(r2, s2)) { - if (s1.length() == 1 && s2.length() == 1) { - expr_ref ch1(m_util.mk_char(s1[0]), m()); - expr_ref ch2(m_util.mk_char(s2[0]), m()); - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv range zstring" << std::endl;); - expr_ref p1(u().mk_le(ch1, ele), m()); - p1 = mk_der_cond(p1, ele, seq_sort); - expr_ref p2(u().mk_le(ele, ch2), m()); - p2 = mk_der_cond(p2, ele, seq_sort); - result = mk_der_inter(p1, p2); - return result; - } - else { - return mk_empty(); - } - } - expr* e1 = nullptr, * e2 = nullptr; - if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) { - SASSERT(u().is_char(e1)); - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv range str" << std::endl;); - expr_ref p1(u().mk_le(e1, ele), m()); - p1 = mk_der_cond(p1, ele, seq_sort); - expr_ref p2(u().mk_le(ele, e2), m()); - p2 = mk_der_cond(p2, ele, seq_sort); - result = mk_der_inter(p1, p2); - return result; - } - } - else if (re().is_full_char(r)) { - return expr_ref(re().mk_to_re(str().mk_empty(seq_sort)), m()); - } - else if (re().is_of_pred(r, p)) { - array_util array(m()); - expr* args[2] = { p, ele }; - result = array.mk_select(2, args); - // Use mk_der_cond to normalize - STRACE(seq_verbose, tout << "deriv of_pred" << std::endl;); - return mk_der_cond(result, ele, seq_sort); - } - // stuck cases: re.derivative, re variable, - return expr_ref(re().mk_derivative(ele, r), m()); -} /************************************************* ***** End Derivative Code ***** @@ -4571,6 +4395,16 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { } std::swap(a, b); } + // Hoist ite out of concat: concat(ite(c, r1, r2), b) → ite(c, concat(r1, b), concat(r2, b)) + expr* c = nullptr; + if (m().is_ite(a, c, a1, b1)) { + result = m().mk_ite(c, re().mk_concat(a1, b), re().mk_concat(b1, b)); + return BR_REWRITE3; + } + if (m().is_ite(b, c, a1, b1)) { + result = m().mk_ite(c, re().mk_concat(a, a1), re().mk_concat(a, b1)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4663,6 +4497,21 @@ br_status seq_rewriter::mk_re_union0(expr* a, expr* b, expr_ref& result) { result = b; return BR_DONE; } + // r ∪ ~r → Σ* (complement absorption) + if (are_complements(a, b)) { + result = re().mk_full_seq(a->get_sort()); + return BR_DONE; + } + // Hoist ite out of union: union(ite(c, r1, r2), b) → ite(c, union(r1, b), union(r2, b)) + expr *c = nullptr, *r1 = nullptr, *r2 = nullptr; + if (m().is_ite(a, c, r1, r2)) { + result = m().mk_ite(c, re().mk_union(r1, b), re().mk_union(r2, b)); + return BR_REWRITE3; + } + if (m().is_ite(b, c, r1, r2)) { + result = m().mk_ite(c, re().mk_union(a, r1), re().mk_union(a, r2)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4705,6 +4554,12 @@ br_status seq_rewriter::mk_re_complement(expr* a, expr_ref& result) { result = re().mk_plus(re().mk_full_char(a->get_sort())); return BR_DONE; } + // Hoist ite out of complement: ~(ite(c, r1, r2)) → ite(c, ~r1, ~r2) + expr* c = nullptr; + if (m().is_ite(a, c, e1, e2)) { + result = m().mk_ite(c, re().mk_complement(e1), re().mk_complement(e2)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4730,6 +4585,21 @@ br_status seq_rewriter::mk_re_inter0(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } + // r ∩ ~r → ∅ (complement absorption) + if (are_complements(a, b)) { + result = re().mk_empty(a->get_sort()); + return BR_DONE; + } + // Hoist ite out of intersection: inter(ite(c, r1, r2), b) → ite(c, inter(r1, b), inter(r2, b)) + expr *c = nullptr, *r1 = nullptr, *r2 = nullptr; + if (m().is_ite(a, c, r1, r2)) { + result = m().mk_ite(c, re().mk_inter(r1, b), re().mk_inter(r2, b)); + return BR_REWRITE3; + } + if (m().is_ite(b, c, r1, r2)) { + result = m().mk_ite(c, re().mk_inter(a, r1), re().mk_inter(a, r2)); + return BR_REWRITE3; + } return BR_FAILED; } @@ -4884,7 +4754,9 @@ br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) { result = re().mk_full_seq(b1->get_sort()); return BR_REWRITE2; } - + // Hoist ite out of star: (ite c r1 r2)* → ite(c, r1*, r2*) + result = m().mk_ite(c, re().mk_star(b1), re().mk_star(c1)); + return BR_REWRITE3; } return BR_FAILED; } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 583720911..9668c299c 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -19,6 +19,7 @@ Notes: #pragma once #include "ast/seq_decl_plugin.h" +#include "ast/seq_derive.h" #include "ast/ast_pp.h" #include "ast/arith_decl_plugin.h" #include "ast/rewriter/rewriter_types.h" @@ -130,10 +131,13 @@ class seq_rewriter { seq_util m_util; arith_util m_autil; bool_rewriter m_br; + seq::derive m_derive; // re2automaton m_re2aut; op_cache m_op_cache; expr_ref_vector m_es, m_lhs, m_rhs; bool m_coalesce_chars; + unsigned m_re_deriv_depth { 0 }; + static const unsigned m_max_re_deriv_depth = 512; enum length_comparison { shorter_c, @@ -172,7 +176,6 @@ class seq_rewriter { // Calculate derivative, memoized and enforcing a normal form expr_ref is_nullable_rec(expr* r); - expr_ref mk_derivative_rec(expr* ele, expr* r); expr_ref mk_der_op(decl_kind k, expr* a, expr* b); expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); expr_ref mk_der_concat(expr* a, expr* b); @@ -340,7 +343,7 @@ class seq_rewriter { public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): - m_util(m), m_autil(m), m_br(m, p), // m_re2aut(m), + m_util(m), m_autil(m), m_br(m, p), m_derive(m), // m_re2aut(m), m_op_cache(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { } diff --git a/src/ast/seq_derive.cpp b/src/ast/seq_derive.cpp new file mode 100644 index 000000000..96ec04059 --- /dev/null +++ b/src/ast/seq_derive.cpp @@ -0,0 +1,679 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + seq_derive.cpp + +Abstract: + + Symbolic derivative computation for regular expressions. + Produces an ITE-tree (transition regex) representation following + the approach of RE# (Varatalu, Veanes, Ernits - POPL 2025). + + The symbolic derivative δ(r) maps each character to the resulting + derivative state via an ITE-tree. The free variable (:var 0) represents + the input character. + +Authors: + + Nikolaj Bjorner (nbjorner) 2025-06-03 + +--*/ + +#include "ast/seq_derive.h" +#include "ast/ast_pp.h" +#include "ast/array_decl_plugin.h" +#include "ast/rewriter/bool_rewriter.h" +#include + +namespace seq { + + derive::derive(ast_manager& m) : + m(m), + m_util(m), + m_autil(m), + m_br(m), + m_trail(m), + m_ele(m) { + m_br.set_flat_and_or(false); + } + + void derive::reset() { + m_cache.reset(); + m_trail.reset(); + } + + expr_ref derive::operator()(expr* ele, expr* r) { + SASSERT(m_util.is_re(r)); + m_ele = ele; + m_depth = 0; + expr_ref result = derive_rec(r); + m_ele = nullptr; + return result; + } + + expr_ref derive::operator()(expr* r) { + SASSERT(m_util.is_re(r)); + sort* seq_sort = nullptr, * ele_sort = nullptr; + VERIFY(m_util.is_re(r, seq_sort)); + VERIFY(m_util.is_seq(seq_sort, ele_sort)); + expr_ref v(m.mk_var(0, ele_sort), m); + return (*this)(v, r); + } + + // ------------------------------------------------------- + // Core derivative computation + // ------------------------------------------------------- + + expr_ref derive::derive_rec(expr* r) { + SASSERT(m_util.is_re(r)); + + // Check cache + expr* cached = nullptr; + if (m_cache.find(r, cached)) + return expr_ref(cached, m); + + // Depth check + if (m_depth >= m_max_depth) { + // Return stuck derivative (the derivative operator applied symbolically) + return expr_ref(re().mk_derivative(m_ele, r), m); + } + + ++m_depth; + expr_ref result = derive_core(r); + --m_depth; + + // Cache the result + m_cache.insert(r, result); + m_trail.push_back(r); + m_trail.push_back(result); + return result; + } + + // Forward declaration helper + expr_ref derive::derive_core(expr* r) { + sort* s = nullptr; + VERIFY(m_util.is_re(r, s)); + + auto nothing = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m); }; + auto epsilon = [&]() { return expr_ref(re().mk_to_re(u().str.mk_empty(s)), m); }; + auto dotstar = [&]() { return expr_ref(re().mk_full_seq(r->get_sort()), m); }; + + expr* r1 = nullptr; + expr* r2 = nullptr; + expr* cond = nullptr; + unsigned lo = 0, hi = 0; + + // δ(∅) = ∅, δ(ε) = ∅ + if (re().is_empty(r) || re().is_epsilon(r)) + return nothing(); + + // δ(Σ*) = Σ*, δ(.+) = Σ* + if (re().is_full_seq(r) || re().is_dot_plus(r)) + return dotstar(); + + // δ(.) = ε (full char accepts any single character) + if (re().is_full_char(r)) + return epsilon(); + + // δ(str.to_re(s)) - derivative of a literal string + if (re().is_to_re(r, r1)) + return derive_to_re(r1, s); + + // δ(re.range(lo, hi)) - character range + if (re().is_range(r, r1, r2)) + return derive_range(r1, r2, s); + + // δ(re.of_pred(p)) - predicate-based regex + if (re().is_of_pred(r, r1)) + return derive_of_pred(r1, s); + + // δ(r1 · r2) = δ(r1) · r2 ∪ (if nullable(r1) then δ(r2) else ∅) + if (re().is_concat(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d1_r2 = mk_deriv_concat(d1, r2); + expr_ref nullable_r1 = is_nullable(r1); + if (m.is_true(nullable_r1)) + return mk_union(d1_r2, derive_rec(r2)); + if (m.is_false(nullable_r1)) + return d1_r2; + // Conditional: nullable is a Boolean expression + expr_ref d2 = derive_rec(r2); + expr_ref guarded = mk_ite(nullable_r1, d2, nothing()); + return mk_union(d1_r2, guarded); + } + + // δ(r1 ∪ r2) = δ(r1) ∪ δ(r2) + if (re().is_union(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + return mk_union(d1, d2); + } + + // δ(r1 ∩ r2) = δ(r1) ∩ δ(r2) + if (re().is_intersection(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + return mk_inter(d1, d2); + } + + // δ(~r1) = ~δ(r1) + if (re().is_complement(r, r1)) { + expr_ref d1 = derive_rec(r1); + return mk_complement(d1); + } + + // δ(r1*) = δ(r1) · r1* + if (re().is_star(r, r1)) { + expr_ref d1 = derive_rec(r1); + expr_ref star_r1(re().mk_star(r1), m); + return mk_deriv_concat(d1, star_r1); + } + + // δ(r1+) = δ(r1) · r1* + if (re().is_plus(r, r1)) { + expr_ref d1 = derive_rec(r1); + expr_ref star_r1(re().mk_star(r1), m); + return mk_deriv_concat(d1, star_r1); + } + + // δ(r1?) = δ(r1) + if (re().is_opt(r, r1)) + return derive_rec(r1); + + // δ(r1{lo,hi}) + if (re().is_loop(r, r1, lo, hi)) { + if (hi == 0 || hi < lo) + return nothing(); + expr_ref d1 = derive_rec(r1); + expr_ref tail(re().mk_loop_proper(r1, (lo == 0 ? 0 : lo - 1), hi - 1), m); + return mk_deriv_concat(d1, tail); + } + + // δ(r1{lo,}) - unbounded loop + if (re().is_loop(r, r1, lo)) { + expr_ref d1 = derive_rec(r1); + expr_ref tail(re().mk_loop(r1, (lo == 0 ? 0 : lo - 1)), m); + return mk_deriv_concat(d1, tail); + } + + // δ(r1 \ r2) = δ(r1) ∩ ~δ(r2) + if (re().is_diff(r, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + expr_ref neg_d2 = mk_complement(d2); + return mk_inter(d1, neg_d2); + } + + // δ(ite(c, r1, r2)) = ite(c, δ(r1), δ(r2)) + if (m.is_ite(r, cond, r1, r2)) { + expr_ref d1 = derive_rec(r1); + expr_ref d2 = derive_rec(r2); + return mk_ite(cond, d1, d2); + } + + // δ(reverse(r1)) - stuck: return symbolic derivative + if (re().is_reverse(r, r1)) + return expr_ref(re().mk_derivative(m_ele, r), m); + + // Stuck/uninterpreted case + return expr_ref(re().mk_derivative(m_ele, r), m); + } + + // ------------------------------------------------------- + // Derivative of specific regex constructs + // ------------------------------------------------------- + + expr_ref derive::derive_to_re(expr* s, sort* seq_sort) { + sort* re_sort = re().mk_re(seq_sort); + // δ(to_re("")) = ∅ + if (u().str.is_empty(s)) + return expr_ref(re().mk_empty(re_sort), m); + + // δ(to_re("c₁c₂...cₙ")) = ite(ele = c₁, to_re("c₂...cₙ"), ∅) + zstring zs; + if (u().str.is_string(s, zs)) { + if (zs.length() == 0) + return expr_ref(re().mk_empty(re_sort), m); + // First character + expr_ref head(m_util.mk_char(zs[0]), m); + expr_ref cond(m.mk_eq(m_ele, head), m); + // Tail string + expr_ref tail_str(u().str.mk_string(zs.extract(1, zs.length() - 1)), m); + expr_ref tail_re(re().mk_to_re(tail_str), m); + expr_ref empty(re().mk_empty(re_sort), m); + return mk_ite(cond, tail_re, empty); + } + + // Non-ground sequence: δ(to_re(s)) = ite(s ≠ "" ∧ ele = s[0], to_re(s[1:]), ∅) + expr_ref empty_seq(u().str.mk_empty(seq_sort), m); + expr_ref is_non_empty(m.mk_not(m.mk_eq(s, empty_seq)), m); + expr_ref zero(m_autil.mk_int(0), m); + expr_ref first(u().str.mk_nth_i(s, zero), m); + expr_ref eq_first(m.mk_eq(m_ele, first), m); + expr_ref guard(m.mk_and(is_non_empty, eq_first), m); + expr_ref one(m_autil.mk_int(1), m); + expr_ref len(u().str.mk_length(s), m); + expr_ref rest_len(m_autil.mk_sub(len, one), m); + expr_ref rest(u().str.mk_substr(s, one, rest_len), m); + expr_ref rest_re(re().mk_to_re(rest), m); + expr_ref empty(re().mk_empty(re_sort), m); + return mk_ite(guard, rest_re, empty); + } + + expr_ref derive::derive_range(expr* lo, expr* hi, sort* seq_sort) { + sort* re_sort = re().mk_re(seq_sort); + expr_ref empty(re().mk_empty(re_sort), m); + expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m); + + // Extract character values from unit strings + expr_ref c_lo(m), c_hi(m); + if (u().str.is_unit_string(lo, c_lo) && u().str.is_unit_string(hi, c_hi)) { + // ite(lo <= ele && ele <= hi, ε, ∅) + expr_ref ge_lo(m_util.mk_le(c_lo, m_ele), m); + expr_ref le_hi(m_util.mk_le(m_ele, c_hi), m); + expr_ref in_range(m.mk_and(ge_lo, le_hi), m); + return mk_ite(in_range, eps, empty); + } + + // Fallback: stuck derivative + return expr_ref(re().mk_derivative(m_ele, re().mk_range(lo, hi)), m); + } + + expr_ref derive::derive_of_pred(expr* pred, sort* seq_sort) { + sort* re_sort = re().mk_re(seq_sort); + expr_ref empty(re().mk_empty(re_sort), m); + expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m); + + // Apply predicate to the element + array_util autil(m); + expr* args[2] = { pred, m_ele }; + expr_ref cond(autil.mk_select(2, args), m); + return mk_ite(cond, eps, empty); + } + + // ------------------------------------------------------- + // Nullability - uses info class from seq_decl_plugin.h + // ------------------------------------------------------- + + expr_ref derive::is_nullable(expr* r) { + // First, try the static info which handles ground/interpreted regex + lbool nb = re().get_info(r).nullable; + if (nb == l_true) + return expr_ref(m.mk_true(), m); + if (nb == l_false) + return expr_ref(m.mk_false(), m); + // info is undetermined (l_undef) — fall back to recursive computation + return is_nullable_rec(r); + } + + expr_ref derive::is_nullable_rec(expr* r) { + expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr; + sort* s = nullptr; + unsigned lo = 0, hi = 0; + + if (re().is_concat(r, r1, r2) || re().is_intersection(r, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = is_nullable(r2); + expr_ref result(m); + m_br.mk_and(n1, n2, result); + return result; + } + if (re().is_union(r, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = is_nullable(r2); + expr_ref result(m); + m_br.mk_or(n1, n2, result); + return result; + } + if (re().is_complement(r, r1)) { + expr_ref n1 = is_nullable(r1); + expr_ref result(m); + m_br.mk_not(n1, result); + return result; + } + if (re().is_diff(r, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = is_nullable(r2); + expr_ref not_n2(m); + m_br.mk_not(n2, not_n2); + expr_ref result(m); + m_br.mk_and(n1, not_n2, result); + return result; + } + if (re().is_to_re(r, r1)) { + if (u().str.is_empty(r1)) + return expr_ref(m.mk_true(), m); + zstring zs; + if (u().str.is_string(r1, zs)) + return expr_ref(m.mk_bool_val(zs.length() == 0), m); + return expr_ref(m.mk_eq(r1, u().str.mk_empty(r1->get_sort())), m); + } + if (m.is_ite(r, cond, r1, r2)) { + expr_ref n1 = is_nullable(r1); + expr_ref n2 = is_nullable(r2); + expr_ref result(m); + m_br.mk_ite(cond, n1, n2, result); + return result; + } + // Unknown: use membership test + if (m_util.is_re(r, s)) + return expr_ref(re().mk_in_re(u().str.mk_empty(s), r), m); + + return expr_ref(m.mk_true(), m); + } + + // ------------------------------------------------------- + // Smart constructors with simplification + // ------------------------------------------------------- + + expr_ref derive::mk_union(expr* a, expr* b) { + // Identity / annihilator + if (a == b) return expr_ref(a, m); + if (re().is_empty(a)) return expr_ref(b, m); + if (re().is_empty(b)) return expr_ref(a, m); + if (re().is_full_seq(a)) return expr_ref(a, m); + if (re().is_full_seq(b)) return expr_ref(b, m); + + // Complement absorption: r ∪ ~r = Σ* + expr* c = nullptr; + if (re().is_complement(a, c) && c == b) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + if (re().is_complement(b, c) && c == a) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + + // ITE combination: if both are ITE with same condition, merge + expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { + expr_ref then_br = mk_union(t1, t2); + expr_ref else_br = mk_union(e1, e2); + return mk_ite(c1, then_br, else_br); + } + + // ACI: flatten, sort, deduplicate + expr_ref_vector args(m); + flatten_union(a, args); + flatten_union(b, args); + + // Sort by expr id for canonical form + std::stable_sort(args.data(), args.data() + args.size(), + [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); + + // Deduplicate + unsigned j = 0; + for (unsigned i = 0; i < args.size(); ++i) { + if (j > 0 && args.get(i) == args.get(j - 1)) + continue; // skip duplicate + if (re().is_empty(args.get(i))) + continue; // skip empty + if (re().is_full_seq(args.get(i))) + return expr_ref(args.get(i), m); // universal absorbs + args.set(j++, args.get(i)); + } + args.shrink(j); + + if (args.empty()) + return expr_ref(re().mk_empty(a->get_sort()), m); + + return mk_union_from_sorted(args); + } + + expr_ref derive::mk_inter(expr* a, expr* b) { + // Identity / annihilator + if (a == b) return expr_ref(a, m); + if (re().is_empty(a)) return expr_ref(a, m); + if (re().is_empty(b)) return expr_ref(b, m); + if (re().is_full_seq(a)) return expr_ref(b, m); + if (re().is_full_seq(b)) return expr_ref(a, m); + + // Complement absorption: r ∩ ~r = ∅ + expr* c = nullptr; + if (re().is_complement(a, c) && c == b) + return expr_ref(re().mk_empty(a->get_sort()), m); + if (re().is_complement(b, c) && c == a) + return expr_ref(re().mk_empty(a->get_sort()), m); + + // ITE combination: if both are ITE with same condition, merge + expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) { + expr_ref then_br = mk_inter(t1, t2); + expr_ref else_br = mk_inter(e1, e2); + return mk_ite(c1, then_br, else_br); + } + + // ACI: flatten, sort, deduplicate + expr_ref_vector args(m); + flatten_inter(a, args); + flatten_inter(b, args); + + std::stable_sort(args.data(), args.data() + args.size(), + [](expr* x, expr* y) { return x->get_id() < y->get_id(); }); + + unsigned j = 0; + for (unsigned i = 0; i < args.size(); ++i) { + if (j > 0 && args.get(i) == args.get(j - 1)) + continue; + if (re().is_full_seq(args.get(i))) + continue; // skip universal + if (re().is_empty(args.get(i))) + return expr_ref(args.get(i), m); // empty absorbs + args.set(j++, args.get(i)); + } + args.shrink(j); + + if (args.empty()) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + + return mk_inter_from_sorted(args); + } + + expr_ref derive::mk_concat(expr* a, expr* b) { + if (re().is_empty(a)) return expr_ref(a, m); + if (re().is_empty(b)) return expr_ref(b, m); + if (re().is_epsilon(a)) return expr_ref(b, m); + if (re().is_epsilon(b)) return expr_ref(a, m); + + // to_re(s1) · to_re(s2) → to_re(s1 ++ s2) + expr* s1 = nullptr, * s2 = nullptr; + if (re().is_to_re(a, s1) && re().is_to_re(b, s2)) + return expr_ref(re().mk_to_re(u().str.mk_concat(s1, s2)), m); + + // r* · r* → r* + expr* a1 = nullptr, * b1 = nullptr; + if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1) + return expr_ref(a, m); + + // Right-associate: (a · b) · c → a · (b · c) + if (re().is_concat(a, a1, b1)) { + expr_ref tail = mk_concat(b1, b); + return expr_ref(re().mk_concat(a1, tail), m); + } + + return expr_ref(re().mk_concat(a, b), m); + } + + expr_ref derive::mk_complement(expr* a) { + // ~~r → r + expr* r = nullptr; + if (re().is_complement(a, r)) + return expr_ref(r, m); + // ~∅ → Σ* + if (re().is_empty(a)) + return expr_ref(re().mk_full_seq(a->get_sort()), m); + // ~Σ* → ∅ + if (re().is_full_seq(a)) + return expr_ref(re().mk_empty(a->get_sort()), m); + + // Push through ITE: ~(ite(c, t, e)) → ite(c, ~t, ~e) + expr* c, * t, * e; + if (m.is_ite(a, c, t, e)) { + expr_ref ct = mk_complement(t); + expr_ref ce = mk_complement(e); + return mk_ite(c, ct, ce); + } + + return expr_ref(re().mk_complement(a), m); + } + + expr_ref derive::mk_ite(expr* c, expr* t, expr* e) { + if (m.is_true(c) || t == e) + return expr_ref(t, m); + if (m.is_false(c)) + return expr_ref(e, m); + return expr_ref(m.mk_ite(c, t, e), m); + } + + // ------------------------------------------------------- + // ACI normalization helpers + // ------------------------------------------------------- + + void derive::flatten_union(expr* r, expr_ref_vector& args) { + expr* a = nullptr, * b = nullptr; + if (re().is_union(r, a, b)) { + flatten_union(a, args); + flatten_union(b, args); + } + else { + args.push_back(r); + } + } + + void derive::flatten_inter(expr* r, expr_ref_vector& args) { + expr* a = nullptr, * b = nullptr; + if (re().is_intersection(r, a, b)) { + flatten_inter(a, args); + flatten_inter(b, args); + } + else { + args.push_back(r); + } + } + + expr_ref derive::mk_union_from_sorted(expr_ref_vector& args) { + if (args.empty()) { + // All elements were identity/absorbed - should not happen in practice + // but handle gracefully + UNREACHABLE(); + return expr_ref(m.mk_true(), m); + } + if (args.size() == 1) + return expr_ref(args.get(0), m); + // Build right-associated union + expr_ref result(args.back(), m); + for (unsigned i = args.size() - 1; i > 0; ) { + --i; + result = expr_ref(re().mk_union(args.get(i), result), m); + } + return result; + } + + expr_ref derive::mk_inter_from_sorted(expr_ref_vector& args) { + if (args.empty()) { + UNREACHABLE(); + return expr_ref(m.mk_true(), m); + } + if (args.size() == 1) + return expr_ref(args.get(0), m); + // Build right-associated intersection + expr_ref result(args.back(), m); + for (unsigned i = args.size() - 1; i > 0; ) { + --i; + result = expr_ref(re().mk_inter(args.get(i), result), m); + } + return result; + } + + // ------------------------------------------------------- + // ITE-tree combinators (analogous to REsharp mk_binary/mk_unary) + // ------------------------------------------------------- + + expr_ref derive::ite_combine_binary(expr* d1, expr* d2, + std::function const& op) { + expr *c1, *t1, *e1, *c2, *t2, *e2; + + // Both are leaves (non-ITE) + if (!m.is_ite(d1, c1, t1, e1) && !m.is_ite(d2, c2, t2, e2)) + return op(d1, d2); + + // d1 is ITE, d2 is not + if (m.is_ite(d1, c1, t1, e1) && !m.is_ite(d2, c2, t2, e2)) { + expr_ref then_r = ite_combine_binary(t1, d2, op); + expr_ref else_r = ite_combine_binary(e1, d2, op); + return mk_ite(c1, then_r, else_r); + } + + // d2 is ITE, d1 is not + if (!m.is_ite(d1, c1, t1, e1) && m.is_ite(d2, c2, t2, e2)) { + expr_ref then_r = ite_combine_binary(d1, t2, op); + expr_ref else_r = ite_combine_binary(d1, e2, op); + return mk_ite(c2, then_r, else_r); + } + + // Both are ITE + VERIFY(m.is_ite(d1, c1, t1, e1)); + VERIFY(m.is_ite(d2, c2, t2, e2)); + + if (c1 == c2) { + // Same condition: combine pairwise + expr_ref then_r = ite_combine_binary(t1, t2, op); + expr_ref else_r = ite_combine_binary(e1, e2, op); + return mk_ite(c1, then_r, else_r); + } + + // Order by condition id (larger id on outside for canonical form) + if (c1->get_id() > c2->get_id()) { + expr_ref then_r = ite_combine_binary(t1, d2, op); + expr_ref else_r = ite_combine_binary(e1, d2, op); + return mk_ite(c1, then_r, else_r); + } + else { + expr_ref then_r = ite_combine_binary(d1, t2, op); + expr_ref else_r = ite_combine_binary(d1, e2, op); + return mk_ite(c2, then_r, else_r); + } + } + + expr_ref derive::ite_combine_unary(expr* d, + std::function const& op) { + expr* c, * t, * e; + if (m.is_ite(d, c, t, e)) { + expr_ref then_r = ite_combine_unary(t, op); + expr_ref else_r = ite_combine_unary(e, op); + return mk_ite(c, then_r, else_r); + } + return op(d); + } + + // ------------------------------------------------------- + // Distribute concat through ITE/union structure of derivative + // ------------------------------------------------------- + + expr_ref derive::mk_deriv_concat(expr* d, expr* tail) { + expr_ref _d(d, m), _tail(tail, m); + expr* c, * t, * e; + + if (re().is_empty(d)) + return expr_ref(d, m); + if (re().is_epsilon(d)) + return expr_ref(tail, m); + + if (m.is_ite(d, c, t, e)) { + expr_ref then_r = mk_deriv_concat(t, tail); + expr_ref else_r = mk_deriv_concat(e, tail); + return mk_ite(c, then_r, else_r); + } + + if (re().is_union(d, t, e)) { + expr_ref left = mk_deriv_concat(t, tail); + expr_ref right = mk_deriv_concat(e, tail); + return mk_union(left, right); + } + + return mk_concat(d, tail); + } + +} + + + diff --git a/src/ast/seq_derive.h b/src/ast/seq_derive.h new file mode 100644 index 000000000..b093366cc --- /dev/null +++ b/src/ast/seq_derive.h @@ -0,0 +1,130 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + seq_derive.h + +Abstract: + + Symbolic derivative computation for regular expressions. + Produces an ITE-tree (transition regex) representation where + the free variable is de Bruijn index 0 representing the input character. + + Based on the theory of symbolic derivatives and transition regexes: + - Veanes et al., "On Symbolic Derivatives and Transition Regexes" (LPAR 2024) + - Varatalu, Veanes, Ernits, "RE#" (POPL 2025) + - Stanford, Veanes, Bjørner, "Symbolic Boolean Derivatives" (PLDI 2021) + +Authors: + + Nikolaj Bjorner (nbjorner) 2025-06-03 + +--*/ + +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/arith_decl_plugin.h" +#include "ast/array_decl_plugin.h" +#include "ast/rewriter/bool_rewriter.h" + +namespace seq { + + /** + * Symbolic derivative engine for regular expressions. + * + * Given a regex r, operator()(r) computes a symbolic derivative δ(r) + * represented as an ITE-tree over character predicates (using de Bruijn + * variable 0 for the character). Evaluating the ITE-tree for a concrete + * character 'a' yields the classical Brzozowski derivative δ_a(r). + * + * The ITE-tree structure implicitly defines minterms (equivalence classes + * of characters indistinguishable by the regex). + * + * Key properties: + * - Results are memoized for termination on cyclic derivative graphs + * - Union/intersection operands are sorted for ACI canonicalization + * - Depth-bounded to prevent stack overflow + */ + class derive { + ast_manager& m; + seq_util m_util; + arith_util m_autil; + bool_rewriter m_br; + + // Cache: maps regex expr to its symbolic derivative + obj_map m_cache; + expr_ref_vector m_trail; // pin cached results + + // Depth limiting + unsigned m_depth { 0 }; + static const unsigned m_max_depth = 512; + + seq_util::rex& re() { return m_util.re; } + seq_util& u() { return m_util; } + + // The element (character) for the current derivative computation + expr_ref m_ele; + + // Core derivative computation + expr_ref derive_rec(expr* r); + expr_ref derive_core(expr* r); + + // Helpers for specific regex constructs + expr_ref derive_to_re(expr* s, sort* seq_sort); + expr_ref derive_range(expr* lo, expr* hi, sort* seq_sort); + expr_ref derive_of_pred(expr* pred, sort* seq_sort); + + // Nullable check: returns a Boolean expression + expr_ref is_nullable(expr* r); + expr_ref is_nullable_rec(expr* r); + + // Smart constructors with simplification and ACI canonicalization + expr_ref mk_union(expr* a, expr* b); + expr_ref mk_inter(expr* a, expr* b); + expr_ref mk_concat(expr* a, expr* b); + expr_ref mk_complement(expr* a); + expr_ref mk_ite(expr* c, expr* t, expr* e); + + // Flatten and sort for ACI normal form + void flatten_union(expr* r, expr_ref_vector& args); + void flatten_inter(expr* r, expr_ref_vector& args); + expr_ref mk_union_from_sorted(expr_ref_vector& args); + expr_ref mk_inter_from_sorted(expr_ref_vector& args); + + // ITE-tree binary combinator (analogous to REsharp mk_binary) + // Combines two ITE-tree derivatives with a binary regex operation + expr_ref ite_combine_binary(expr* d1, expr* d2, + std::function const& op); + + // ITE-tree unary combinator (analogous to REsharp mk_unary) + expr_ref ite_combine_unary(expr* d, std::function const& op); + + // Distribute concatenation through ITE/union in derivative + expr_ref mk_deriv_concat(expr* d, expr* tail); + + sort* re_sort(expr* r) { return r->get_sort(); } + sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } + sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; } + + public: + derive(ast_manager& m); + + /** + * Compute the derivative of regex r with respect to element ele. + * When ele is a de Bruijn variable, produces a symbolic ITE-tree. + * When ele is a concrete character, produces the concrete derivative. + */ + expr_ref operator()(expr* ele, expr* r); + + /** + * Convenience: symbolic derivative using de Bruijn var 0. + */ + expr_ref operator()(expr* r); + + void reset(); + }; + +} +