From c530faab4e79e4fc356f5f0263ee827bd97b1875 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 25 Jun 2026 19:40:35 -0700 Subject: [PATCH] remove deprecated derivative code --- src/ast/rewriter/seq_rewriter.cpp | 496 ------------------------------ src/ast/rewriter/seq_rewriter.h | 17 +- src/smt/seq_regex.cpp | 16 +- 3 files changed, 9 insertions(+), 520 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 85adb94d17..bd8961ccf3 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2837,10 +2837,6 @@ br_status seq_rewriter::mk_re_reverse(expr* r, expr_ref& result) { } } -/*************************************************** - ***** Begin Derivative Code ***** - ***************************************************/ - /* Symbolic derivative: seq -> regex -> regex seq should be single char @@ -2864,98 +2860,6 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { return BR_DONE; } -/* - Note: Derivative Normal Form - - When computing derivatives recursively, we preserve the following - BDD normal form: - - - At the top level, the derivative is a union of antimirov derivatives - (Conceptually each element of the union is a different derivative). - We currently express this derivative using an internal op code: - _OP_RE_antimirov_UNION - - An antimirov derivative is a nested if-then-else term. - if-then-elses are pushed outwards and sorted by condition ID - (cond->get_id()), from largest on the outside to smallest on the - inside. Duplicate nested conditions are eliminated. - - The leaves of the if-then-else BDD can have unions themselves, - but these are interpreted as Regex union, not as separate antimirov - derivatives. - - To debug the normal form, call Z3 with -dbg:seq_regex: - this calls check_deriv_normal_form (below) periodically. - - The main logic is in mk_der_op_rec for combining normal forms - (some also in mk_der_compl_rec). -*/ - -#ifdef Z3DEBUG -/* - Debugging to check the derivative normal form that we assume - (see definition above). - - This may fail on unusual/unexpected REs, such as those containing - regex variables, but this is by design as this is only checked - during debugging, and we have not considered how normal form - should apply in such cases. -*/ -bool seq_rewriter::check_deriv_normal_form(expr* r, int level) { - if (level == 3) { // top level - STRACE(seq_verbose, tout - << "Checking derivative normal form invariant...";); - } - expr *r1 = nullptr, *r2 = nullptr, *p = nullptr, *s = nullptr; - unsigned lo = 0, hi = 0; - STRACE(seq_verbose, tout << " (level " << level << ")";); - int new_level = 0; - if (re().is_antimirov_union(r)) { - SASSERT(level >= 2); - new_level = 2; - } - else if (m().is_ite(r)) { - SASSERT(level >= 1); - new_level = 1; - } - - SASSERT(!re().is_diff(r)); - SASSERT(!re().is_opt(r)); - SASSERT(!re().is_plus(r)); - - if (re().is_antimirov_union(r, r1, r2) || - re().is_concat(r, r1, r2) || - re().is_union(r, r1, r2) || - re().is_intersection(r, r1, r2) || - re().is_xor(r, r1, r2) || - m().is_ite(r, p, r1, r2)) { - check_deriv_normal_form(r1, new_level); - check_deriv_normal_form(r2, new_level); - } - else if (re().is_star(r, r1) || - re().is_complement(r, r1) || - re().is_loop(r, r1, lo) || - re().is_loop(r, r1, lo, hi)) { - check_deriv_normal_form(r1, new_level); - } - else if (re().is_reverse(r, r1)) { - SASSERT(re().is_to_re(r1)); - } - else if (re().is_full_seq(r) || - re().is_empty(r) || - re().is_range(r) || - re().is_full_char(r) || - re().is_of_pred(r) || - re().is_to_re(r, s)) { - // OK - } - else { - SASSERT(false); - } - if (level == 3) { - STRACE(seq_verbose, tout << " passed!" << std::endl;); - } - return true; -} -#endif expr_ref seq_rewriter::mk_derivative(expr* r) { auto result = m_derive(seq::derivative_kind::antimirov_t, r); @@ -3162,35 +3066,6 @@ expr_ref seq_rewriter::merge_regex_sets(expr* r1, expr* r2, expr* unit, } } -/* -* calls elim_condition -*/ -expr_ref seq_rewriter::simplify_path(expr* elem, expr* path) { - expr_ref result(path, m()); - elim_condition(elem, result); - return result; -} - - -expr_ref seq_rewriter::mk_der_antimirov_union(expr* r1, expr* r2) { - return mk_der_op(_OP_RE_ANTIMIROV_UNION, r1, r2); -} - -expr_ref seq_rewriter::mk_der_union(expr* r1, expr* r2) { - return mk_der_op(OP_RE_UNION, r1, r2); -} - -expr_ref seq_rewriter::mk_der_inter(expr* r1, expr* r2) { - return mk_der_op(OP_RE_INTERSECT, r1, r2); -} - -expr_ref seq_rewriter::mk_der_xor(expr* r1, expr* r2) { - return mk_der_op(OP_RE_XOR, r1, r2); -} - -expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) { - return mk_der_op(OP_RE_CONCAT, r1, r2); -} /* Utility functions to decide char <, ==, !=, and <=. @@ -3213,377 +3088,6 @@ bool seq_rewriter::le_char(expr* ch1, expr* ch2) { return eq_char(ch1, ch2) || lt_char(ch1, ch2); } -/* - Utility function to decide if a simple predicate (ones that appear - as the conditions in if-then-else expressions in derivatives) - implies another. - - Return true if we deduce that a implies b, false if unknown. - - Current cases handled: - - a and b are char <= constraints, or negations of char <= constraints - - a and b are equalities (element = constant char), or their negations. - These arise from derivatives of single characters and must be pruned - when combining BDDs so that no unreachable branch such as - ite(x = 'a', ite(x = 'b', ...), ...) (with 'a' != 'b') is created. -*/ -bool seq_rewriter::pred_implies(expr* a, expr* b) { - STRACE(seq_verbose, tout << "pred_implies: " - << "," << mk_pp(a, m()) - << "," << mk_pp(b, m()) << std::endl;); - expr *cha1 = nullptr, *cha2 = nullptr, *nota = nullptr, - *chb1 = nullptr, *chb2 = nullptr, *notb = nullptr; - // (element = constant char), returning the element and char code. - auto is_char_eq = [&](expr* e, expr*& x, unsigned& v) { - expr* t1 = nullptr, *t2 = nullptr; - if (!m().is_eq(e, t1, t2)) - return false; - if (u().is_const_char(t2, v)) { x = t1; return true; } - if (u().is_const_char(t1, v)) { x = t2; return true; } - return false; - }; - expr *xa = nullptr, *xb = nullptr; - unsigned va = 0, vb = 0; - if (is_char_eq(a, xa, va)) { - // a is (xa = va) - if (is_char_eq(b, xb, vb) && xa == xb) - // (x = va) => (x = vb) iff va == vb - return va == vb; - if (m().is_not(b, notb) && is_char_eq(notb, xb, vb) && xa == xb) - // (x = va) => not (x = vb) iff va != vb - return va != vb; - } - if (m().is_not(a, nota) && - m().is_not(b, notb)) { - return pred_implies(notb, nota); - } - else if (u().is_char_le(a, cha1, cha2) && - u().is_char_le(b, chb1, chb2)) { - return le_char(chb1, cha1) && le_char(cha2, chb2); - } - else if (u().is_char_le(a, cha1, cha2) && - m().is_not(b, notb) && - u().is_char_le(notb, chb1, chb2)) { - return (le_char(chb2, cha1) && lt_char(cha2, chb1)) || - (lt_char(chb2, cha1) && le_char(cha2, chb1)); - } - else if (u().is_char_le(b, chb1, chb2) && - m().is_not(a, nota) && - u().is_char_le(nota, cha1, cha2)) { - return le_char(chb1, cha2) && le_char(cha1, chb2); - } - return false; -} - -/* - Utility function to decide if two BDDs (nested if-then-else terms) - have exactly the same structure and conditions. -*/ -bool seq_rewriter::ite_bdds_compatible(expr* a, expr* b) { - expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; - expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; - if (m().is_ite(a, ca, a1, a2) && m().is_ite(b, cb, b1, b2)) { - return (ca == cb) && ite_bdds_compatible(a1, b1) - && ite_bdds_compatible(a2, b2); - } - else if (m().is_ite(a) || m().is_ite(b)) { - return false; - } - else { - return true; - } -} - -/* - Apply a binary operation, preserving normal form on derivative expressions. - - Preconditions: - - k is one of the following binary op codes on REs: - OP_RE_INTERSECT - OP_RE_UNION - OP_RE_CONCAT - _OP_RE_antimirov_UNION - - a and b are in normal form (check_deriv_normal_form) - - Postcondition: - - result is in normal form (check_deriv_normal_form) -*/ -expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { - STRACE(seq_verbose, tout << "mk_der_op_rec: " << k - << "," << mk_pp(a, m()) - << "," << mk_pp(b, m()) << std::endl;); - expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; - expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; - expr_ref result(m()); - - // Simplify if-then-elses whenever possible - auto mk_ite = [&](expr* c, expr* a, expr* b) { - return (a == b) ? a : m().mk_ite(c, a, b); - }; - // Use character code to order conditions - auto get_id = [&](expr* e) { - expr *ch1 = nullptr, *ch2 = nullptr; - unsigned ch; - if (u().is_char_le(e, ch1, ch2) && u().is_const_char(ch2, ch)) - return ch; - // Fallback: use expression ID (but use same ID for negation) - m().is_not(e, e); - return e->get_id(); - }; - - // Choose when to lift a union to the top level, by converting - // it to an antimirov union - // This implements a restricted form of antimirov derivatives - if (k == OP_RE_UNION) { - if (re().is_antimirov_union(a) || re().is_antimirov_union(b)) { - k = _OP_RE_ANTIMIROV_UNION; - } - #if 0 - // Disabled: eager antimirov lifting unless BDDs are compatible - // Note: the check for BDD compatibility could be made more - // sophisticated: in an antimirov union of n terms, we really - // want to check if any pair of them is compatible. - else if (m().is_ite(a) && m().is_ite(b) && - !ite_bdds_compatible(a, b)) { - k = _OP_RE_ANTIMIROV_UNION; - } - #endif - } - if (k == _OP_RE_ANTIMIROV_UNION) { - result = re().mk_antimirov_union(a, b); - return result; - } - if (re().is_antimirov_union(a, a1, a2)) { - expr_ref r1(m()), r2(m()); - r1 = mk_der_op(k, a1, b); - r2 = mk_der_op(k, a2, b); - result = re().mk_antimirov_union(r1, r2); - return result; - } - if (re().is_antimirov_union(b, b1, b2)) { - expr_ref r1(m()), r2(m()); - r1 = mk_der_op(k, a, b1); - r2 = mk_der_op(k, a, b2); - result = re().mk_antimirov_union(r1, r2); - return result; - } - - // Remaining non-union case: combine two if-then-else BDDs - // (underneath top-level antimirov unions) - if (m().is_ite(a, ca, a1, a2)) { - expr_ref r1(m()), r2(m()); - expr_ref notca(m().mk_not(ca), m()); - if (m().is_ite(b, cb, b1, b2)) { - // --- Core logic for combining two BDDs - expr_ref notcb(m().mk_not(cb), m()); - if (ca == cb) { - r1 = mk_der_op(k, a1, b1); - r2 = mk_der_op(k, a2, b2); - result = mk_ite(ca, r1, r2); - return result; - } - // Order with higher IDs on the outside - bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT || k == OP_RE_XOR; - if (is_symmetric && get_id(ca) < get_id(cb)) { - std::swap(a, b); - std::swap(ca, cb); - std::swap(notca, notcb); - std::swap(a1, b1); - std::swap(a2, b2); - } - // Simplify if there is a relationship between ca and cb - if (pred_implies(ca, cb)) { - r1 = mk_der_op(k, a1, b1); - } - else if (pred_implies(ca, notcb)) { - r1 = mk_der_op(k, a1, b2); - } - if (pred_implies(notca, cb)) { - r2 = mk_der_op(k, a2, b1); - } - else if (pred_implies(notca, notcb)) { - r2 = mk_der_op(k, a2, b2); - } - // --- End core logic - } - if (!r1) r1 = mk_der_op(k, a1, b); - if (!r2) r2 = mk_der_op(k, a2, b); - result = mk_ite(ca, r1, r2); - return result; - } - if (m().is_ite(b, cb, b1, b2)) { - expr_ref r1 = mk_der_op(k, a, b1); - expr_ref r2 = mk_der_op(k, a, b2); - result = mk_ite(cb, r1, r2); - return result; - } - switch (k) { - case OP_RE_INTERSECT: - if (BR_FAILED == mk_re_inter(a, b, result)) - result = re().mk_inter(a, b); - break; - case OP_RE_UNION: - if (BR_FAILED == mk_re_union(a, b, result)) - result = re().mk_union(a, b); - break; - case OP_RE_CONCAT: - if (BR_FAILED == mk_re_concat(a, b, result)) - result = re().mk_concat(a, b); - break; - case OP_RE_XOR: - if (BR_FAILED == mk_re_xor(a, b, result)) - result = re().mk_xor(a, b); - break; - default: - UNREACHABLE(); - break; - } - - return result; -} - -expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { - expr_ref _a(a, m()), _b(b, m()); - expr_ref result(m()); - - // Pre-simplification assumes that none of the - // transformations hide ite sub-terms, - // Rewriting that changes associativity of - // operators may hide ite sub-terms. - // - // When either operand is an ite (a derivative BDD), skip the - // pre-simplification: its blind ite-hoisting would bypass the - // pred_implies-based pruning in mk_der_op_rec and create unreachable - // branches such as ite(x = 'a', ite(x = 'b', ...), ...) with 'a' != 'b'. - bool has_ite = m().is_ite(a) || m().is_ite(b); - if (!has_ite) { - switch (k) { - case OP_RE_INTERSECT: - if (BR_FAILED != mk_re_inter0(a, b, result)) - return result; - break; - case OP_RE_UNION: - if (BR_FAILED != mk_re_union0(a, b, result)) - return result; - break; - case OP_RE_CONCAT: - if (BR_FAILED != mk_re_concat(a, b, result)) - return result; - break; - case OP_RE_XOR: - if (BR_FAILED != mk_re_xor0(a, b, result)) - return result; - break; - default: - break; - } - } - result = m_op_cache.find(k, a, b, nullptr); - if (!result) { - result = mk_der_op_rec(k, a, b); - m_op_cache.insert(k, a, b, nullptr, result); - } - CASSERT("seq_regex", check_deriv_normal_form(result)); - return result; -} - -expr_ref seq_rewriter::mk_der_compl(expr* r) { - STRACE(seq_verbose, tout << "mk_der_compl: " << mk_pp(r, m()) - << std::endl;); - expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr, nullptr), m()); - if (!result) { - expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; - if (re().is_antimirov_union(r, r1, r2)) { - // Convert union to intersection - // Result: antimirov union at top level is lost, pushed inside ITEs - expr_ref res1(m()), res2(m()); - res1 = mk_der_compl(r1); - res2 = mk_der_compl(r2); - result = mk_der_inter(res1, res2); - } - else if (m().is_ite(r, c, r1, r2)) { - result = m().mk_ite(c, mk_der_compl(r1), mk_der_compl(r2)); - } - else if (BR_FAILED == mk_re_complement(r, result)) - result = re().mk_complement(r); - m_op_cache.insert(OP_RE_COMPLEMENT, r, nullptr, nullptr, result); - } - CASSERT("seq_regex", check_deriv_normal_form(result)); - return result; -} - -/* - Make an re_predicate with an arbitrary condition cond, enforcing - derivative normal form on how conditions are written. - - Tries to rewrite everything to (ele <= x) constraints: - (ele = a) => ite(ele <= a-1, none, ite(ele <= a, epsilon, none)) - (a = ele) => " - (a <= ele) => ite(ele <= a-1, none, epsilon) - (not p) => mk_der_compl(...) - (p and q) => mk_der_inter(...) - (p or q) => mk_der_union(...) - - Postcondition: result is in BDD form -*/ -expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) { - STRACE(seq_verbose, tout << "mk_der_cond: " - << mk_pp(cond, m()) << ", " << mk_pp(ele, m()) << std::endl;); - sort *ele_sort = nullptr; - VERIFY(u().is_seq(seq_sort, ele_sort)); - SASSERT(ele_sort == ele->get_sort()); - expr *c1 = nullptr, *c2 = nullptr, *ch1 = nullptr, *ch2 = nullptr; - unsigned ch = 0; - expr_ref result(m()), r1(m()), r2(m()); - if (m().is_eq(cond, ch1, ch2) && u().is_char(ch1)) { - r1 = u().mk_le(ch1, ch2); - r1 = mk_der_cond(r1, ele, seq_sort); - r2 = u().mk_le(ch2, ch1); - r2 = mk_der_cond(r2, ele, seq_sort); - result = mk_der_inter(r1, r2); - } - else if (u().is_char_le(cond, ch1, ch2) && - u().is_const_char(ch1, ch) && (ch2 == ele)) { - if (ch > 0) { - result = u().mk_char(ch - 1); - result = u().mk_le(ele, result); - result = re_predicate(result, seq_sort); - result = mk_der_compl(result); - } - else { - result = m().mk_true(); - result = re_predicate(result, seq_sort); - } - } - else if (m().is_not(cond, c1)) { - result = mk_der_cond(c1, ele, seq_sort); - result = mk_der_compl(result); - } - else if (m().is_and(cond, c1, c2)) { - r1 = mk_der_cond(c1, ele, seq_sort); - r2 = mk_der_cond(c2, ele, seq_sort); - result = mk_der_inter(r1, r2); - } - else if (m().is_or(cond, c1, c2)) { - r1 = mk_der_cond(c1, ele, seq_sort); - r2 = mk_der_cond(c2, ele, seq_sort); - result = mk_der_union(r1, r2); - } - else { - result = re_predicate(cond, seq_sort); - } - STRACE(seq_verbose, tout << "mk_der_cond result: " - << mk_pp(result, m()) << std::endl;); - CASSERT("seq_regex", check_deriv_normal_form(result)); - return result; -} - - -/************************************************* - ***** End Derivative Code ***** - *************************************************/ - - /* * pattern match against all ++ "abc" ++ all ++ "def" ++ all regexes. */ diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index fc47f2c636..d20e4f50ee 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -184,32 +184,17 @@ class seq_rewriter { // - occurrences of a_ch are replaced by empty (replace_all never outputs a) expr_ref re_replace_char(expr *r, unsigned a_ch, unsigned b_ch, expr *a_str, expr *b_str); - // Calculate derivative, memoized and enforcing a normal form - expr_ref mk_der_op(decl_kind k, expr* a, expr* b); - expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b); - expr_ref mk_der_concat(expr* a, expr* b); - expr_ref mk_der_union(expr* a, expr* b); - expr_ref mk_der_inter(expr* a, expr* b); - expr_ref mk_der_xor(expr* a, expr* b); - expr_ref mk_der_compl(expr* a); - expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort); - expr_ref mk_der_antimirov_union(expr* r1, expr* r2); - bool ite_bdds_compatible(expr* a, expr* b); - #ifdef Z3DEBUG - bool check_deriv_normal_form(expr* r, int level = 3); - #endif expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function& decompose, std::function& compose); // elem is (:var 0) and path a condition that may have (:var 0) as a free variable // simplify path, e.g., (:var 0) = 'a' & (:var 0) = 'b' is simplified to false - expr_ref simplify_path(expr* elem, expr* path); + // expr_ref simplify_path(expr* elem, expr* path); bool lt_char(expr* ch1, expr* ch2); bool eq_char(expr* ch1, expr* ch2); bool neq_char(expr* ch1, expr* ch2); bool le_char(expr* ch1, expr* ch2); - bool pred_implies(expr* a, expr* b); bool are_complements(expr* r1, expr* r2) const; bool is_subset(expr* r1, expr* r2) const; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 5a801550cd..45c70097c1 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -581,15 +581,15 @@ namespace smt { expr_ref_pair_vector cofactors(m); seq_rw().get_cofactors(hd, d, cofactors); - for (auto const& p : cofactors) { - if (is_member(p.second, u)) + for (auto const& [c, r] : cofactors) { + if (is_member(r, u)) continue; - expr_ref cond(p.first, m); + expr_ref cond(c, m); seq_rw().elim_condition(hd, cond); rewrite(cond); if (m.is_false(cond)) continue; - expr_ref next_non_empty = sk().mk_is_non_empty(p.second, re().mk_union(u, p.second), n); + expr_ref next_non_empty = sk().mk_is_non_empty(r, re().mk_union(u, r), n); if (!m.is_true(cond)) next_non_empty = m.mk_and(cond, next_non_empty); lits.push_back(th.mk_literal(next_non_empty)); @@ -834,10 +834,10 @@ namespace smt { literal_vector lits; expr_ref_pair_vector cofactors(m); seq_rw().get_cofactors(hd, d, cofactors); - for (auto const& p : cofactors) { - if (is_member(p.second, u)) + for (auto const& [c, r] : cofactors) { + if (is_member(r, u)) continue; - expr_ref cond(p.first, m); + expr_ref cond(c, m); seq_rw().elim_condition(hd, cond); rewrite(cond); if (m.is_false(cond)) @@ -848,7 +848,7 @@ namespace smt { expr_ref ncond(mk_not(m, cond), m); lits.push_back(th.mk_literal(mk_forall(m, hd, ncond))); } - expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, p.second), n); + expr_ref is_empty1 = sk().mk_is_empty(r, re().mk_union(u, r), n); lits.push_back(th.mk_literal(is_empty1)); th.add_axiom(lits); }