3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-29 03:48:51 +00:00

remove deprecated derivative code

This commit is contained in:
Nikolaj Bjorner 2026-06-25 19:40:35 -07:00
parent 110a670ea1
commit c530faab4e
3 changed files with 9 additions and 520 deletions

View file

@ -2837,10 +2837,6 @@ br_status seq_rewriter::mk_re_reverse(expr* r, expr_ref& result) {
}
}
/***************************************************
***** Begin Derivative Code *****
***************************************************/
/*
Symbolic derivative: seq -> regex -> regex
seq should be single char
@ -2864,98 +2860,6 @@ br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
return BR_DONE;
}
/*
Note: Derivative Normal Form
When computing derivatives recursively, we preserve the following
BDD normal form:
- At the top level, the derivative is a union of antimirov derivatives
(Conceptually each element of the union is a different derivative).
We currently express this derivative using an internal op code:
_OP_RE_antimirov_UNION
- An antimirov derivative is a nested if-then-else term.
if-then-elses are pushed outwards and sorted by condition ID
(cond->get_id()), from largest on the outside to smallest on the
inside. Duplicate nested conditions are eliminated.
- The leaves of the if-then-else BDD can have unions themselves,
but these are interpreted as Regex union, not as separate antimirov
derivatives.
To debug the normal form, call Z3 with -dbg:seq_regex:
this calls check_deriv_normal_form (below) periodically.
The main logic is in mk_der_op_rec for combining normal forms
(some also in mk_der_compl_rec).
*/
#ifdef Z3DEBUG
/*
Debugging to check the derivative normal form that we assume
(see definition above).
This may fail on unusual/unexpected REs, such as those containing
regex variables, but this is by design as this is only checked
during debugging, and we have not considered how normal form
should apply in such cases.
*/
bool seq_rewriter::check_deriv_normal_form(expr* r, int level) {
if (level == 3) { // top level
STRACE(seq_verbose, tout
<< "Checking derivative normal form invariant...";);
}
expr *r1 = nullptr, *r2 = nullptr, *p = nullptr, *s = nullptr;
unsigned lo = 0, hi = 0;
STRACE(seq_verbose, tout << " (level " << level << ")";);
int new_level = 0;
if (re().is_antimirov_union(r)) {
SASSERT(level >= 2);
new_level = 2;
}
else if (m().is_ite(r)) {
SASSERT(level >= 1);
new_level = 1;
}
SASSERT(!re().is_diff(r));
SASSERT(!re().is_opt(r));
SASSERT(!re().is_plus(r));
if (re().is_antimirov_union(r, r1, r2) ||
re().is_concat(r, r1, r2) ||
re().is_union(r, r1, r2) ||
re().is_intersection(r, r1, r2) ||
re().is_xor(r, r1, r2) ||
m().is_ite(r, p, r1, r2)) {
check_deriv_normal_form(r1, new_level);
check_deriv_normal_form(r2, new_level);
}
else if (re().is_star(r, r1) ||
re().is_complement(r, r1) ||
re().is_loop(r, r1, lo) ||
re().is_loop(r, r1, lo, hi)) {
check_deriv_normal_form(r1, new_level);
}
else if (re().is_reverse(r, r1)) {
SASSERT(re().is_to_re(r1));
}
else if (re().is_full_seq(r) ||
re().is_empty(r) ||
re().is_range(r) ||
re().is_full_char(r) ||
re().is_of_pred(r) ||
re().is_to_re(r, s)) {
// OK
}
else {
SASSERT(false);
}
if (level == 3) {
STRACE(seq_verbose, tout << " passed!" << std::endl;);
}
return true;
}
#endif
expr_ref seq_rewriter::mk_derivative(expr* r) {
auto result = m_derive(seq::derivative_kind::antimirov_t, r);
@ -3162,35 +3066,6 @@ expr_ref seq_rewriter::merge_regex_sets(expr* r1, expr* r2, expr* unit,
}
}
/*
* calls elim_condition
*/
expr_ref seq_rewriter::simplify_path(expr* elem, expr* path) {
expr_ref result(path, m());
elim_condition(elem, result);
return result;
}
expr_ref seq_rewriter::mk_der_antimirov_union(expr* r1, expr* r2) {
return mk_der_op(_OP_RE_ANTIMIROV_UNION, r1, r2);
}
expr_ref seq_rewriter::mk_der_union(expr* r1, expr* r2) {
return mk_der_op(OP_RE_UNION, r1, r2);
}
expr_ref seq_rewriter::mk_der_inter(expr* r1, expr* r2) {
return mk_der_op(OP_RE_INTERSECT, r1, r2);
}
expr_ref seq_rewriter::mk_der_xor(expr* r1, expr* r2) {
return mk_der_op(OP_RE_XOR, r1, r2);
}
expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
return mk_der_op(OP_RE_CONCAT, r1, r2);
}
/*
Utility functions to decide char <, ==, !=, and <=.
@ -3213,377 +3088,6 @@ bool seq_rewriter::le_char(expr* ch1, expr* ch2) {
return eq_char(ch1, ch2) || lt_char(ch1, ch2);
}
/*
Utility function to decide if a simple predicate (ones that appear
as the conditions in if-then-else expressions in derivatives)
implies another.
Return true if we deduce that a implies b, false if unknown.
Current cases handled:
- a and b are char <= constraints, or negations of char <= constraints
- a and b are equalities (element = constant char), or their negations.
These arise from derivatives of single characters and must be pruned
when combining BDDs so that no unreachable branch such as
ite(x = 'a', ite(x = 'b', ...), ...) (with 'a' != 'b') is created.
*/
bool seq_rewriter::pred_implies(expr* a, expr* b) {
STRACE(seq_verbose, tout << "pred_implies: "
<< "," << mk_pp(a, m())
<< "," << mk_pp(b, m()) << std::endl;);
expr *cha1 = nullptr, *cha2 = nullptr, *nota = nullptr,
*chb1 = nullptr, *chb2 = nullptr, *notb = nullptr;
// (element = constant char), returning the element and char code.
auto is_char_eq = [&](expr* e, expr*& x, unsigned& v) {
expr* t1 = nullptr, *t2 = nullptr;
if (!m().is_eq(e, t1, t2))
return false;
if (u().is_const_char(t2, v)) { x = t1; return true; }
if (u().is_const_char(t1, v)) { x = t2; return true; }
return false;
};
expr *xa = nullptr, *xb = nullptr;
unsigned va = 0, vb = 0;
if (is_char_eq(a, xa, va)) {
// a is (xa = va)
if (is_char_eq(b, xb, vb) && xa == xb)
// (x = va) => (x = vb) iff va == vb
return va == vb;
if (m().is_not(b, notb) && is_char_eq(notb, xb, vb) && xa == xb)
// (x = va) => not (x = vb) iff va != vb
return va != vb;
}
if (m().is_not(a, nota) &&
m().is_not(b, notb)) {
return pred_implies(notb, nota);
}
else if (u().is_char_le(a, cha1, cha2) &&
u().is_char_le(b, chb1, chb2)) {
return le_char(chb1, cha1) && le_char(cha2, chb2);
}
else if (u().is_char_le(a, cha1, cha2) &&
m().is_not(b, notb) &&
u().is_char_le(notb, chb1, chb2)) {
return (le_char(chb2, cha1) && lt_char(cha2, chb1)) ||
(lt_char(chb2, cha1) && le_char(cha2, chb1));
}
else if (u().is_char_le(b, chb1, chb2) &&
m().is_not(a, nota) &&
u().is_char_le(nota, cha1, cha2)) {
return le_char(chb1, cha2) && le_char(cha1, chb2);
}
return false;
}
/*
Utility function to decide if two BDDs (nested if-then-else terms)
have exactly the same structure and conditions.
*/
bool seq_rewriter::ite_bdds_compatible(expr* a, expr* b) {
expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
if (m().is_ite(a, ca, a1, a2) && m().is_ite(b, cb, b1, b2)) {
return (ca == cb) && ite_bdds_compatible(a1, b1)
&& ite_bdds_compatible(a2, b2);
}
else if (m().is_ite(a) || m().is_ite(b)) {
return false;
}
else {
return true;
}
}
/*
Apply a binary operation, preserving normal form on derivative expressions.
Preconditions:
- k is one of the following binary op codes on REs:
OP_RE_INTERSECT
OP_RE_UNION
OP_RE_CONCAT
_OP_RE_antimirov_UNION
- a and b are in normal form (check_deriv_normal_form)
Postcondition:
- result is in normal form (check_deriv_normal_form)
*/
expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
STRACE(seq_verbose, tout << "mk_der_op_rec: " << k
<< "," << mk_pp(a, m())
<< "," << mk_pp(b, m()) << std::endl;);
expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr;
expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr;
expr_ref result(m());
// Simplify if-then-elses whenever possible
auto mk_ite = [&](expr* c, expr* a, expr* b) {
return (a == b) ? a : m().mk_ite(c, a, b);
};
// Use character code to order conditions
auto get_id = [&](expr* e) {
expr *ch1 = nullptr, *ch2 = nullptr;
unsigned ch;
if (u().is_char_le(e, ch1, ch2) && u().is_const_char(ch2, ch))
return ch;
// Fallback: use expression ID (but use same ID for negation)
m().is_not(e, e);
return e->get_id();
};
// Choose when to lift a union to the top level, by converting
// it to an antimirov union
// This implements a restricted form of antimirov derivatives
if (k == OP_RE_UNION) {
if (re().is_antimirov_union(a) || re().is_antimirov_union(b)) {
k = _OP_RE_ANTIMIROV_UNION;
}
#if 0
// Disabled: eager antimirov lifting unless BDDs are compatible
// Note: the check for BDD compatibility could be made more
// sophisticated: in an antimirov union of n terms, we really
// want to check if any pair of them is compatible.
else if (m().is_ite(a) && m().is_ite(b) &&
!ite_bdds_compatible(a, b)) {
k = _OP_RE_ANTIMIROV_UNION;
}
#endif
}
if (k == _OP_RE_ANTIMIROV_UNION) {
result = re().mk_antimirov_union(a, b);
return result;
}
if (re().is_antimirov_union(a, a1, a2)) {
expr_ref r1(m()), r2(m());
r1 = mk_der_op(k, a1, b);
r2 = mk_der_op(k, a2, b);
result = re().mk_antimirov_union(r1, r2);
return result;
}
if (re().is_antimirov_union(b, b1, b2)) {
expr_ref r1(m()), r2(m());
r1 = mk_der_op(k, a, b1);
r2 = mk_der_op(k, a, b2);
result = re().mk_antimirov_union(r1, r2);
return result;
}
// Remaining non-union case: combine two if-then-else BDDs
// (underneath top-level antimirov unions)
if (m().is_ite(a, ca, a1, a2)) {
expr_ref r1(m()), r2(m());
expr_ref notca(m().mk_not(ca), m());
if (m().is_ite(b, cb, b1, b2)) {
// --- Core logic for combining two BDDs
expr_ref notcb(m().mk_not(cb), m());
if (ca == cb) {
r1 = mk_der_op(k, a1, b1);
r2 = mk_der_op(k, a2, b2);
result = mk_ite(ca, r1, r2);
return result;
}
// Order with higher IDs on the outside
bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT || k == OP_RE_XOR;
if (is_symmetric && get_id(ca) < get_id(cb)) {
std::swap(a, b);
std::swap(ca, cb);
std::swap(notca, notcb);
std::swap(a1, b1);
std::swap(a2, b2);
}
// Simplify if there is a relationship between ca and cb
if (pred_implies(ca, cb)) {
r1 = mk_der_op(k, a1, b1);
}
else if (pred_implies(ca, notcb)) {
r1 = mk_der_op(k, a1, b2);
}
if (pred_implies(notca, cb)) {
r2 = mk_der_op(k, a2, b1);
}
else if (pred_implies(notca, notcb)) {
r2 = mk_der_op(k, a2, b2);
}
// --- End core logic
}
if (!r1) r1 = mk_der_op(k, a1, b);
if (!r2) r2 = mk_der_op(k, a2, b);
result = mk_ite(ca, r1, r2);
return result;
}
if (m().is_ite(b, cb, b1, b2)) {
expr_ref r1 = mk_der_op(k, a, b1);
expr_ref r2 = mk_der_op(k, a, b2);
result = mk_ite(cb, r1, r2);
return result;
}
switch (k) {
case OP_RE_INTERSECT:
if (BR_FAILED == mk_re_inter(a, b, result))
result = re().mk_inter(a, b);
break;
case OP_RE_UNION:
if (BR_FAILED == mk_re_union(a, b, result))
result = re().mk_union(a, b);
break;
case OP_RE_CONCAT:
if (BR_FAILED == mk_re_concat(a, b, result))
result = re().mk_concat(a, b);
break;
case OP_RE_XOR:
if (BR_FAILED == mk_re_xor(a, b, result))
result = re().mk_xor(a, b);
break;
default:
UNREACHABLE();
break;
}
return result;
}
expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
expr_ref _a(a, m()), _b(b, m());
expr_ref result(m());
// Pre-simplification assumes that none of the
// transformations hide ite sub-terms,
// Rewriting that changes associativity of
// operators may hide ite sub-terms.
//
// When either operand is an ite (a derivative BDD), skip the
// pre-simplification: its blind ite-hoisting would bypass the
// pred_implies-based pruning in mk_der_op_rec and create unreachable
// branches such as ite(x = 'a', ite(x = 'b', ...), ...) with 'a' != 'b'.
bool has_ite = m().is_ite(a) || m().is_ite(b);
if (!has_ite) {
switch (k) {
case OP_RE_INTERSECT:
if (BR_FAILED != mk_re_inter0(a, b, result))
return result;
break;
case OP_RE_UNION:
if (BR_FAILED != mk_re_union0(a, b, result))
return result;
break;
case OP_RE_CONCAT:
if (BR_FAILED != mk_re_concat(a, b, result))
return result;
break;
case OP_RE_XOR:
if (BR_FAILED != mk_re_xor0(a, b, result))
return result;
break;
default:
break;
}
}
result = m_op_cache.find(k, a, b, nullptr);
if (!result) {
result = mk_der_op_rec(k, a, b);
m_op_cache.insert(k, a, b, nullptr, result);
}
CASSERT("seq_regex", check_deriv_normal_form(result));
return result;
}
expr_ref seq_rewriter::mk_der_compl(expr* r) {
STRACE(seq_verbose, tout << "mk_der_compl: " << mk_pp(r, m())
<< std::endl;);
expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr, nullptr), m());
if (!result) {
expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
if (re().is_antimirov_union(r, r1, r2)) {
// Convert union to intersection
// Result: antimirov union at top level is lost, pushed inside ITEs
expr_ref res1(m()), res2(m());
res1 = mk_der_compl(r1);
res2 = mk_der_compl(r2);
result = mk_der_inter(res1, res2);
}
else if (m().is_ite(r, c, r1, r2)) {
result = m().mk_ite(c, mk_der_compl(r1), mk_der_compl(r2));
}
else if (BR_FAILED == mk_re_complement(r, result))
result = re().mk_complement(r);
m_op_cache.insert(OP_RE_COMPLEMENT, r, nullptr, nullptr, result);
}
CASSERT("seq_regex", check_deriv_normal_form(result));
return result;
}
/*
Make an re_predicate with an arbitrary condition cond, enforcing
derivative normal form on how conditions are written.
Tries to rewrite everything to (ele <= x) constraints:
(ele = a) => ite(ele <= a-1, none, ite(ele <= a, epsilon, none))
(a = ele) => "
(a <= ele) => ite(ele <= a-1, none, epsilon)
(not p) => mk_der_compl(...)
(p and q) => mk_der_inter(...)
(p or q) => mk_der_union(...)
Postcondition: result is in BDD form
*/
expr_ref seq_rewriter::mk_der_cond(expr* cond, expr* ele, sort* seq_sort) {
STRACE(seq_verbose, tout << "mk_der_cond: "
<< mk_pp(cond, m()) << ", " << mk_pp(ele, m()) << std::endl;);
sort *ele_sort = nullptr;
VERIFY(u().is_seq(seq_sort, ele_sort));
SASSERT(ele_sort == ele->get_sort());
expr *c1 = nullptr, *c2 = nullptr, *ch1 = nullptr, *ch2 = nullptr;
unsigned ch = 0;
expr_ref result(m()), r1(m()), r2(m());
if (m().is_eq(cond, ch1, ch2) && u().is_char(ch1)) {
r1 = u().mk_le(ch1, ch2);
r1 = mk_der_cond(r1, ele, seq_sort);
r2 = u().mk_le(ch2, ch1);
r2 = mk_der_cond(r2, ele, seq_sort);
result = mk_der_inter(r1, r2);
}
else if (u().is_char_le(cond, ch1, ch2) &&
u().is_const_char(ch1, ch) && (ch2 == ele)) {
if (ch > 0) {
result = u().mk_char(ch - 1);
result = u().mk_le(ele, result);
result = re_predicate(result, seq_sort);
result = mk_der_compl(result);
}
else {
result = m().mk_true();
result = re_predicate(result, seq_sort);
}
}
else if (m().is_not(cond, c1)) {
result = mk_der_cond(c1, ele, seq_sort);
result = mk_der_compl(result);
}
else if (m().is_and(cond, c1, c2)) {
r1 = mk_der_cond(c1, ele, seq_sort);
r2 = mk_der_cond(c2, ele, seq_sort);
result = mk_der_inter(r1, r2);
}
else if (m().is_or(cond, c1, c2)) {
r1 = mk_der_cond(c1, ele, seq_sort);
r2 = mk_der_cond(c2, ele, seq_sort);
result = mk_der_union(r1, r2);
}
else {
result = re_predicate(cond, seq_sort);
}
STRACE(seq_verbose, tout << "mk_der_cond result: "
<< mk_pp(result, m()) << std::endl;);
CASSERT("seq_regex", check_deriv_normal_form(result));
return result;
}
/*************************************************
***** End Derivative Code *****
*************************************************/
/*
* pattern match against all ++ "abc" ++ all ++ "def" ++ all regexes.
*/

View file

@ -184,32 +184,17 @@ class seq_rewriter {
// - occurrences of a_ch are replaced by empty (replace_all never outputs a)
expr_ref re_replace_char(expr *r, unsigned a_ch, unsigned b_ch, expr *a_str, expr *b_str);
// Calculate derivative, memoized and enforcing a normal form
expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);
expr_ref mk_der_concat(expr* a, expr* b);
expr_ref mk_der_union(expr* a, expr* b);
expr_ref mk_der_inter(expr* a, expr* b);
expr_ref mk_der_xor(expr* a, expr* b);
expr_ref mk_der_compl(expr* a);
expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
expr_ref mk_der_antimirov_union(expr* r1, expr* r2);
bool ite_bdds_compatible(expr* a, expr* b);
#ifdef Z3DEBUG
bool check_deriv_normal_form(expr* r, int level = 3);
#endif
expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function<bool(expr*, expr*&, expr*&)>& decompose, std::function<expr* (expr*, expr*)>& compose);
// elem is (:var 0) and path a condition that may have (:var 0) as a free variable
// simplify path, e.g., (:var 0) = 'a' & (:var 0) = 'b' is simplified to false
expr_ref simplify_path(expr* elem, expr* path);
// expr_ref simplify_path(expr* elem, expr* path);
bool lt_char(expr* ch1, expr* ch2);
bool eq_char(expr* ch1, expr* ch2);
bool neq_char(expr* ch1, expr* ch2);
bool le_char(expr* ch1, expr* ch2);
bool pred_implies(expr* a, expr* b);
bool are_complements(expr* r1, expr* r2) const;
bool is_subset(expr* r1, expr* r2) const;

View file

@ -581,15 +581,15 @@ namespace smt {
expr_ref_pair_vector cofactors(m);
seq_rw().get_cofactors(hd, d, cofactors);
for (auto const& p : cofactors) {
if (is_member(p.second, u))
for (auto const& [c, r] : cofactors) {
if (is_member(r, u))
continue;
expr_ref cond(p.first, m);
expr_ref cond(c, m);
seq_rw().elim_condition(hd, cond);
rewrite(cond);
if (m.is_false(cond))
continue;
expr_ref next_non_empty = sk().mk_is_non_empty(p.second, re().mk_union(u, p.second), n);
expr_ref next_non_empty = sk().mk_is_non_empty(r, re().mk_union(u, r), n);
if (!m.is_true(cond))
next_non_empty = m.mk_and(cond, next_non_empty);
lits.push_back(th.mk_literal(next_non_empty));
@ -834,10 +834,10 @@ namespace smt {
literal_vector lits;
expr_ref_pair_vector cofactors(m);
seq_rw().get_cofactors(hd, d, cofactors);
for (auto const& p : cofactors) {
if (is_member(p.second, u))
for (auto const& [c, r] : cofactors) {
if (is_member(r, u))
continue;
expr_ref cond(p.first, m);
expr_ref cond(c, m);
seq_rw().elim_condition(hd, cond);
rewrite(cond);
if (m.is_false(cond))
@ -848,7 +848,7 @@ namespace smt {
expr_ref ncond(mk_not(m, cond), m);
lits.push_back(th.mk_literal(mk_forall(m, hd, ncond)));
}
expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, p.second), n);
expr_ref is_empty1 = sk().mk_is_empty(r, re().mk_union(u, r), n);
lits.push_back(th.mk_literal(is_empty1));
th.add_axiom(lits);
}