diff --git a/src/ast/rewriter/bool_rewriter.cpp b/src/ast/rewriter/bool_rewriter.cpp index 321bd6f47d..014e563a5b 100644 --- a/src/ast/rewriter/bool_rewriter.cpp +++ b/src/ast/rewriter/bool_rewriter.cpp @@ -19,6 +19,7 @@ Notes: #include "ast/rewriter/bool_rewriter.h" #include "params/bool_rewriter_params.hpp" #include "ast/rewriter/rewriter_def.h" +#include "ast/rewriter/expr_safe_replace.h" #include "ast/ast_lt.h" #include "ast/for_each_expr.h" #include @@ -1185,4 +1186,30 @@ void bool_rewriter::mk_ge2(expr* a, expr* b, expr* c, expr_ref& r) { } -template class rewriter_tpl; +bool bool_rewriter::decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el) { + expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr; + if (m().is_ite(r, cond, r1, r2)) { + c = cond; + th = r1; + el = r2; + return true; + } + for (expr *e : subterms::ground(expr_ref(r, m()))) { + if (m().is_ite(e, cond, r1, r2)) { + expr_safe_replace rep1(m()); + expr_safe_replace rep2(m()); + rep1.insert(e, r1); + rep2.insert(e, r2); + c = cond; + th = r; + el = r; + rep1(th); + rep2(el); + return true; + } + } + return false; +} + + +template class rewriter_tpl; \ No newline at end of file diff --git a/src/ast/rewriter/bool_rewriter.h b/src/ast/rewriter/bool_rewriter.h index 2b52404e50..27c971f787 100644 --- a/src/ast/rewriter/bool_rewriter.h +++ b/src/ast/rewriter/bool_rewriter.h @@ -242,6 +242,11 @@ public: void mk_nand(expr * arg1, expr * arg2, expr_ref & result); void mk_nor(expr * arg1, expr * arg2, expr_ref & result); void mk_ge2(expr* a, expr* b, expr* c, expr_ref& result); + + // If r is, or contains, an if-then-else, decompose it into a top-level + // ite by hoisting the (first) inner ite condition: returns c, th, el such + // that r is equivalent to (ite c th el). Returns false if r has no ite. + bool decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el); }; struct bool_rewriter_cfg : public default_rewriter_cfg { diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index e29dfc3b10..039e680814 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -1304,4 +1304,50 @@ namespace seq { m_intervals_start = old_sz; } + // ------------------------------------------------------- + // Cofactor enumeration over a transition regex + // ------------------------------------------------------- + + void derive::get_cofactors_rec(expr* r, expr_ref_pair_vector& result) { + // Hoist the (first) if-then-else condition to the top of r, splitting it + // into the equivalent ite(c, th, el); when r contains no ite it is a + // leaf of the transition regex. + expr_ref c(m), th(m), el(m); + if (!m_br.decompose_ite(r, c, th, el)) { + if (!re().is_empty(r)) + result.push_back(get_path_expr(), r); + return; + } + // Positive branch: c holds. + switch (push(c, false)) { + case l_true: get_cofactors_rec(th, result); break; + case l_undef: get_cofactors_rec(th, result); pop(); break; + case l_false: break; + } + // Negative branch: c does not hold. + switch (push(c, true)) { + case l_true: get_cofactors_rec(el, result); break; + case l_undef: get_cofactors_rec(el, result); pop(); break; + case l_false: break; + } + } + + void derive::get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result) { + SASSERT(m_util.is_re(r)); + if (ele != m_ele) + reset_op_caches(); + m_ele = ele; + m_trail.push_back(ele); + m_trail.push_back(r); + // Initialize a fresh path/interval context for this traversal. + m_path.reset(); + m_path_stack.reset(); + m_intervals.reset(); + m_intervals.push_back({0u, u().max_char()}); + m_intervals_start = 0; + m_path_expr = m.mk_true(); + get_cofactors_rec(r, result); + } + } + diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 84516e0d8f..c011af70eb 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -164,6 +164,9 @@ namespace seq { void intersect_intervals(unsigned lo, unsigned hi); void exclude_interval(unsigned lo, unsigned hi); + // Cofactor enumeration over a transition regex (ITE-tree). + void get_cofactors_rec(expr* r, expr_ref_pair_vector& result); + sort* re_sort(expr* r) { return r->get_sort(); } sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; } sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; } @@ -191,6 +194,16 @@ namespace seq { */ expr_ref nullable(expr* r) { return is_nullable(r); } + /** + * Enumerate the cofactors (min-terms) of a transition regex r taken with + * respect to element ele. r is an ITE-tree over character predicates on + * ele; for every feasible path through the tree this produces a pair + * (path_condition, leaf_regex). Infeasible character-interval + * combinations are pruned using the same path/interval context that the + * derivative engine uses while hoisting ITEs. + */ + void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result); + void set_antimirov(bool flag) { m_antimirov_derivative = flag; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index aa4603e6c7..42f8a1660a 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -455,6 +455,17 @@ public: return mk_derivative(r); } + /* + Enumerate the cofactors (min-terms) of a transition regex r taken with + respect to ele. Produces (path_condition, leaf_regex) pairs for every + feasible path through the ITE-tree, pruning infeasible character ranges. + Delegates to the derivative engine so the same path/interval context used + while hoisting ITEs is reused for the leaf simplification. + */ + void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result) { + m_derive.get_cofactors(ele, r, result); + } + // heuristic elimination of element from condition that comes form a derivative. // special case optimization for conjunctions of equalities, disequalities and ranges. void elim_condition(expr* elem, expr_ref& cond); diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 0e9a03b633..6c4f5cf5b3 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -562,7 +562,7 @@ namespace smt { lits.push_back(null_lit); expr_ref_pair_vector cofactors(m); - get_cofactors(d, cofactors); + seq_rw().get_cofactors(hd, d, cofactors); for (auto const& p : cofactors) { if (is_member(p.second, u)) continue; @@ -707,53 +707,26 @@ namespace smt { /* Return a list of all (cond, leaf) pairs in a given derivative - expression r. + expression r, where elem is the character symbol the derivative was + taken with respect to. - Note: this implementation is inefficient: it simply collects all expressions under an if and - iterates over all combinations. + The transition regexes produced by the symbolic derivative engine are + ITE-trees over character predicates ci on elem (equalities such as + elem = 'A', and ranges such as 'a' <= elem <= 'z'). These predicates + are typically mutually exclusive, so the number of feasible truth + assignments to {c1,..,ck} ("minterms") is small. - This method is still used by: + The enumeration is delegated to seq::derive (via seq_rw().get_cofactors) + so it reuses the very same path/interval context that the derivative + engine uses while hoisting ITEs: each feasible path through the ITE-tree + yields one (path_condition, leaf) cofactor, infeasible character-range + combinations are pruned, and the leaf is simplified with the path-aware + smart constructors. + + This is used by: propagate_is_empty propagate_is_non_empty */ - void seq_regex::get_cofactors(expr* r, expr_ref_pair_vector& result) { - obj_hashtable ifs; - expr* cond = nullptr, * r1 = nullptr, * r2 = nullptr; - for (expr* e : subterms::ground(expr_ref(r, m))) - if (m.is_ite(e, cond, r1, r2)) - ifs.insert(cond); - - expr_ref_vector rs(m); - vector conds; - conds.push_back(expr_ref_vector(m)); - rs.push_back(r); - for (expr* c : ifs) { - unsigned sz = conds.size(); - expr_safe_replace rep1(m); - expr_safe_replace rep2(m); - rep1.insert(c, m.mk_true()); - rep2.insert(c, m.mk_false()); - expr_ref r2(m); - for (unsigned i = 0; i < sz; ++i) { - expr_ref_vector cs = conds[i]; - cs.push_back(mk_not(m, c)); - conds.push_back(cs); - conds[i].push_back(c); - expr_ref r1(rs.get(i), m); - rep1(r1, r2); - rs[i] = r2; - rep2(r1, r2); - rs.push_back(r2); - } - } - for (unsigned i = 0; i < conds.size(); ++i) { - expr_ref conj = mk_and(conds[i]); - expr_ref r(rs.get(i), m); - ctx.get_rewriter()(r); - if (!m.is_false(conj) && !re().is_empty(r)) - result.push_back(conj, r); - } - } /* is_empty(r, u) => ~is_nullable(r) @@ -781,7 +754,7 @@ namespace smt { d = mk_derivative_wrapper(hd, r); literal_vector lits; expr_ref_pair_vector cofactors(m); - get_cofactors(d, cofactors); + seq_rw().get_cofactors(hd, d, cofactors); for (auto const& p : cofactors) { if (is_member(p.second, u)) continue; diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 5c3fddd252..f27f23fa3d 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -164,7 +164,6 @@ namespace smt { // returned by derivative_wrapper expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r); void get_derivative_targets(expr* r, expr_ref_vector& targets); - void get_cofactors(expr* r, expr_ref_pair_vector& result); /* Pretty print the regex of the state id to the out stream,