From 1b9fcc7098d79160cad1cb59bd05b8e527957b54 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 5 Jun 2020 17:28:48 -0700 Subject: [PATCH] integrate ite-normalized derivatives Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_rewriter.cpp | 174 ++++-------------------------- src/ast/rewriter/seq_rewriter.h | 12 +-- src/smt/seq_regex.cpp | 103 +++++++++--------- src/smt/seq_regex.h | 9 +- 4 files changed, 86 insertions(+), 212 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 132d2de3c..26021f9b6 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2336,30 +2336,33 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) { expr* ca = nullptr, *a1 = nullptr, *a2 = nullptr; expr* cb = nullptr, *b1 = nullptr, *b2 = nullptr; expr_ref result(m()); + auto mk_ite = [&](expr* c, expr* a, expr* b) { + return (a == b) ? a : m().mk_ite(c, a, b); + }; if (m().is_ite(a, ca, a1, a2)) { if (m().is_ite(b, cb, b1, b2)) { if (ca == cb) { expr_ref r1 = mk_der_op(k, a1, b1); expr_ref r2 = mk_der_op(k, a2, b2); - result = m().mk_ite(ca, r1, r2); + result = mk_ite(ca, r1, r2); return result; } else if (ca->get_id() < cb->get_id()) { expr_ref r1 = mk_der_op(k, a, b1); expr_ref r2 = mk_der_op(k, a, b2); - result = m().mk_ite(cb, r1, r2); + result = mk_ite(cb, r1, r2); return result; } } expr_ref r1 = mk_der_op(k, a1, b); expr_ref r2 = mk_der_op(k, a2, b); - result = m().mk_ite(ca, r1, r2); + result = mk_ite(ca, r1, r2); return result; } if (m().is_ite(b, cb, b1, b2)) { expr_ref r1 = mk_der_op(k, a, b1); expr_ref r2 = mk_der_op(k, a, b2); - result = m().mk_ite(cb, r1, r2); + result = mk_ite(cb, r1, r2); return result; } switch (k) { @@ -2413,6 +2416,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { VERIFY(m_util.is_seq(seq_sort, ele_sort)); SASSERT(ele_sort == m().get_sort(ele)); expr* r1 = nullptr, *r2 = nullptr, *p = nullptr; + auto mk_empty = [&]() { return expr_ref(re().mk_empty(m().get_sort(r)), m()); }; unsigned lo = 0, hi = 0; if (re().is_concat(r, r1, r2)) { expr_ref is_n = is_nullable(r1); @@ -2422,7 +2426,8 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { return result; } expr_ref dr2 = mk_derivative(ele, r2); - return mk_der_union(result, re_and(is_n, dr2)); + is_n = re_predicate(is_n, seq_sort); + return mk_der_union(result, mk_der_concat(is_n, dr2)); } else if (re().is_star(r, r1)) { return mk_der_concat(mk_derivative(ele, r1), r); @@ -2459,8 +2464,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { } else if (re().is_loop(r, r1, lo, hi)) { if (hi == 0) { - result = re().mk_empty(m().get_sort(r)); - return result; + return mk_empty(); } hi--; if (lo > 0) { @@ -2470,20 +2474,17 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { } else if (re().is_full_seq(r) || re().is_empty(r)) { - result = r; - return result; + return expr_ref(r, m()); } else if (re().is_to_re(r, r1)) { // r1 is a string here (not a regexp) expr_ref hd(m()), tl(m()); if (get_head_tail(r1, hd, tl)) { // head must be equal; if so, derivative is tail - result = re_and(m().mk_eq(ele, hd), re().mk_to_re(tl)); - return result; + return re_and(m().mk_eq(ele, hd), re().mk_to_re(tl)); } else if (str().is_empty(r1)) { - result = re().mk_empty(m().get_sort(r)); - return result; + return mk_empty(); } else { return expr_ref(re().mk_derivative(ele, r), m()); @@ -2495,15 +2496,10 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // This is analagous to the previous is_to_re case. expr_ref hd(m()), tl(m()); if (get_head_tail_reversed(r2, hd, tl)) { - result = re_and( - m().mk_eq(ele, tl), - re().mk_reverse(re().mk_to_re(hd)) - ); - return result; + return re_and(m().mk_eq(ele, tl), re().mk_reverse(re().mk_to_re(hd))); } else if (str().is_empty(r2)) { - result = re().mk_empty(m().get_sort(r)); - return result; + return mk_empty(); } else { return expr_ref(re().mk_derivative(ele, r), m()); @@ -2516,32 +2512,27 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { if (s1.length() == 1 && s2.length() == 1) { r1 = m_util.mk_char(s1[0]); r2 = m_util.mk_char(s2[0]); - result = m().mk_and(m_util.mk_le(r1, ele), m_util.mk_le(ele, r2)); - result = re_predicate(result, seq_sort); - return result; + return mk_der_inter(re_predicate(m_util.mk_le(r1, ele), seq_sort), + re_predicate(m_util.mk_le(ele, r2), seq_sort)); } else { - result = re().mk_empty(m().get_sort(r)); - return result; + return mk_empty(); } } expr* e1 = nullptr, *e2 = nullptr; if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) { - result = m().mk_and(m_util.mk_le(e1, ele), m_util.mk_le(ele, e2)); - result = re_predicate(result, seq_sort); - return result; + return mk_der_inter(re_predicate(m_util.mk_le(e1, ele), seq_sort), + re_predicate(m_util.mk_le(ele, e2), seq_sort)); } } else if (re().is_full_char(r)) { - result = re().mk_to_re(str().mk_empty(seq_sort)); - return result; + return expr_ref(re().mk_to_re(str().mk_empty(seq_sort)), m()); } else if (re().is_of_pred(r, p)) { array_util array(m()); expr* args[2] = { p, ele }; result = array.mk_select(2, args); - result = re_predicate(result, seq_sort); - return result; + return re_predicate(result, seq_sort); } // stuck cases: re().is_derivative, variable, ... // and re().is_reverse if the reverse is not applied to a string @@ -3196,7 +3187,6 @@ void seq_rewriter::elim_condition(expr* elem, expr_ref& cond) { else intersect(0, ch-1, ranges); } - // TBD: case for negation of range (not (and (<= lo e) (<= e hi))) else { all_ranges = false; break; @@ -3237,124 +3227,6 @@ void seq_rewriter::elim_condition(expr* elem, expr_ref& cond) { } } -void seq_rewriter::get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result) { - expr_ref cond(m()), th(m()), el(m()); - if (has_cofactor(r, cond, th, el)) { - conds.push_back(cond); - get_cofactors(th, conds, result); - conds.pop_back(); - conds.push_back(mk_not(m(), cond)); - get_cofactors(el, conds, result); - conds.pop_back(); - } - else { - cond = mk_and(conds); - result.push_back(cond, r); - } -} - -bool seq_rewriter::has_cofactor(expr* r, expr_ref& cond, expr_ref& th, expr_ref& el) { - if (m().is_ite(r)) { - cond = to_app(r)->get_arg(0); - th = to_app(r)->get_arg(1); - el = to_app(r)->get_arg(2); - return true; - } - expr_ref_vector trail(m()), args_th(m()), args_el(m()); - expr* c = nullptr, *tt = nullptr, *ee = nullptr; - cond = nullptr; - obj_map cache_th, cache_el; - expr_mark no_cofactor, visited; - ptr_vector todo; - todo.push_back(r); - while (!todo.empty()) { - expr* e = todo.back(); - if (visited.is_marked(e) || !is_app(e)) { - todo.pop_back(); - continue; - } - app* a = to_app(e); - if (m().is_ite(e, c, tt, ee)) { - if (!cond) { - cond = c; - cache_th.insert(a, tt); - cache_el.insert(a, ee); - } - else if (cond == c) { - cache_th.insert(a, tt); - cache_el.insert(a, ee); - } - else { - no_cofactor.mark(a); - } - visited.mark(e, true); - todo.pop_back(); - continue; - } - - if (a->get_family_id() != u().get_family_id()) { - visited.mark(e, true); - no_cofactor.mark(e, true); - todo.pop_back(); - continue; - } - switch (a->get_decl_kind()) { - case OP_RE_CONCAT: - case OP_RE_UNION: - case OP_RE_INTERSECT: - case OP_RE_COMPLEMENT: - break; - case OP_RE_STAR: - case OP_RE_LOOP: - default: - visited.mark(e, true); - no_cofactor.mark(e, true); - continue; - } - args_th.reset(); - args_el.reset(); - bool has_cof = false; - for (expr* arg : *a) { - if (no_cofactor.is_marked(arg)) { - args_th.push_back(arg); - args_el.push_back(arg); - } - else if (cache_th.contains(arg)) { - args_th.push_back(cache_th[arg]); - args_el.push_back(cache_el[arg]); - has_cof = true; - } - else { - todo.push_back(arg); - } - } - if (args_th.size() == a->get_num_args()) { - if (has_cof) { - th = mk_app(a->get_decl(), args_th); - el = mk_app(a->get_decl(), args_el); - trail.push_back(th); - trail.push_back(el); - cache_th.insert(a, th); - cache_el.insert(a, el); - } - else { - no_cofactor.mark(a, true); - } - visited.mark(e, true); - todo.pop_back(); - } - } - SASSERT(cond == !no_cofactor.is_marked(r)); - if (cond) { - th = cache_th[r]; - el = cache_el[r]; - return true; - } - else { - return false; - } -} - br_status seq_rewriter::reduce_re_is_empty(expr* r, expr_ref& result) { expr* r1, *r2, *r3, *r4; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 5f5f36feb..6b7b06653 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -273,12 +273,13 @@ class seq_rewriter { class seq_util::str& str() { return u().str; } class seq_util::str const& str() const { return u().str; } - void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); + expr_ref is_nullable_rec(expr* r); void intersect(unsigned lo, unsigned hi, svector>& ranges); public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): - m_util(m), m_autil(m), m_re2aut(m), m_op_cache(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { + m_util(m), m_autil(m), m_re2aut(m), m_op_cache(m), m_es(m), + m_lhs(m), m_rhs(m), m_coalesce_chars(true) { } ast_manager & m() const { return m_util.get_manager(); } family_id get_fid() const { return m_util.get_family_id(); } @@ -320,14 +321,7 @@ public: void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs); expr_ref is_nullable(expr* r); - expr_ref is_nullable_rec(expr* r); - bool has_cofactor(expr* r, expr_ref& cond, expr_ref& th, expr_ref& el); - - void get_cofactors(expr* r, expr_ref_pair_vector& result) { - expr_ref_vector conds(m()); - get_cofactors(r, conds, result); - } // heuristic elimination of element from condition that comes form a derivative. // special case optimization for conjunctions of equalities, disequalities and ranges. diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 4ca6cdbf7..f08cc2a6c 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -148,32 +148,6 @@ namespace smt { m_to_propagate.push_back(lit); } - - // s in R[if(p,R1,R2)] & p => s in R[R1] - // s in R[if(p,R1,R2)] & ~p => s in R[R2] - - bool seq_regex::unfold_cofactors(expr_ref& r, literal_vector& conds) { - expr_ref cond(m), tt(m), el(m); - while (seq_rw().has_cofactor(r, cond, tt, el)) { - rewrite(cond); - literal lcond = th.mk_literal(cond); - switch (ctx.get_assignment(lcond)) { - case l_true: - conds.push_back(~lcond); - r = tt; - break; - case l_false: - conds.push_back(lcond); - r = el; - break; - case l_undef: - ctx.mark_as_relevant(lcond); - return false; - } - rewrite(r); - } - return true; - } /** * Propagate the atom (accept s i r) @@ -198,19 +172,14 @@ namespace smt { TRACE("seq", tout << "propagate " << mk_pp(e, m) << "\n";); - if (block_unfolding(lit, idx)) - return true; - - literal_vector conds; - conds.push_back(~lit); - if (!unfold_cofactors(d, conds)) - return false; - - if (re().is_empty(d)) { - th.add_axiom(conds); + if (re().is_empty(r)) { + th.add_axiom(~lit); return true; } + if (block_unfolding(lit, idx)) + return true; + // s in R & len(s) <= i => nullable(R) literal len_s_le_i = th.m_ax.mk_le(th.mk_len(s), idx); switch (ctx.get_assignment(len_s_le_i)) { @@ -218,11 +187,9 @@ namespace smt { ctx.mark_as_relevant(len_s_le_i); return false; case l_true: - is_nullable = seq_rw().is_nullable(d); + is_nullable = seq_rw().is_nullable(r); rewrite(is_nullable); - conds.push_back(~len_s_le_i); - conds.push_back(th.mk_literal(is_nullable)); - th.add_axiom(conds); + th.add_axiom(~lit, ~len_s_le_i, th.mk_literal(is_nullable)); return true; case l_false: break; @@ -230,14 +197,38 @@ namespace smt { // (accept s i R) & len(s) > i => (accept s (+ i 1) D(nth(s, i), R)) or conds expr_ref head = th.mk_nth(s, i); - d = re().mk_derivative(head, r); + d = re().mk_derivative(m.mk_var(0, m.get_sort(head)), r); rewrite(d); - literal acc_next = th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d)); + literal_vector conds; + conds.push_back(~lit); conds.push_back(len_s_le_i); - conds.push_back(acc_next); - th.add_axiom(conds); - + expr* cond = nullptr, *tt = nullptr, *el = nullptr; + var_subst subst(m); + expr_ref_vector sub(m); + sub.push_back(head); + // s in R[if(p,R1,R2)] & p => s in R[R1] + // s in R[if(p,R1,R2)] & ~p => s in R[R2] + while (m.is_ite(d, cond, tt, el)) { + literal lcond = th.mk_literal(subst(cond, sub)); + switch (ctx.get_assignment(lcond)) { + case l_true: + conds.push_back(~lcond); + d = tt; + break; + case l_false: + conds.push_back(lcond); + d = el; + break; + case l_undef: + ctx.mark_as_relevant(lcond); + return false; + } + } + // at this point there should be no free variables as the ites are at top-level. + if (!re().is_empty(d)) + conds.push_back(th.mk_literal(sk().mk_accept(s, a().mk_int(idx + 1), d))); + th.add_axiom(conds); TRACE("seq", tout << "unfold " << head << "\n" << mk_pp(r, m) << "\n";); return true; } @@ -358,7 +349,7 @@ namespace smt { if (null_lit != false_literal) lits.push_back(null_lit); expr_ref_pair_vector cofactors(m); - seq_rw().get_cofactors(d, cofactors); + get_cofactors(d, cofactors); for (auto const& p : cofactors) { if (is_member(p.second, u)) continue; @@ -375,6 +366,21 @@ namespace smt { th.add_axiom(lits); } + void seq_regex::get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result) { + expr* cond = nullptr, *th = nullptr, *el = nullptr; + if (m.is_ite(r, cond, th, el)) { + conds.push_back(cond); + get_cofactors(th, conds, result); + conds.pop_back(); + conds.push_back(mk_not(m, cond)); + get_cofactors(el, conds, result); + conds.pop_back(); + } + else { + cond = mk_and(conds); + result.push_back(cond, r); + } + } /* is_empty(r, u) => ~is_nullable(r) @@ -398,10 +404,7 @@ namespace smt { rewrite(d); literal_vector lits; expr_ref_pair_vector cofactors(m); - seq_rw().get_cofactors(d, cofactors); - - // is_empty(r, u) => forall hd . cond => is_empty(r1, u union r) - + get_cofactors(d, cofactors); for (auto const& p : cofactors) { if (is_member(p.second, u)) continue; diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index ebcbc53b1..02fe1555a 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -63,12 +63,17 @@ namespace smt { expr_ref unroll_non_empty(expr* r, expr_mark& seen, unsigned depth); - bool unfold_cofactors(expr_ref& r, literal_vector& conds); - bool is_member(expr* r, expr* u); expr_ref symmetric_diff(expr* r1, expr* r2); + void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); + + void get_cofactors(expr* r, expr_ref_pair_vector& result) { + expr_ref_vector conds(m); + get_cofactors(r, conds, result); + } + public: seq_regex(theory_seq& th);