From 88e36c6bf309bb90f0f73a63703450175d53471f Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 26 May 2020 20:42:21 -0700 Subject: [PATCH] add general purpose emptiness/non-emptiness check Signed-off-by: Nikolaj Bjorner --- src/ast/expr_abstract.h | 2 + src/ast/rewriter/seq_rewriter.cpp | 137 +++++++++++++++++++++++- src/ast/rewriter/seq_rewriter.h | 21 ++++ src/smt/seq_regex.cpp | 171 +++++++++++++++++++----------- src/smt/seq_regex.h | 6 ++ src/smt/seq_skolem.cpp | 2 + src/smt/seq_skolem.h | 14 ++- src/smt/theory_seq.cpp | 10 +- 8 files changed, 298 insertions(+), 65 deletions(-) diff --git a/src/ast/expr_abstract.h b/src/ast/expr_abstract.h index 2bf43c61f..8e53a717f 100644 --- a/src/ast/expr_abstract.h +++ b/src/ast/expr_abstract.h @@ -38,6 +38,8 @@ inline expr_ref expr_abstract(expr_ref_vector const& bound, expr* n) { return ex inline expr_ref expr_abstract(app_ref_vector const& bound, expr* n) { return expr_abstract(bound.m(), 0, bound.size(), (expr*const*)bound.c_ptr(), n); } expr_ref mk_forall(ast_manager& m, unsigned num_bound, app* const* bound, expr* n); expr_ref mk_exists(ast_manager& m, unsigned num_bound, app* const* bound, expr* n); +inline expr_ref mk_forall(ast_manager& m, app* b, expr* n) { return mk_forall(m, 1, &b, n); } +inline expr_ref mk_forall(ast_manager& m, expr* b, expr* n) { return mk_forall(m, to_app(b), n); } #endif diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 258bbee92..9481057eb 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -28,6 +28,7 @@ Notes: #include "ast/well_sorted.h" #include "ast/rewriter/var_subst.h" #include "ast/rewriter/bool_rewriter.h" +#include "ast/rewriter/expr_safe_replace.h" #include "ast/rewriter/seq_rewriter_params.hpp" #include "math/automata/automaton.h" #include "math/automata/symbolic_automata_def.h" @@ -2672,6 +2673,138 @@ br_status seq_rewriter::mk_re_opt(expr* a, expr_ref& result) { return BR_REWRITE1; } +void 
seq_rewriter::intersect(unsigned lo, unsigned hi, svector>& ranges) { + unsigned j = 0; + for (unsigned i = 0; i < ranges.size(); ++i) { + unsigned lo1 = ranges[i].first; + unsigned hi1 = ranges[i].second; + if (hi < lo1) + break; + if (hi1 >= lo) + ranges[j++] = std::make_pair(std::max(lo1, lo), std::min(hi1, hi)); + } + ranges.shrink(j); +} + +/** + * Simplify cond using special case rewriting for character equations + * When elem is uninterpreted compute the simplification of Exists elem . cond + * if it is possible to solve for elem. + */ +void seq_rewriter::elim_condition(expr* elem, expr_ref& cond) { + expr_ref_vector conds(m()); + flatten_and(cond, conds); + expr* lhs = nullptr, *rhs = nullptr, *e1 = nullptr; + if (u().is_char(elem)) { + unsigned ch = 0; + svector> ranges, ranges1; + ranges.push_back(std::make_pair(0, zstring::max_char())); + auto exclude_char = [&](unsigned ch) { + if (ch == 0) { + intersect(1, zstring::max_char(), ranges); + } + else if (ch == zstring::max_char()) { + intersect(0, ch-1, ranges); + } + else { + ranges1.reset(); + ranges1.append(ranges); + intersect(0, ch-1, ranges); + intersect(ch + 1, zstring::max_char(), ranges1); + ranges.append(ranges1); + } + }; + bool all_ranges = true; + for (expr* e : conds) { + if (m().is_eq(e, lhs, rhs) && elem == lhs && u().is_const_char(rhs, ch)) { + intersect(ch, ch, ranges); + } + else if (m().is_eq(e, lhs, rhs) && elem == rhs && u().is_const_char(lhs, ch)) { + intersect(ch, ch, ranges); + } + else if (u().is_char_le(e, lhs, rhs) && elem == lhs && u().is_const_char(rhs, ch)) { + intersect(0, ch, ranges); + } + else if (u().is_char_le(e, lhs, rhs) && elem == rhs && u().is_const_char(lhs, ch)) { + intersect(ch, zstring::max_char(), ranges); + } + else if (m().is_not(e, e1) && m().is_eq(e1, lhs, rhs) && elem == lhs && u().is_const_char(rhs, ch)) { + exclude_char(ch); + } + else if (m().is_not(e, e1) && m().is_eq(e1, lhs, rhs) && elem == rhs && u().is_const_char(lhs, ch)) { + exclude_char(ch); + } 
+ else if (m().is_not(e, e1) && u().is_char_le(e1, lhs, rhs) && elem == lhs && u().is_const_char(rhs, ch)) { + // not (e <= ch) + if (ch == zstring::max_char()) + ranges.reset(); + else + intersect(ch+1, zstring::max_char(), ranges); + } + else if (m().is_not(e, e1) && u().is_char_le(e1, lhs, rhs) && elem == rhs && u().is_const_char(lhs, ch)) { + // not (ch <= e) + if (ch == 0) + ranges.reset(); + else + intersect(0, ch-1, ranges); + } + // TBD: case for negation of range (not (and (<= lo e) (<= e hi))) + else { + all_ranges = false; + break; + } + if (ranges.empty()) + break; + } + if (all_ranges) { + if (ranges.empty()) { + cond = m().mk_false(); + return; + } + if (is_uninterp_const(elem)) { + cond = m().mk_true(); + return; + } + } + } + + expr* solution = nullptr; + for (expr* e : conds) { + if (!m().is_eq(e, lhs, rhs)) + continue; + if (rhs == elem) + std::swap(lhs, rhs); + if (lhs != elem) + continue; + solution = rhs; + break; + } + if (solution) { + expr_safe_replace rep(m()); + rep.insert(elem, solution); + rep(cond); + if (!is_uninterp_const(elem)) { + cond = m().mk_and(m().mk_eq(elem, solution), cond); + } + } +} + +void seq_rewriter::get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result) { + expr_ref cond(m()), th(m()), el(m()); + if (has_cofactor(r, cond, th, el)) { + conds.push_back(cond); + get_cofactors(th, conds, result); + conds.pop_back(); + conds.push_back(mk_not(m(), cond)); + get_cofactors(el, conds, result); + conds.pop_back(); + } + else { + cond = mk_and(conds); + result.push_back(cond, r); + } +} + bool seq_rewriter::has_cofactor(expr* r, expr_ref& cond, expr_ref& th, expr_ref& el) { if (m().is_ite(r)) { cond = to_app(r)->get_arg(0); @@ -2749,8 +2882,8 @@ bool seq_rewriter::has_cofactor(expr* r, expr_ref& cond, expr_ref& th, expr_ref& } if (args_th.size() == a->get_num_args()) { if (has_cof) { - th = m().mk_app(a->get_decl(), args_th); - el = m().mk_app(a->get_decl(), args_el); + th = mk_app(a->get_decl(), args_th); 
+ el = mk_app(a->get_decl(), args_el); trail.push_back(th); trail.push_back(el); cache_th.insert(a, th); diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 8de7d8350..6ed4a9591 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -214,6 +214,9 @@ class seq_rewriter { class seq_util::str& str() { return u().str; } class seq_util::str const& str() const { return u().str; } + void get_cofactors(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); + void intersect(unsigned lo, unsigned hi, svector>& ranges); + public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): m_util(m), m_autil(m), m_re2aut(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { @@ -235,6 +238,15 @@ public: br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result); br_status mk_bool_app(func_decl* f, unsigned n, expr* const* args, expr_ref& result); + expr_ref mk_app(func_decl* f, expr_ref_vector const& args) { return mk_app(f, args.size(), args.c_ptr()); } + expr_ref mk_app(func_decl* f, unsigned n, expr* const* args) { + expr_ref result(m()); + if (f->get_family_id() != u().get_family_id() || + BR_FAILED == mk_app_core(f, n, args, result)) + result = m().mk_app(f, n, args); + return result; + } + bool reduce_eq(expr* l, expr* r, expr_ref_pair_vector& new_eqs, bool& change); bool reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& new_eqs, bool& change); @@ -249,6 +261,15 @@ public: bool has_cofactor(expr* r, expr_ref& cond, expr_ref& th, expr_ref& el); + void get_cofactors(expr* r, expr_ref_pair_vector& result) { + expr_ref_vector conds(m()); + get_cofactors(r, conds, result); + } + + // heuristic elimination of element from condition that comes from a derivative. + // special case optimization for conjunctions of equalities, disequalities and ranges. 
+ void elim_condition(expr* elem, expr_ref& cond); + }; #endif diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 677baa4fb..900092c98 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -17,6 +17,7 @@ Author: #include "smt/seq_regex.h" #include "smt/theory_seq.h" +#include "ast/expr_abstract.h" namespace smt { @@ -267,10 +268,20 @@ namespace smt { } void seq_regex::propagate_eq(expr* r1, expr* r2) { - // the dual version of unroll_non_empty, but - // skolem functions have to be eliminated or turned into - // universal quantifiers. - throw default_exception("emptiness checking for regex is TBD"); + expr_ref r(m); + if (re().is_empty(r1)) + std::swap(r1, r2); + if (re().is_empty(r2)) + r = r1; + else + r = re().mk_union(re().mk_diff(r1, r2), re().mk_diff(r2, r1)); + rewrite(r); + sort* seq_sort = nullptr; + VERIFY(u().is_re(r, seq_sort)); + expr_ref emp(re().mk_empty(seq_sort), m); + literal lit = th.mk_eq(r, emp, false); + expr_ref is_empty = sk().mk_is_empty(r, emp); /* axiom: r = {} => is_empty(r, {}) */ + th.add_axiom(~lit, th.mk_literal(is_empty)); } void seq_regex::propagate_ne(expr* r1, expr* r2) { @@ -284,74 +295,112 @@ namespace smt { rewrite(r); sort* seq_sort = nullptr; VERIFY(u().is_re(r, seq_sort)); - literal lit = ~th.mk_eq(r, re().mk_empty(seq_sort), false); - expr_mark seen; - expr_ref non_empty = unroll_non_empty(r, seen, 0); - if (non_empty) { - rewrite(non_empty); - th.add_axiom(~lit, th.mk_literal(non_empty)); - } - else { - // generally introduce predicate (re.nonempty r seen) - // with inference rules based on unroll_non_empty - throw default_exception("unrolling large regexes is TBD"); + expr_ref emp(re().mk_empty(seq_sort), m); + literal lit = ~th.mk_eq(r, emp, false); + expr_ref is_non_empty = sk().mk_is_non_empty(r, emp); /* axiom: r != {} => is_non_empty(r, {}) */ + th.add_axiom(~lit, th.mk_literal(is_non_empty)); + } + + bool seq_regex::is_member(expr* r, expr* u) { + expr* u2 = nullptr; + while (re().is_union(u, u, u2)) { + if (r == u2) + return true; } + return r == u; } /** - nonempty(R 
union Q, Seen) = R != {} or Q != {} - nonempty(R[if(p,R1,R2)], Seen) = if(p, nonempty(R[R1], Seen), nonempty(R[R2], Seen)) (co-factor) - nonempty(R, Seen) = nullable(R) or (R not in Seen and nonempty(D(first(R),R), Seen u { R })) (derivative) - - TBD: eliminate variables from p when possible to perform quantifier elimination. - - p := first(R) == 'a' - then replace first(R) by 'a' in R[R1] - TBD: - empty(R, Seen) = R = {} if R does not contain a subterm in Seen and Seen is non-empty + * is_non_empty(r, u) => nullable or not c_i or is_non_empty(r_i, u union r) + * + * for each (c_i, r_i) in cofactors + * + * is_non_empty(r_i, u union r) := false if r_i in u + * + */ + void seq_regex::propagate_is_non_empty(literal lit) { + expr* e = ctx.bool_var2expr(lit.var()), *r, *u; + VERIFY(sk().is_is_non_empty(e, r, u)); + expr_ref is_nullable = seq_rw().is_nullable(r); + rewrite(is_nullable); + if (m.is_true(is_nullable)) + return; + literal null_lit = th.mk_literal(is_nullable); + expr_ref hd = mk_first(r); + expr_ref d = seq_rw().derivative(hd, r); + if (!d) + throw default_exception("derivative was not defined"); + literal_vector lits; + expr_ref_pair_vector cofactors(m); + seq_rw().get_cofactors(d, cofactors); + for (auto const& p : cofactors) { + expr_ref cond(p.first, m); + seq_rw().elim_condition(hd, cond); + rewrite(cond); + if (m.is_false(cond)) + continue; + lits.reset(); + lits.push_back(~lit); + if (!m.is_true(cond)) + lits.push_back(~th.mk_literal(cond)); + if (false_literal != null_lit) + lits.push_back(null_lit); + if (!is_member(p.second, u)) + lits.push_back(th.mk_literal(sk().mk_is_non_empty(p.second, re().mk_union(u, r)))); + th.add_axiom(lits); + } + } - first : RegEx -> Char is a skolem function - */ + /* + is_empty(r, u) => ~is_nullable(r) + is_empty(r, u) => (forall x . 
~cond(x)) or is_empty(r1, u union r) for (cond, r1) in min-terms(D(x,r)) + + is_empty(r, u) is true if r is a member of u + */ + void seq_regex::propagate_is_empty(literal lit) { + expr* e = ctx.bool_var2expr(lit.var()), *r, *u; + VERIFY(sk().is_is_empty(e, r, u)); + expr_ref is_nullable = seq_rw().is_nullable(r); + rewrite(is_nullable); + if (m.is_true(is_nullable)) { + th.add_axiom(~lit); + return; + } + th.add_axiom(~lit, ~th.mk_literal(is_nullable)); + expr_ref hd = mk_first(r); + expr_ref d = seq_rw().derivative(hd, r); + if (!d) + throw default_exception("derivative was not defined"); + literal_vector lits; + expr_ref_pair_vector cofactors(m); + seq_rw().get_cofactors(d, cofactors); + + // is_empty(r, u) => forall hd . cond => is_empty(r1, u union r) + + for (auto const& p : cofactors) { + if (is_member(p.second, u)) + continue; + expr_ref cond(p.first, m); + seq_rw().elim_condition(hd, cond); + rewrite(cond); + if (m.is_false(cond)) + continue; + lits.reset(); + lits.push_back(~lit); + expr_ref is_empty1 = sk().mk_is_empty(p.second, re().mk_union(u, r)); /* the cofactor r1 must itself be empty */ + if (!m.is_true(cond)) { + lits.push_back(th.mk_literal(mk_forall(m, hd, m.mk_not(cond)))); + } + lits.push_back(th.mk_literal(is_empty1)); + th.add_axiom(lits); + } + } expr_ref seq_regex::mk_first(expr* r) { sort* elem_sort = nullptr, *seq_sort = nullptr; VERIFY(u().is_re(r, seq_sort)); VERIFY(u().is_seq(seq_sort, elem_sort)); return expr_ref(m.mk_fresh_const("re.first", elem_sort), m); - // return sk().mk("re.first", r, elem_sort); - // - for this to be effective, requires internalizer to skip skolem function internalization, - // because of the regex argument r and we don't handle extensionality of regex well. - // It is probably a good idea to skip internalization of all skolem expressions, - // but requires some changes to theory_seq. - // - it is more useful to eliminate quantifiers in he common case, so never have to - // work with fresh expressions in the fist hand. 
This is possible for characters and - // ranges (just equalities and inequalities with constant bounds). - } - - expr_ref seq_regex::unroll_non_empty(expr* r, expr_mark& seen, unsigned depth) { - if (seen.is_marked(r)) - return expr_ref(m.mk_false(), m); - if (depth > 300) - return expr_ref(m); - expr_ref result(m), cond(m), th(m), el(m); - // TBD: try also rewriting - if (seq_rw().has_cofactor(r, cond, th, el)) { - th = unroll_non_empty(th, seen, depth + 1); - el = unroll_non_empty(el, seen, depth + 1); - if (th && el) - result = m.mk_ite(cond, th, el); - return result; - } - expr_ref hd = mk_first(r); - result = seq_rw().derivative(hd, r); - if (result) { - // TBD fast check if r is a subterm of result, if not, then - // loop instead of recurse - seen.mark(r, true); - result = unroll_non_empty(result, seen, depth + 1); - seen.mark(r, false); - } - return result; } } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 6fbc11e33..e79fe7894 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -65,6 +65,8 @@ namespace smt { bool unfold_cofactors(expr_ref& r, literal_vector& conds); + bool is_member(expr* r, expr* u); + public: seq_regex(theory_seq& th); @@ -84,6 +86,10 @@ namespace smt { void propagate_eq(expr* r1, expr* r2); void propagate_ne(expr* r1, expr* r2); + + void propagate_is_non_empty(literal lit); + + void propagate_is_empty(literal lit); }; diff --git a/src/smt/seq_skolem.cpp b/src/smt/seq_skolem.cpp index 8b0ed9129..5cae70080 100644 --- a/src/smt/seq_skolem.cpp +++ b/src/smt/seq_skolem.cpp @@ -37,6 +37,8 @@ seq_skolem::seq_skolem(ast_manager& m, th_rewriter& rw): m_seq_align = "seq.align"; m_max_unfolding = "seq.max_unfolding"; m_length_limit = "seq.length_limit"; + m_is_empty = "re.is_empty"; + m_is_non_empty = "re.is_non_empty"; } expr_ref seq_skolem::mk(symbol const& s, expr* e1, expr* e2, expr* e3, expr* e4, sort* range) { diff --git a/src/smt/seq_skolem.h b/src/smt/seq_skolem.h index 9506f2517..b25c3d563 100644 --- 
a/src/smt/seq_skolem.h +++ b/src/smt/seq_skolem.h @@ -36,6 +36,7 @@ namespace smt { symbol m_indexof_left, m_indexof_right; // inverse of indexof: (indexof_left s t) + s + (indexof_right s t) = t, for s in t. symbol m_aut_step; // regex unfolding state symbol m_accept; // regex + symbol m_is_empty, m_is_non_empty; // regex emptiness check symbol m_pre, m_post; // inverse of at: (pre s i) + (at s i) + (post s i) = s if 0 <= i < (len s) symbol m_eq; // equality atom symbol m_seq_align; @@ -60,6 +61,9 @@ namespace smt { expr_ref mk_align(expr* e1, expr* e2, expr* e3, expr* e4) { return mk(m_seq_align, e1, e2, e3, e4); } expr_ref mk_accept(expr_ref_vector const& args) { return expr_ref(seq.mk_skolem(m_accept, args.size(), args.c_ptr(), m.mk_bool_sort()), m); } expr_ref mk_accept(expr* s, expr* i, expr* r) { return mk(m_accept, s, i, r, nullptr, m.mk_bool_sort()); } + expr_ref mk_is_non_empty(expr* r, expr* u) { return mk(m_is_non_empty, r, u, m.mk_bool_sort()); } + expr_ref mk_is_empty(expr* r, expr* u) { return mk(m_is_empty, r, u, m.mk_bool_sort()); } + expr_ref mk_indexof_left(expr* t, expr* s, expr* offset = nullptr) { return mk(m_indexof_left, t, s, offset); } expr_ref mk_indexof_right(expr* t, expr* s, expr* offset = nullptr) { return mk(m_indexof_right, t, s, offset); } expr_ref mk_last_indexof_left(expr* t, expr* s, expr* offset = nullptr) { return mk("seq.last_indexof_left", t, s, offset); } @@ -82,6 +86,7 @@ namespace smt { expr_ref mk_right(expr* x, expr* y, expr* z = nullptr) { return mk("seq.right", x, y, z); } expr_ref mk_max_unfolding_depth(unsigned d); expr_ref mk_length_limit(expr* e, unsigned d); + bool is_skolem(symbol const& s, expr* e) const; bool is_skolem(expr* e) const { return seq.is_skolem(e); } @@ -117,7 +122,14 @@ namespace smt { bool is_max_unfolding(expr* e) const { return is_skolem(m_max_unfolding, e); } bool is_length_limit(expr* e) const { return is_skolem(m_length_limit, e); } bool is_length_limit(expr* p, unsigned& lim, expr*& s) 
const; - + bool is_is_empty(expr* e) const { return is_skolem(m_is_empty, e); } + bool is_is_non_empty(expr* e) const { return is_skolem(m_is_non_empty, e); } + bool is_is_empty(expr* e, expr*& r, expr*& u) const { + return is_skolem(m_is_empty, e) && (r = to_app(e)->get_arg(0), u = to_app(e)->get_arg(1), true); + } + bool is_is_non_empty(expr* e, expr*& r, expr*& u) const { + return is_skolem(m_is_non_empty, e) && (r = to_app(e)->get_arg(0), u = to_app(e)->get_arg(1), true); + } void decompose(expr* e, expr_ref& head, expr_ref& tail); diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 931bfb626..2e009c31e 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -1536,7 +1536,7 @@ bool theory_seq::internalize_term(app* term) { } if (ctx.get_fparams().m_seq_use_derivatives && - (m_util.str.is_in_re(term) || m_sk.is_accept(term))) { + (m_util.str.is_in_re(term) || m_sk.is_skolem(term))) { bool_var bv = ctx.mk_bool_var(term); ctx.set_var_theory(bv, get_id()); ctx.mark_as_relevant(bv); @@ -3069,6 +3069,14 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { } } } + else if (m_sk.is_is_empty(e)) { + if (is_true) + m_regex.propagate_is_empty(lit); + } + else if (m_sk.is_is_non_empty(e)) { + if (is_true) + m_regex.propagate_is_non_empty(lit); + } else if (m_sk.is_step(e)) { if (is_true) { propagate_step(lit, e);