From 835b57b77584e0ec22a0976806b392447283763f Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 14 Apr 2020 17:33:44 -0700 Subject: [PATCH] fix #3961 fix #3940 Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/CMakeLists.txt | 1 + src/ast/rewriter/bool_rewriter.cpp | 8 +- src/ast/rewriter/seq_rewriter.cpp | 41 ++++++++++- src/ast/rewriter/seq_rewriter.h | 9 ++- src/ast/seq_decl_plugin.cpp | 10 ++- src/ast/seq_decl_plugin.h | 2 + src/cmd_context/check_logic.cpp | 4 + src/smt/theory_seq.cpp | 114 +++++++++-------------------- src/smt/theory_seq.h | 8 +- 9 files changed, 100 insertions(+), 97 deletions(-) diff --git a/src/ast/rewriter/CMakeLists.txt b/src/ast/rewriter/CMakeLists.txt index d506c75b3..260a00bb8 100644 --- a/src/ast/rewriter/CMakeLists.txt +++ b/src/ast/rewriter/CMakeLists.txt @@ -47,4 +47,5 @@ z3_add_component(rewriter fpa_rewriter_params.pyg poly_rewriter_params.pyg rewriter_params.pyg + seq_rewriter_params.pyg ) diff --git a/src/ast/rewriter/bool_rewriter.cpp b/src/ast/rewriter/bool_rewriter.cpp index 0a2ca2b93..70df5de91 100644 --- a/src/ast/rewriter/bool_rewriter.cpp +++ b/src/ast/rewriter/bool_rewriter.cpp @@ -646,13 +646,13 @@ br_status bool_rewriter::try_ite_value(app * ite, app * val, expr_ref & result) } expr* cond2 = nullptr, *t2 = nullptr, *e2 = nullptr; - if (m().is_ite(t, cond2, t2, e2) && m().is_value(t2) && m().is_value(e2)) { - VERIFY(BR_FAILED != try_ite_value(to_app(t), val, result)); + if (m().is_ite(t, cond2, t2, e2) && m().is_value(t2) && m().is_value(e2) && + BR_FAILED != try_ite_value(to_app(t), val, result)) { result = m().mk_ite(cond, result, mk_eq(e, val)); return BR_REWRITE2; } - if (m().is_ite(e, cond2, t2, e2) && m().is_value(t2) && m().is_value(e2)) { - VERIFY(BR_FAILED != try_ite_value(to_app(e), val, result)); + if (m().is_ite(e, cond2, t2, e2) && m().is_value(t2) && m().is_value(e2) && + BR_FAILED != try_ite_value(to_app(e), val, result)) { result = m().mk_ite(cond, mk_eq(t, val), result); return BR_REWRITE2; } diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 39a04f6ec..b522fe940 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -27,6 +27,7 @@ Notes: #include "ast/well_sorted.h" #include "ast/rewriter/var_subst.h" #include "ast/rewriter/bool_rewriter.h" +#include "ast/rewriter/seq_rewriter_params.hpp" #include "math/automata/automaton.h" #include "math/automata/symbolic_automata_def.h" @@ -387,6 +388,17 @@ eautomaton* re2automaton::seq2aut(expr* e) { return nullptr; } +void seq_rewriter::updt_params(params_ref const & p) { + seq_rewriter_params sp(p); + m_coalesce_chars = sp.coalesce_chars(); +} + +static void get_param_descrs(param_descrs & r) { + seq_rewriter_params::collect_param_descrs(r); +} + + + br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) { SASSERT(f->get_family_id() == get_fid()); br_status st = BR_FAILED; @@ -531,7 +543,11 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con st = mk_str_lt(args[0], args[1], result); break; case OP_STRING_CONST: - return BR_FAILED; + st = BR_FAILED; + if (!m_coalesce_chars) { + st = mk_str_units(f, result); + } + break; case OP_STRING_ITOS: SASSERT(num_args == 1); st = mk_str_itos(args[0], result); @@ -563,7 +579,7 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con br_status seq_rewriter::mk_seq_unit(expr* e, expr_ref& result) { unsigned ch; // specifically we want (_ BitVector 8) - if (m_util.is_const_char(e, ch)) { + if (m_util.is_const_char(e, ch) && m_coalesce_chars) { // convert to string constant zstring str(ch); TRACE("seq_verbose", tout << "rewrite seq.unit of 8-bit value " << ch << " to string constant \"" << str<< "\"" << std::endl;); @@ -584,8 +600,8 @@ br_status seq_rewriter::mk_seq_unit(expr* e, expr_ref& result) { br_status seq_rewriter::mk_seq_concat(expr* a, expr* b, expr_ref& result) { zstring s1, s2; expr* c, *d; - bool isc1 = m_util.str.is_string(a, s1); - bool isc2 = m_util.str.is_string(b, s2); + bool isc1 = m_util.str.is_string(a, s1) && m_coalesce_chars; + bool isc2 = m_util.str.is_string(b, s2) && m_coalesce_chars; if (isc1 && isc2) { result = m_util.str.mk_string(s1 + s2); return BR_DONE; @@ -1511,6 +1527,23 @@ br_status seq_rewriter::mk_seq_suffix(expr* a, expr* b, expr_ref& result) { return BR_FAILED; } +br_status seq_rewriter::mk_str_units(func_decl* f, expr_ref& result) { + zstring s; + VERIFY(m_util.str.is_string(f, s)); + expr_ref_vector es(m()); + unsigned sz = s.length(); + for (unsigned j = 0; j < sz; ++j) { + es.push_back(m_util.str.mk_unit(m_util.str.mk_char(s, j))); + } + if (es.empty()) { + result = m_util.str.mk_empty(f->get_range()); + } + else { + result = m_util.str.mk_concat(es); + } + return BR_DONE; +} + br_status seq_rewriter::mk_str_le(expr* a, expr* b, expr_ref& result) { result = m().mk_not(m_util.str.mk_lex_lt(b, a)); return BR_REWRITE2; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index b149864e0..2b31c185c 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -108,6 +108,7 @@ class seq_rewriter { arith_util m_autil; re2automaton m_re2aut; expr_ref_vector m_es, m_lhs, m_rhs; + bool m_coalesce_chars; br_status mk_seq_unit(expr* e, expr_ref& result); br_status mk_seq_concat(expr* a, expr* b, expr_ref& result); @@ -122,6 +123,7 @@ class seq_rewriter { br_status mk_seq_replace(expr* a, expr* b, expr* c, expr_ref& result); br_status mk_seq_prefix(expr* a, expr* b, expr_ref& result); br_status mk_seq_suffix(expr* a, expr* b, expr_ref& result); + br_status mk_str_units(func_decl* f, expr_ref& result); br_status mk_str_itos(expr* a, expr_ref& result); br_status mk_str_stoi(expr* a, expr_ref& result); br_status mk_str_in_regexp(expr* a, expr* b, expr_ref& result); @@ -164,15 +166,16 @@ class seq_rewriter { bool get_lengths(expr* e, expr_ref_vector& lens, rational& pos); + public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): - m_util(m), m_autil(m), m_re2aut(m), m_es(m), m_lhs(m), m_rhs(m) { + m_util(m), m_autil(m), m_re2aut(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { } ast_manager & m() const { return m_util.get_manager(); } family_id get_fid() const { return m_util.get_family_id(); } - void updt_params(params_ref const & p) {} - static void get_param_descrs(param_descrs & r) {} + void updt_params(params_ref const & p); + static void get_param_descrs(param_descrs & r); void set_solver(expr_solver* solver) { m_re2aut.set_solver(solver); } bool has_solver() { return m_re2aut.has_solver(); } diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index eba36b5e3..03a3a8e1b 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1052,9 +1052,9 @@ app* seq_util::mk_lt(expr* ch1, expr* ch2) const { return m.mk_not(bv().mk_ule(ch2, ch1)); } -bool seq_util::str::is_string(expr const* n, zstring& s) const { - if (is_string(n)) { - s = zstring(to_app(n)->get_decl()->get_parameter(0).get_symbol().bare_str()); +bool seq_util::str::is_string(func_decl const* f, zstring& s) const { + if (is_string(f)) { + s = zstring(f->get_parameter(0).get_symbol().bare_str()); return true; } else { @@ -1062,6 +1062,10 @@ bool seq_util::str::is_string(expr const* n, zstring& s) const { } } +bool seq_util::str::is_string(expr const* n, zstring& s) const { + return is_app(n) && is_string(to_app(n)->get_decl(), s); +} + bool seq_util::str::is_nth_i(expr const* n, expr*& s, unsigned& idx) const { expr* i = nullptr; if (!is_nth_i(n, s, i)) return false; diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 653f46afe..19db86ca3 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -295,7 +295,9 @@ public: bool is_string(expr const* n, symbol& s) const { return is_string(n) && (s = to_app(n)->get_decl()->get_parameter(0).get_symbol(), true); } + bool is_string(func_decl const* f) const { return is_decl_of(f, m_fid, OP_STRING_CONST); } bool is_string(expr const* n, zstring& s) const; + bool is_string(func_decl const* f, zstring& s) const; bool is_empty(expr const* n) const { symbol s; return is_app_of(n, m_fid, OP_SEQ_EMPTY) || (is_string(n, s) && !s.is_numerical() && *s.bare_str() == 0); } diff --git a/src/cmd_context/check_logic.cpp b/src/cmd_context/check_logic.cpp index 419970a19..c9cc88556 100644 --- a/src/cmd_context/check_logic.cpp +++ b/src/cmd_context/check_logic.cpp @@ -240,6 +240,10 @@ struct check_logic::imp { if (!m_bvs) fail("logic does not support bitvectors"); } + else if (m_dt_util.is_datatype(s)) { + if (!m_dt) + fail("logic does not support algebraic datatypes"); + } else if (m_ar_util.is_array(s)) { if (m_arrays) { return; diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 70a2ddf46..8751c1ae2 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -294,6 +294,7 @@ theory_seq::theory_seq(ast_manager& m, theory_seq_params const & params): m_length(m), m_mg(nullptr), m_rewrite(m), + m_str_rewrite(m), m_seq_rewrite(m), m_util(m), m_autil(m), @@ -322,6 +323,11 @@ theory_seq::theory_seq(ast_manager& m, theory_seq_params const & params): m_post = "seq.post"; // (seq.post s l): suffix of string s of length k, based on extract starting at index i of length l m_eq = "seq.eq"; m_seq_align = "seq.align"; + + params_ref p; + p.set_bool("coalesce_chars", false); + m_rewrite.updt_params(p); + } theory_seq::~theory_seq() { @@ -513,6 +519,7 @@ bool theory_seq::branch_binary_variable(eq const& e) { // |x| > |ys| => x = ys ++ y1, y = y1 ++ y2, y2 = xs expr_ref Y1(mk_skolem(symbol("seq.left"), x, y), m); expr_ref Y2(mk_skolem(symbol("seq.right"), x, y), m); + TRACE("seq", tout << Y1 << "\n" << Y2 << "\n";); ys.push_back(Y1); expr_ref ysY1 = mk_concat(ys); expr_ref xsE = mk_concat(xs); @@ -1604,6 +1611,7 @@ bool theory_seq::split_lengths(dependency* dep, SASSERT(is_var(Y)); expr_ref Y1(mk_skolem(symbol("seq.left"), X, b, Y), m); expr_ref Y2(mk_skolem(symbol("seq.right"), X, b, Y), m); + TRACE("seq", tout << Y1 << "\n" << Y2 << "\n" << ls << "\n" << rs << "\n";); expr_ref bY1 = mk_concat(b, Y1); expr_ref Y1Y2 = mk_concat(Y1, Y2); propagate_eq(dep, lits, X, bY1, true); @@ -2456,8 +2464,8 @@ bool theory_seq::propagate_eq(dependency* dep, enode* n1, enode* n2) { std::function fn = [&]() { return m.mk_eq(n1->get_owner(), n2->get_owner()); }; scoped_trace_stream _sts(*this, fn); ctx.assign_eq(n1, n2, eq_justification(js)); - validate_assign_eq(n1, n2, eqs, lits); } + validate_assign_eq(n1, n2, eqs, lits); m_new_propagation = true; @@ -3387,8 +3395,8 @@ bool theory_seq::solve_ne(unsigned idx) { if (m_util.is_seq(nl) || m_util.is_re(nl)) { ls.reset(); rs.reset(); - m_util.str.get_concat(nl, ls); - m_util.str.get_concat(nr, rs); + m_util.str.get_concat_units(nl, ls); + m_util.str.get_concat_units(nr, rs); new_ls.push_back(ls); new_rs.push_back(rs); } @@ -3598,19 +3606,9 @@ bool theory_seq::solve_nc(unsigned idx) { } } TRACE("seq", ctx.display_literals_verbose(tout, lits.size(), lits.c_ptr()) << "\n";); + std::function fn = [&]() { return lits; } + scoped_trace_stream(*this, fn); ctx.mk_th_axiom(get_id(), lits.size(), lits.c_ptr()); - if (m.has_trace_stream()) { - expr_ref_vector exprs(m); - for (literal l : lits) { - expr* e = ctx.bool_var2expr(l.var()); - if (l.sign()) e = m.mk_not(e); - exprs.push_back(e); - } - app_ref body(m); - body = m.mk_or(exprs.size(), exprs.c_ptr()); - log_axiom_instantiation(body); - m.trace_stream() << "[end-of-instance]\n"; - } return true; } @@ -3772,7 +3770,7 @@ bool theory_seq::explain_empty(expr_ref_vector& es, dependency*& dep) { expr* a; if (m_rep.find1(e, a, dep)) { es.pop_back(); - m_util.str.get_concat(a, es); + m_util.str.get_concat_units(a, es); continue; } TRACE("seq", tout << "Could not set to empty: " << es << "\n";); @@ -4268,12 +4266,13 @@ void theory_seq::init_model(model_generator & mg) { class theory_seq::seq_value_proc : public model_value_proc { enum source_t { unit_source, int_source, string_source }; theory_seq& th; + enode* m_node; sort* m_sort; svector m_dependencies; ptr_vector m_strings; svector m_source; public: - seq_value_proc(theory_seq& th, sort* s): th(th), m_sort(s) { + seq_value_proc(theory_seq& th, enode* n, sort* s): th(th), m_node(n), m_sort(s) { } ~seq_value_proc() override {} void add_unit(enode* n) { @@ -4364,10 +4363,10 @@ public: } } result = th.mk_concat(args, m_sort); - th.m_rewrite(result); + th.m_str_rewrite(result); } th.m_factory->add_trail(result); - TRACE("seq", tout << result << "\n";); + TRACE("seq", tout << mk_pp(m_node->get_owner(), th.m) << " -> " << result << "\n";); return to_app(result); } }; @@ -4399,7 +4398,7 @@ model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) { m_todo.push_back(e); get_ite_concat(m_concat, m_todo); sort* srt = m.get_sort(e); - seq_value_proc* sv = alloc(seq_value_proc, *this, srt); + seq_value_proc* sv = alloc(seq_value_proc, *this, n, srt); unsigned end = m_concat.size(); TRACE("seq", tout << mk_pp(e, m) << "\n";); @@ -4411,6 +4410,9 @@ model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) { if (ctx.e_internalized(c1)) { sv->add_unit(ctx.get_enode(c1)); } + else { + TRACE("seq", tout << "not internalized " << mk_pp(c, m) << "\n";); + } } else if (m_util.str.is_itos(c, c1)) { if (ctx.e_internalized(c1)) { @@ -4435,6 +4437,7 @@ model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) { app* theory_seq::mk_value(app* e) { expr_ref result(m); + expr* e0 = e; e = get_ite_value(e); result = m_rep.find(e); @@ -4739,7 +4742,7 @@ bool theory_seq::canonize(expr* e, expr_ref_vector& es, dependency*& eqs, bool& return false; } change |= e4 != e3; - m_util.str.get_concat(e4, es); + m_util.str.get_concat_units(e4, es); break; } } @@ -5888,7 +5891,7 @@ literal theory_seq::mk_eq_empty(expr* _e, bool phase) { return true_literal; } expr_ref_vector concats(m); - m_util.str.get_concat(e, concats); + m_util.str.get_concat_units(e, concats); for (auto c : concats) { if (m_util.str.is_unit(c)) { return false_literal; @@ -5936,58 +5939,6 @@ void theory_seq::add_axiom(literal l1, literal l2, literal l3, literal l4, liter } -expr_ref theory_seq::coalesce_chars(expr* const& e) { - context& ctx = get_context(); - expr* s; - unsigned ch; - expr_ref result(m); - if (m_util.str.is_concat(e)) { - expr_ref_vector rs(m), concats(m); - m_util.str.get_concat(e, concats); - for (unsigned i = 0; i < concats.size(); ++i) { - expr_ref tmp(coalesce_chars(concats.get(i)), m); - if (m_util.str.is_empty(tmp)) continue; - zstring zs, a; - bool flag = false; - while (m_util.str.is_string(tmp, a)) { - if (flag) - zs = zs + a; - else - zs = a; - flag = true; - if (i < concats.size()-1) - tmp = coalesce_chars(concats[++i].get()); - else { - ++i; - break; - } - } - if (flag) { - rs.push_back(m_util.str.mk_string(zs)); - if (i < concats.size()) - rs.push_back(tmp); - } - else - rs.push_back(tmp); - } - SASSERT(rs.size() > 0); - if (rs.size() > 1) { - return expr_ref(m_util.str.mk_concat(rs.size(), rs.c_ptr()), m); - } - else { - result = e; - return result; - } - } - else if (m_util.str.is_unit(e, s) && m_util.is_const_char(s, ch) && - BR_FAILED != m_seq_rewrite.mk_app_core(to_app(e)->get_decl(), 1, &s, result)) { - if (!ctx.e_internalized(result)) - ctx.internalize(result, false); - return result; - } - result = e; - return result; -} expr_ref theory_seq::mk_skolem(symbol const& name, expr* e1, expr* e2, expr* e3, expr*e4, sort* range) { expr* es[4] = { e1, e2, e3, e4 }; @@ -5999,9 +5950,11 @@ expr_ref theory_seq::mk_skolem(symbol const& name, expr* e1, expr* e2, expr* e3, expr_ref_vector pinned(m); if (name == m_seq_align) { for (unsigned i = 0; i < len; ++i) { - pinned.push_back(coalesce_chars(es[i])); - es[i] = pinned.back(); - TRACE("seq", tout << mk_pp(es[i], m) << "\n";); + expr_ref tmp(es[i], m); + m_str_rewrite(tmp); + pinned.push_back(tmp); + es[i] = tmp; + TRACE("seq", tout << tmp << "\n";); } } return expr_ref(m_util.mk_skolem(name, len, es, range), m); @@ -6607,8 +6560,11 @@ optional: !(e1 < e2) or !(e2 < e1) */ void theory_seq::add_lt_axiom(expr* n) { - expr* e1 = nullptr, *e2 = nullptr; - VERIFY(m_util.str.is_lt(n, e1, e2)); + expr* _e1 = nullptr, *_e2 = nullptr; + VERIFY(m_util.str.is_lt(n, _e1, _e2)); + expr_ref e1(_e1, m), e2(_e2, m); + m_rewrite(e1); + m_rewrite(e2); sort* s = m.get_sort(e1); sort* char_sort = nullptr; VERIFY(m_util.is_seq(s, char_sort)); diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h index f8dcc9b8d..793a228dc 100644 --- a/src/smt/theory_seq.h +++ b/src/smt/theory_seq.h @@ -171,8 +171,8 @@ namespace smt { eq mk_eqdep(expr* l, expr* r, dependency* dep) { expr_ref_vector ls(m), rs(m); - m_util.str.get_concat(l, ls); - m_util.str.get_concat(r, rs); + m_util.str.get_concat_units(l, ls); + m_util.str.get_concat_units(r, rs); return eq(m_eq_id++, ls, rs, dep); } @@ -393,7 +393,8 @@ namespace smt { expr_ref_vector m_length; // length applications themselves scoped_ptr_vector m_replay; // set of actions to replay model_generator* m_mg; - th_rewriter m_rewrite; + th_rewriter m_rewrite; // rewriter that converts strings to character concats + th_rewriter m_str_rewrite; // rewriter that coonverts character concats to strings seq_rewriter m_seq_rewrite; seq_util m_util; arith_util m_autil; @@ -678,7 +679,6 @@ namespace smt { bool get_length(expr* s, rational& val); void mk_decompose(expr* e, expr_ref& head, expr_ref& tail); - expr_ref coalesce_chars(expr* const& str); expr_ref mk_skolem(symbol const& s, expr* e1, expr* e2 = nullptr, expr* e3 = nullptr, expr* e4 = nullptr, sort* range = nullptr); bool is_skolem(symbol const& s, expr* e) const;