From de9c95924163fc8d0a77c09f0c49f0e51cda058f Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 14 Jan 2016 10:56:03 +0100 Subject: [PATCH] add support for re.nostr, re.all, fix bug in disequality handling of sequences, update signature of loop to handle integer arguments and variable arguments Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_rewriter.cpp | 80 ++++++++++++++++++++++++++----- src/ast/rewriter/seq_rewriter.h | 5 +- src/ast/seq_decl_plugin.cpp | 80 +++++++++++++++++++++++-------- src/ast/seq_decl_plugin.h | 5 +- src/smt/theory_seq.cpp | 19 +++++--- 5 files changed, 149 insertions(+), 40 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 442de5503..2b8689264 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -126,6 +126,16 @@ eautomaton* re2automaton::re2aut(expr* e) { } return b.detach(); } + else if (u.re.is_loop(e, e1, lo) && (a = re2aut(e1))) { + b = eautomaton::clone(*a); + b->add_final_to_init_moves(); + b->add_init_to_final_states(); + while (lo > 0) { + b = eautomaton::mk_concat(*a, *b); + --lo; + } + return b.detach(); + } else if (u.re.is_empty(e)) { return alloc(eautomaton, sm); } @@ -200,7 +210,7 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con case OP_RE_INTERSECT: return BR_FAILED; case OP_RE_LOOP: - return BR_FAILED; + return mk_re_loop(num_args, args, result); case OP_RE_EMPTY_SET: return BR_FAILED; case OP_RE_FULL_SET: @@ -296,10 +306,14 @@ br_status seq_rewriter::mk_seq_concat(expr* a, expr* b, expr_ref& result) { return BR_REWRITE2; } if (m_util.str.is_empty(a)) { - result = b; + result = a; return BR_DONE; } if (m_util.str.is_empty(b)) { + result = b; + return BR_DONE; + } + if (m_util.re.is_full(a) && m_util.re.is_full(b)) { result = a; return BR_DONE; } @@ -690,14 +704,14 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) { return BR_FAILED; } -void seq_rewriter::add_next(u_map& next, unsigned idx, expr* cond) { +void seq_rewriter::add_next(u_map& next, expr_ref_vector& trail, unsigned idx, expr* cond) { expr* acc; - if (m().is_true(cond) || !next.find(idx, acc)) { - next.insert(idx, cond); - } - else { - next.insert(idx, m().mk_or(cond, acc)); + if (!m().is_true(cond) && next.find(idx, acc)) { + cond = m().mk_or(cond, acc); } + trail.push_back(cond); + next.insert(idx, cond); + } bool seq_rewriter::is_sequence(eautomaton& aut, expr_ref_vector& seq) { @@ -807,9 +821,10 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { aut->get_moves_from(state, mvs, false); for (unsigned j = 0; j < mvs.size(); ++j) { eautomaton::move const& mv = mvs[j]; + SASSERT(mv.t()); if (mv.t()->is_char() && m().is_value(mv.t()->get_char()) && m().is_value(ch)) { if (mv.t()->get_char() == ch) { - add_next(next, mv.dst(), acc); + add_next(next, trail, mv.dst(), acc); } else { continue; @@ -818,7 +833,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { else { cond = mv.t()->accept(ch); if (!m().is_true(acc)) cond = m().mk_and(acc, cond); - add_next(next, mv.dst(), cond); + add_next(next, trail, mv.dst(), cond); } } } @@ -838,6 +853,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { } } result = mk_or(ors); + TRACE("seq", tout << result << "\n";); return BR_REWRITE_FULL; } br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) { @@ -864,6 +880,22 @@ br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } + if (m_util.re.is_empty(a)) { + result = b; + return BR_DONE; + } + if (m_util.re.is_empty(b)) { + result = a; + return BR_DONE; + } + if (m_util.re.is_full(a)) { + result = a; + return BR_DONE; + } + if (m_util.re.is_full(b)) { + result = b; + return BR_DONE; + } if (m_util.re.is_star(a) && is_epsilon(b)) { result = a; return BR_DONE; @@ -874,6 +906,32 @@ br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) { } return BR_FAILED; } + + +br_status seq_rewriter::mk_re_loop(unsigned num_args, expr* const* args, expr_ref& result) { + rational n1, n2; + switch (num_args) { + case 1: + break; + case 2: + if (m_autil.is_numeral(args[1], n1) && n1.is_unsigned()) { + result = m_util.re.mk_loop(args[0], n1.get_unsigned()); + return BR_DONE; + } + break; + case 3: + if (m_autil.is_numeral(args[1], n1) && n1.is_unsigned() && + m_autil.is_numeral(args[2], n2) && n2.is_unsigned()) { + result = m_util.re.mk_loop(args[0], n1.get_unsigned(), n2.get_unsigned()); + return BR_DONE; + } + break; + default: + break; + } + return BR_FAILED; +} + /* a** = a* (a* + b)* = (a + b)* @@ -882,7 +940,7 @@ br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) { */ br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) { expr* b, *c, *b1, *c1; - if (m_util.re.is_star(a)) { + if (m_util.re.is_star(a) || m_util.re.is_empty(a) || m_util.re.is_full(a)) { result = a; return BR_DONE; } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 77af42dee..f65b22b12 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -36,7 +36,7 @@ public: static sym_expr* mk_char(expr_ref& t) { return alloc(sym_expr, false, t); } static sym_expr* mk_char(ast_manager& m, expr* t) { expr_ref tr(t, m); return alloc(sym_expr, false, tr); } static sym_expr* mk_pred(expr_ref& t) { return alloc(sym_expr, true, t); } - void inc_ref() { ++m_ref; } + void inc_ref() { ++m_ref; } void dec_ref() { --m_ref; if (m_ref == 0) dealloc(this); } std::ostream& display(std::ostream& out) const; bool is_char() const { return !m_is_pred; } @@ -91,6 +91,7 @@ class seq_rewriter { br_status mk_re_star(expr* a, expr_ref& result); br_status mk_re_plus(expr* a, expr_ref& result); br_status mk_re_opt(expr* a, expr_ref& result); + br_status mk_re_loop(unsigned num_args, expr* const* args, expr_ref& result); bool set_empty(unsigned sz, expr* const* es, bool all, expr_ref_vector& lhs, expr_ref_vector& rhs); bool is_subsequence(unsigned n, expr* const* l, unsigned m, expr* const* r, @@ -100,7 +101,7 @@ class seq_rewriter { bool min_length(unsigned n, expr* const* es, unsigned& len); expr* concat_non_empty(unsigned n, expr* const* es); - void add_next(u_map& next, unsigned idx, expr* cond); + void add_next(u_map& next, expr_ref_vector& trail, unsigned idx, expr* cond); bool is_sequence(expr* e, expr_ref_vector& seq); bool is_sequence(eautomaton& aut, expr_ref_vector& seq); bool is_epsilon(expr* e) const; diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 393556b58..5bbc26e6d 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -163,9 +163,6 @@ std::string zstring::encode() const { unsigned char ch = m_buffer[i]; if (0 <= ch && ch < 32) { strm << esc_table[ch]; - } - else if (ch == 127) { - strm << "^?"; } else { strm << (char)(ch); @@ -248,13 +245,15 @@ seq_decl_plugin::seq_decl_plugin(): m_init(false), m_stringc_sym("String"), m_charc_sym("Char"), m_string(0), - m_char(0) {} + m_char(0), + m_re(0) {} void seq_decl_plugin::finalize() { for (unsigned i = 0; i < m_sigs.size(); ++i) dealloc(m_sigs[i]); m_manager->dec_ref(m_string); m_manager->dec_ref(m_char); + m_manager->dec_ref(m_re); } bool seq_decl_plugin::is_sort_param(sort* s, unsigned& idx) { @@ -479,6 +478,9 @@ void seq_decl_plugin::set_manager(ast_manager* m, family_id id) { parameter param(m_char); m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, SEQ_SORT, 1, ¶m)); m->inc_ref(m_string); + parameter paramS(m_string); + m_re = m->mk_sort(m_family_id, RE_SORT, 1, ¶mS); + m->inc_ref(m_re); } sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) { @@ -567,20 +569,40 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k)); case _OP_REGEXP_FULL: + if (!range) { + range = m_re; + } match(*m_sigs[k], arity, domain, range, rng); return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_SET)); case _OP_REGEXP_EMPTY: + if (!range) { + range = m_re; + } match(*m_sigs[k], arity, domain, range, rng); return m.mk_func_decl(symbol("re.nostr"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_EMPTY_SET)); case OP_RE_LOOP: - match(*m_sigs[k], arity, domain, range, rng); - if (num_parameters != 2 || !parameters[0].is_int() || !parameters[1].is_int()) { - m.raise_exception("Expecting two numeral parameters to function re-loop"); + switch (arity) { + case 1: + match(*m_sigs[k], arity, domain, range, rng); + if (num_parameters == 0 || num_parameters > 2 || !parameters[0].is_int() || (num_parameters == 2 && !parameters[1].is_int())) { + m.raise_exception("Expecting two numeral parameters to function re-loop"); + } + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters)); + case 2: + if (m_re != domain[0] || !arith_util(m).is_int(domain[1])) { + m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters"); + } + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters)); + case 3: + if (m_re != domain[0] || !arith_util(m).is_int(domain[1]) || !arith_util(m).is_int(domain[2])) { + m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters"); + } + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters)); + default: + m.raise_exception("Incorrect number of arguments passed to loop. Expected 1 regular expression and two integer parameters"); } - return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters)); - - + case OP_STRING_CONST: if (!(num_parameters == 1 && arity == 0 && parameters[0].is_symbol())) { @@ -768,17 +790,37 @@ void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const { } } +app* seq_util::re::mk_loop(expr* r, unsigned lo) { + parameter param(lo); + return m.mk_app(m_fid, OP_RE_LOOP, 1, ¶m, 1, &r); +} + +app* seq_util::re::mk_loop(expr* r, unsigned lo, unsigned hi) { + parameter params[2] = { parameter(lo), parameter(hi) }; + return m.mk_app(m_fid, OP_RE_LOOP, 2, params, 1, &r); +} + bool seq_util::re::is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) { if (is_loop(n)) { app const* a = to_app(n); - SASSERT(a->get_num_args() == 1); - SASSERT(a->get_decl()->get_num_parameters() == 2); - body = a->get_arg(0); - lo = a->get_decl()->get_parameter(0).get_int(); - hi = a->get_decl()->get_parameter(1).get_int(); - return true; - } - else { - return false; + if (a->get_num_args() == 1 && a->get_decl()->get_num_parameters() == 2) { + body = a->get_arg(0); + lo = a->get_decl()->get_parameter(0).get_int(); + hi = a->get_decl()->get_parameter(1).get_int(); + return true; + } } + return false; +} + +bool seq_util::re::is_loop(expr const* n, expr*& body, unsigned& lo) { + if (is_loop(n)) { + app const* a = to_app(n); + if (a->get_num_args() == 1 && a->get_decl()->get_num_parameters() == 1) { + body = a->get_arg(0); + lo = a->get_decl()->get_parameter(0).get_int(); + return true; + } + } + return false; } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index ecb118581..611020a69 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -136,6 +136,7 @@ class seq_decl_plugin : public decl_plugin { symbol m_charc_sym; sort* m_string; sort* m_char; + sort* m_re; void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); @@ -297,6 +298,8 @@ public: app* mk_star(expr* r) { return m.mk_app(m_fid, OP_RE_STAR, r); } app* mk_plus(expr* r) { return m.mk_app(m_fid, OP_RE_PLUS, r); } app* mk_opt(expr* r) { return m.mk_app(m_fid, OP_RE_OPTION, r); } + app* mk_loop(expr* r, unsigned lo); + app* mk_loop(expr* r, unsigned lo, unsigned hi); bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); } bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_RE_CONCAT); } @@ -318,7 +321,7 @@ public: MATCH_UNARY(is_plus); MATCH_UNARY(is_opt); bool is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi); - + bool is_loop(expr const* n, expr*& body, unsigned& lo); }; str str; re re; diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 185e92ea2..4ac7e8f7e 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -585,7 +585,7 @@ void theory_seq::propagate_lit(dependency* dep, unsigned n, literal const* _lits enode_pair_vector eqs; linearize(dep, eqs, lits); TRACE("seq", ctx.display_detailed_literal(tout, lit); - tout << " <- "; ctx.display_literals_verbose(tout, lits.size(), lits.c_ptr()); display_deps(tout, dep);); + tout << " <- "; ctx.display_literals_verbose(tout, lits.size(), lits.c_ptr()); if (!lits.empty()) tout << "\n"; display_deps(tout, dep);); justification* js = ctx.mk_justification( ext_theory_propagation_justification( @@ -600,7 +600,7 @@ void theory_seq::set_conflict(dependency* dep, literal_vector const& _lits) { enode_pair_vector eqs; literal_vector lits(_lits); linearize(dep, eqs, lits); - TRACE("seq", ctx.display_literals_verbose(tout, lits.size(), lits.c_ptr()); display_deps(tout, dep); ;); + TRACE("seq", ctx.display_literals_verbose(tout, lits.size(), lits.c_ptr()); if (!lits.empty()) tout << "\n"; display_deps(tout, dep); ;); m_new_propagation = true; ctx.set_conflict( ctx.mk_justification( @@ -617,7 +617,7 @@ void theory_seq::propagate_eq(dependency* dep, enode* n1, enode* n2) { enode_pair_vector eqs; linearize(dep, eqs, lits); TRACE("seq", - tout << mk_pp(n1->get_owner(), m) << " = " << mk_pp(n2->get_owner(), m) << " <- "; + tout << mk_pp(n1->get_owner(), m) << " = " << mk_pp(n2->get_owner(), m) << " <- \n"; display_deps(tout, dep); ); @@ -987,6 +987,10 @@ bool theory_seq::solve_ne(unsigned idx) { } else if (!change) { TRACE("seq", tout << "no change " << n.ls(i) << " " << n.rs(i) << "\n";); + if (updated) { + new_ls.push_back(n.ls(i)); + new_rs.push_back(n.rs(i)); + } continue; } else { @@ -1197,7 +1201,7 @@ void theory_seq::display(std::ostream & out) const { void theory_seq::display_equations(std::ostream& out) const { for (unsigned i = 0; i < m_eqs.size(); ++i) { eq const& e = m_eqs[i]; - out << e.ls() << " = " << e.rs() << " <- "; + out << e.ls() << " = " << e.rs() << " <- \n"; display_deps(out, e.dep()); } } @@ -1231,13 +1235,13 @@ void theory_seq::display_deps(std::ostream& out, dependency* dep) const { enode_pair_vector eqs; linearize(dep, eqs, lits); for (unsigned i = 0; i < eqs.size(); ++i) { - out << "\n " << mk_pp(eqs[i].first->get_owner(), m) << " = " << mk_pp(eqs[i].second->get_owner(), m); + out << " " << mk_pp(eqs[i].first->get_owner(), m) << " = " << mk_pp(eqs[i].second->get_owner(), m) << "\n"; } for (unsigned i = 0; i < lits.size(); ++i) { literal lit = lits[i]; - get_context().display_literals_verbose(out << "\n ", 1, &lit); + get_context().display_literals_verbose(out << " ", 1, &lit); + tout << "\n"; } - out << "\n"; } void theory_seq::collect_statistics(::statistics & st) const { @@ -2204,6 +2208,7 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) { expr_ref eq(m.mk_eq(e1, e2), m); m_rewrite(eq); if (!m.is_false(eq)) { + TRACE("seq", tout << "new disequality: " << eq << "\n";); literal lit = ~mk_eq(e1, e2, false); //SASSERT(get_context().get_assignment(lit) == l_true); dependency* dep = m_dm.mk_leaf(assumption(lit));