From 31302ec851204b1244df4bb0aca0fe502ebece39 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 25 Dec 2015 15:22:26 -0800 Subject: [PATCH] automata Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_rewriter.cpp | 194 +++++++++++++++++++++++++++++- src/ast/rewriter/seq_rewriter.h | 15 +++ src/ast/seq_decl_plugin.cpp | 8 +- src/ast/seq_decl_plugin.h | 5 +- src/math/automata/automaton.h | 152 ++++++++++++++++++----- src/smt/theory_seq.cpp | 134 ++++++++------------- src/smt/theory_seq.h | 12 +- 7 files changed, 386 insertions(+), 134 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 41c20f599..428c4b224 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -25,6 +25,89 @@ Notes: #include"automaton.h" + +re2automaton::re2automaton(ast_manager& m): m(m), u(m) {} + +eautomaton* re2automaton::operator()(expr* e) { + eautomaton* r = re2aut(e); + if (r) { + r->compress(); + } + return r; +} + +eautomaton* re2automaton::re2aut(expr* e) { + SASSERT(u.is_re(e)); + expr* e1, *e2; + scoped_ptr a, b; + if (u.re.is_to_re(e, e1)) { + return seq2aut(e1); + } + else if (u.re.is_concat(e, e1, e2) && (a = re2aut(e1)) && (b = re2aut(e2))) { + return eautomaton::mk_concat(*a, *b); + } + else if (u.re.is_union(e, e1, e2) && (a = re2aut(e1)) && (b = re2aut(e2))) { + return eautomaton::mk_union(*a, *b); + } + else if (u.re.is_star(e, e1) && (a = re2aut(e1))) { + a->add_final_to_init_moves(); + a->add_init_to_final_states(); + return a.detach(); + } + else if (u.re.is_plus(e, e1) && (a = re2aut(e1))) { + a->add_final_to_init_moves(); + return a.detach(); + } + else if (u.re.is_opt(e, e1) && (a = re2aut(e1))) { + a = eautomaton::mk_opt(*a); + return a.detach(); + } + else if (u.re.is_range(e)) { + + } + else if (u.re.is_loop(e)) { + + } +#if 0 + else if (u.re.is_intersect(e, e1, e2)) { + + } + else if (u.re.is_empty(e)) { + + } +#endif + + return 0; +} + +eautomaton* re2automaton::seq2aut(expr* e) { + SASSERT(u.is_seq(e)); + zstring s; + expr* e1, *e2; + scoped_ptr a, b; + if (u.str.is_concat(e, e1, e2) && (a = seq2aut(e1)) && (b = seq2aut(e2))) { + return eautomaton::mk_concat(*a, *b); + } + else if (u.str.is_unit(e, e1)) { + return alloc(eautomaton, m, e1); + } + else if (u.str.is_empty(e)) { + return eautomaton::mk_epsilon(m); + } + else if (u.str.is_string(e, s)) { + unsigned init = 0; + eautomaton::moves mvs; + unsigned_vector final; + final.push_back(s.length()); + for (unsigned k = 0; k < s.length(); ++k) { + // reference count? + mvs.push_back(eautomaton::move(m, k, k+1, u.str.mk_char(s, k))); + } + return alloc(eautomaton, m, init, final, mvs); + } + return 0; +} + br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) { SASSERT(f->get_family_id() == get_fid()); @@ -159,11 +242,15 @@ br_status seq_rewriter::mk_seq_concat(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } - if (m_util.str.is_concat(a, c, d) && - m_util.str.is_string(d, s1) && isc2) { + // TBD concatenation is right-associative + if (isc2 && m_util.str.is_concat(a, c, d) && m_util.str.is_string(d, s1)) { result = m_util.str.mk_concat(c, m_util.str.mk_string(s1 + s2)); return BR_DONE; } + if (isc1 && m_util.str.is_concat(b, c, d) && m_util.str.is_string(c, s2)) { + result = m_util.str.mk_concat(m_util.str.mk_string(s1 + s2), d); + return BR_DONE; + } return BR_FAILED; } @@ -398,7 +485,7 @@ br_status seq_rewriter::mk_seq_suffix(expr* a, expr* b, expr_ref& result) { result = m().mk_eq(m_util.str.mk_empty(m().get_sort(a)), a); return BR_REWRITE3; } - // concatenation is left-associative, so a2, b2 are not concatenations + // TBD concatenation is right-associative expr* a1, *a2, *b1, *b2; if (m_util.str.is_concat(a, a1, a2) && m_util.str.is_concat(b, b1, b2) && a2 == b2) { @@ -498,7 +585,108 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) { } return BR_FAILED; } + +void seq_rewriter::add_next(u_map& next, unsigned idx, expr* cond) { + expr* acc; + if (m().is_true(cond) || !next.find(idx, acc)) { + next.insert(idx, cond); + } + else { + next.insert(idx, m().mk_or(cond, acc)); + } +} + +bool seq_rewriter::is_sequence(expr* e, expr_ref_vector& seq) { + zstring s; + ptr_vector todo; + expr *e1, *e2; + todo.push_back(e); + while (!todo.empty()) { + e = todo.back(); + todo.pop_back(); + if (m_util.str.is_string(e, s)) { + for (unsigned i = s.length(); i > 0; ) { + --i; + seq.push_back(m_util.str.mk_char(s, i)); + } + } + else if (m_util.str.is_empty(e)) { + continue; + } + else if (m_util.str.is_unit(e)) { + seq.push_back(e); + } + else if (m_util.str.is_concat(e, e1, e2)) { + todo.push_back(e1); + todo.push_back(e2); + } + else { + return false; + } + } + seq.reverse(); + return true; +} + br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { + scoped_ptr aut; + expr_ref_vector seq(m()); + if (is_sequence(a, seq) && (aut = re2automaton(m())(b))) { + expr_ref_vector trail(m()); + u_map maps[2]; + bool select_map = false; + expr_ref ch(m()), cond(m()); + eautomaton::moves mvs; + maps[0].insert(aut->init(), m().mk_true()); + // is_accepted(a, aut) & some state in frontier is final. + + for (unsigned i = 0; i < seq.size(); ++i) { + u_map& frontier = maps[select_map]; + u_map& next = maps[!select_map]; + select_map = !select_map; + ch = seq[i].get(); + next.reset(); + u_map::iterator it = frontier.begin(), end = frontier.end(); + for (; it != end; ++it) { + mvs.reset(); + unsigned state = it->m_key; + expr* acc = it->m_value; + aut->get_moves_from(state, mvs, false); + for (unsigned j = 0; j < mvs.size(); ++j) { + eautomaton::move const& mv = mvs[j]; + if (m().is_value(mv.t()) && m().is_value(ch)) { + if (mv.t() == ch) { + add_next(next, mv.dst(), acc); + } + else { + continue; + } + } + else { + cond = m().mk_eq(mv.t(), ch); + if (!m().is_true(acc)) cond = m().mk_and(acc, cond); + add_next(next, mv.dst(), cond); + } + } + } + } + u_map const& frontier = maps[select_map]; + u_map::iterator it = frontier.begin(), end = frontier.end(); + expr_ref_vector ors(m()); + for (; it != end; ++it) { + unsigned_vector states; + bool has_final = false; + aut->get_epsilon_closure(it->m_key, states); + for (unsigned i = 0; i < states.size() && !has_final; ++i) { + has_final = aut->is_final_state(states[i]); + } + if (has_final) { + ors.push_back(it->m_value); + } + } + result = mk_or(ors); + return BR_REWRITE_FULL; + } return BR_FAILED; } br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) { diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index c3e466585..f6772bfe9 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -24,8 +24,20 @@ Notes: #include"rewriter_types.h" #include"params.h" #include"lbool.h" +#include"automaton.h" +typedef automaton eautomaton; +class re2automaton { + ast_manager& m; + seq_util u; + eautomaton* re2aut(expr* e); + eautomaton* seq2aut(expr* e); + public: + re2automaton(ast_manager& m); + eautomaton* operator()(expr* e); +}; + /** \brief Cheap rewrite rules for seq constraints */ @@ -61,6 +73,9 @@ class seq_rewriter { bool min_length(unsigned n, expr* const* es, unsigned& len); expr* concat_non_empty(unsigned n, expr* const* es); + void add_next(u_map& next, unsigned idx, expr* cond); + bool is_sequence(expr* e, expr_ref_vector& seq); + public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): m_util(m), m_autil(m), m_es(m), m_lhs(m), m_rhs(m) { diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 10e9c00a5..e516d4ea5 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -223,9 +223,9 @@ bool seq_decl_plugin::match(ptr_vector& binding, sort* s, sort* sP) { } /* - \brief match left associative operator. + \brief match right associative operator. */ -void seq_decl_plugin::match_left_assoc(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) { +void seq_decl_plugin::match_right_assoc(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) { ptr_vector binding; ast_manager& m = *m_manager; TRACE("seq_verbose", @@ -441,9 +441,9 @@ func_decl* seq_decl_plugin::mk_assoc_fun(decl_kind k, unsigned arity, sort* cons if (arity == 0) { m.raise_exception("Invalid function application. At least one argument expected"); } - match_left_assoc(*m_sigs[k], arity, domain, range, rng); + match_right_assoc(*m_sigs[k], arity, domain, range, rng); func_decl_info info(m_family_id, k_seq); - info.set_left_associative(); + info.set_right_associative(); return m.mk_func_decl(m_sigs[(rng == m_string)?k_string:k_seq]->m_name, rng, rng, rng, info); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 9a5f65d05..d0a475b25 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -137,7 +137,7 @@ class seq_decl_plugin : public decl_plugin { void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); - void match_left_assoc(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); + void match_right_assoc(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); bool match(ptr_vector& binding, sort* s, sort* sP); @@ -221,7 +221,7 @@ public: app* mk_char(char ch); app* mk_concat(expr* a, expr* b) { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONCAT, 2, es); } app* mk_concat(expr* a, expr* b, expr* c) { - return mk_concat(mk_concat(a, b), c); + return mk_concat(a, mk_concat(b, c)); } expr* mk_concat(unsigned n, expr* const* es) { if (n == 1) return es[0]; SASSERT(n > 1); return m.mk_app(m_fid, OP_SEQ_CONCAT, n, es); } app* mk_length(expr* a) { return m.mk_app(m_fid, OP_SEQ_LENGTH, 1, &a); } @@ -278,6 +278,7 @@ public: void get_concat(expr* e, expr_ref_vector& es) const; expr* get_leftmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e1; return e; } + expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; } }; class re { diff --git a/src/math/automata/automaton.h b/src/math/automata/automaton.h index 684082170..dabfa1417 100644 --- a/src/math/automata/automaton.h +++ b/src/math/automata/automaton.h @@ -100,8 +100,7 @@ public: automaton(M& m, unsigned init, unsigned_vector const& final, moves const& mvs): m(m) { m_init = init; for (unsigned i = 0; i < final.size(); ++i) { - m_final_states.push_back(final[i]); - m_final_set.insert(final[i]); + add_to_final_states(final[i]); } for (unsigned i = 0; i < mvs.size(); ++i) { move const& mv = mvs[i]; @@ -110,8 +109,7 @@ public: m_delta.resize(n+1, moves()); m_delta_inv.resize(n+1, moves()); } - m_delta[mv.src()].push_back(mv); - m_delta_inv[mv.dst()].push_back(mv); + add(mv); } } @@ -125,8 +123,7 @@ public: m_delta[i].push_back(move(m, i, i + 1, seq[i])); m_delta[i + 1].push_back(move(m, i, i + 1, seq[i])); } - m_final_states.push_back(seq.size()); - m_final_set.insert(seq.size()); + add_to_final_states(seq.size()); } // The automaton that accepts t @@ -135,10 +132,8 @@ public: m_init(0) { m_delta.resize(2, moves()); m_delta_inv.resize(2, moves()); - m_final_set.insert(1); - m_final_states.push_back(1); - m_delta[0].push_back(move(m, 0, 1, t)); - m_delta_inv[1].push_back(move(m, 0, 1, t)); + add_to_final_states(1); + add(move(m, 0, 1, t)); } automaton(automaton const& other): @@ -257,13 +252,24 @@ public: return alloc(automaton, m, init, final, mvs); } - void add_init_to_final() { - if (!m_final_set.contains(m_init)) { - m_final_set.insert(m_init); - m_final_states.push_back(m_init); + void add_to_final_states(unsigned s) { + if (!is_final_state(s)) { + m_final_set.insert(s); + m_final_states.push_back(s); } } + void remove_from_final_states(unsigned s) { + if (is_final_state(s)) { + m_final_set.remove(s); + m_final_states.erase(s); + } + } + + void add_init_to_final_states() { + add_to_final_states(init()); + } + void add_final_to_init_moves() { for (unsigned i = 0; i < m_final_states.size(); ++i) { unsigned state = m_final_states[i]; @@ -273,16 +279,69 @@ public: found = (mvs[j].dst() == m_init) && mvs[j].is_epsilon(); } if (!found) { - m_delta[state].push_back(move(m, state, m_init)); - m_delta_inv[m_init].push_back(move(m, state, m_init)); + add(move(m, state, m_init)); } } } - // remove states that only have epsilon transitions. + // remove epsilon transitions + // src - e -> dst + // in_degree(src) = 1, final(src) => final(dst), src0 != src + // src0 - t -> src - e -> dst => src0 - t -> dst + // out_degree(dst) = 1, final(dst) => final(src), dst != dst1 + // src - e -> dst - t -> dst1 => src - t -> dst1 void compress() { - - // TBD + for (unsigned i = 0; i < m_delta.size(); ++i) { + for (unsigned j = 0; j < m_delta[i].size(); ++j) { + move const& mv = m_delta[i][j]; + unsigned src = mv.src(); + unsigned dst = mv.dst(); + SASSERT(src == i); + if (mv.is_epsilon()) { + if (src == dst) { + // just remove this edge. + } + else if (1 == in_degree(src) && init() != src && (!is_final_state(src) || is_final_state(dst))) { + move const& mv0 = m_delta_inv[src][0]; + unsigned src0 = mv0.src(); + T* t = mv0.t(); + SASSERT(mv0.dst() == src); + if (src0 == src) { + continue; + } + add(move(m, src0, dst, t)); + remove(src0, src, t); + } + else if (1 == out_degree(dst) && init() != dst && (!is_final_state(dst) || is_final_state(src))) { + move const& mv1 = m_delta[dst][0]; + unsigned dst1 = mv1.dst(); + T* t = mv1.t(); + SASSERT(mv1.src() == dst); + if (dst1 == dst) { + continue; + } + add(move(m, src, dst1, t)); + remove(dst, dst1, t); + } + else { + continue; + } + remove(src, dst, 0); + --j; + } + } + } + while (true) { + SASSERT(!m_delta.empty()); + unsigned src = m_delta.size() - 1; + if (in_degree(src) == 0 && init() != src) { + remove_from_final_states(src); + m_delta.pop_back(); + } + else { + break; + } + } } bool is_sequence(unsigned& length) const { @@ -356,11 +415,11 @@ public: void get_inv_epsilon_closure(unsigned state, unsigned_vector& states) { get_epsilon_closure(state, m_delta_inv, states); } - void get_moves_from(unsigned state, moves& mvs) const { - get_moves(state, m_delta, mvs); + void get_moves_from(unsigned state, moves& mvs, bool epsilon_closure = true) const { + get_moves(state, m_delta, mvs, epsilon_closure); } - void get_moves_to(unsigned state, moves& mvs) { - get_moves(state, m_delta_inv, mvs); + void get_moves_to(unsigned state, moves& mvs, bool epsilon_closure = true) { + get_moves(state, m_delta_inv, mvs, epsilon_closure); } template @@ -384,9 +443,41 @@ public: return out; } private: + + void add(move const& mv) { + m_delta[mv.src()].push_back(mv); + m_delta_inv[mv.dst()].push_back(mv); + } + + + unsigned find_move(unsigned src, unsigned dst, T* t, moves const& mvs) { + for (unsigned i = 0; i < mvs.size(); ++i) { + move const& mv = mvs[i]; + if (mv.src() == src && mv.dst() == dst && t == mv.t()) { + return i; + } + } + UNREACHABLE(); + return UINT_MAX; + } + + void remove(unsigned src, unsigned dst, T* t, moves& mvs) { + remove(find_move(src, dst, t, mvs), mvs); + } + + void remove(unsigned src, unsigned dst, T* t) { + remove(src, dst, t, m_delta[src]); + remove(src, dst, t, m_delta_inv[dst]); + } + + void remove(unsigned index, moves& mvs) { + mvs[index] = mvs.back(); + mvs.pop_back(); + } + mutable unsigned_vector m_states1, m_states2; - void get_moves(unsigned state, vector const& delta, moves& mvs) const { + void get_moves(unsigned state, vector const& delta, moves& mvs, bool epsilon_closure) const { m_states1.reset(); m_states2.reset(); get_epsilon_closure(state, delta, m_states1); @@ -396,10 +487,15 @@ private: for (unsigned j = 0; j < mv1.size(); ++j) { move const& mv = mv1[j]; if (!mv.is_epsilon()) { - m_states2.reset(); - get_epsilon_closure(mv.dst(), delta, m_states2); - for (unsigned k = 0; k < m_states2.size(); ++k) { - mvs.push_back(move(m, mv.src(), m_states2[k], mv.t())); + if (epsilon_closure) { + m_states2.reset(); + get_epsilon_closure(mv.dst(), delta, m_states2); + for (unsigned k = 0; k < m_states2.size(); ++k) { + mvs.push_back(move(m, state, m_states2[k], mv.t())); + } + } + else { + mvs.push_back(move(m, state, mv.dst(), mv.t())); } } } diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 0b8e71f1c..39ed71beb 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -35,79 +35,6 @@ struct display_expr { }; -re2automaton::re2automaton(ast_manager& m): m(m), u(m) {} - -eautomaton* re2automaton::re2aut(expr* e) { - SASSERT(u.is_re(e)); - expr* e1, *e2; - scoped_ptr a, b; - if (u.re.is_to_re(e, e1)) { - return seq2aut(e1); - } - else if (u.re.is_concat(e, e1, e2) && (a = re2aut(e1)) && (b = re2aut(e2))) { - return eautomaton::mk_concat(*a, *b); - } - else if (u.re.is_union(e, e1, e2) && (a = re2aut(e1)) && (b = re2aut(e2))) { - return eautomaton::mk_union(*a, *b); - } - else if (u.re.is_star(e, e1) && (a = re2aut(e1))) { - a->add_final_to_init_moves(); - a->add_init_to_final(); - return a.detach(); - } - else if (u.re.is_plus(e, e1) && (a = re2aut(e1))) { - a->add_final_to_init_moves(); - return a.detach(); - } - else if (u.re.is_opt(e, e1) && (a = re2aut(e1))) { - a = eautomaton::mk_opt(*a); - return a.detach(); - } - else if (u.re.is_range(e)) { - - } - else if (u.re.is_loop(e)) { - - } -#if 0 - else if (u.re.is_intersect(e, e1, e2)) { - - } - else if (u.re.is_empty(e)) { - - } -#endif - - return 0; -} - -eautomaton* re2automaton::seq2aut(expr* e) { - SASSERT(u.is_seq(e)); - zstring s; - expr* e1, *e2; - scoped_ptr a, b; - if (u.str.is_concat(e, e1, e2) && (a = seq2aut(e1)) && (b = seq2aut(e2))) { - return eautomaton::mk_concat(*a, *b); - } - else if (u.str.is_unit(e, e1)) { - return alloc(eautomaton, m, e1); - } - else if (u.str.is_empty(e)) { - return eautomaton::mk_epsilon(m); - } - else if (u.str.is_string(e, s)) { - unsigned init = 0; - eautomaton::moves mvs; - unsigned_vector final; - final.push_back(s.length()); - for (unsigned k = 0; k < s.length(); ++k) { - // reference count? - mvs.push_back(eautomaton::move(m, k, k+1, u.str.mk_char(s, k))); - } - return alloc(eautomaton, m, init, final, mvs); - } - return 0; -} void theory_seq::solution_map::update(expr* e, expr* r, enode_pair_dependency* d) { m_cache.reset(); @@ -449,12 +376,35 @@ bool theory_seq::check_length_coherence_tbd() { } void theory_seq::mk_decompose(expr* e, expr_ref& emp, expr_ref& head, expr_ref& tail) { + expr* e1, *e2; sort* char_sort = 0; + zstring s; VERIFY(m_util.is_seq(m.get_sort(e), char_sort)); - tail = mk_skolem(m_tail, e); - expr_ref v(mk_skolem(m_head_elem, e, 0, 0, char_sort), m); - head = m_util.str.mk_unit(v); emp = m_util.str.mk_empty(m.get_sort(e)); + if (m_util.str.is_empty(e)) { + head = m_util.str.mk_unit(mk_skolem(m_head_elem, e, 0, 0, char_sort)); + tail = mk_skolem(m_tail, e); + } + else if (m_util.str.is_string(e, s)) { + head = m_util.str.mk_unit(m_util.str.mk_char(s, 0)); + tail = m_util.str.mk_string(s.extract(1, s.length()-1)); + } + else if (m_util.str.is_unit(e)) { + head = e; + tail = emp; + } + else if (m_util.str.is_concat(e, e1, e2) && m_util.str.is_unit(e1)) { + head = e1; + tail = e2; + } + else { + head = m_util.str.mk_unit(mk_skolem(m_head_elem, e, 0, 0, char_sort)); + tail = mk_skolem(m_tail, e); + if (!m_util.is_skolem(e)) { + expr_ref conc(m_util.str.mk_concat(head, tail), m); + add_axiom(mk_eq(e, emp, false), mk_eq(e, conc, false)); + } + } } bool theory_seq::check_ineq_coherence() { @@ -484,8 +434,8 @@ bool theory_seq::is_solved() { if (!check_ineq_coherence()) { return false; } - if (!m_re2aut.empty()) { - return false; + for (unsigned i = 0; i < m_automata.size(); ++i) { + if (!m_automata[i]) return false; } SASSERT(check_length_coherence()); @@ -500,7 +450,7 @@ void theory_seq::propagate_lit(enode_pair_dependency* dep, unsigned n, literal c vector _eqs; m_dm.linearize(dep, _eqs); TRACE("seq", ctx.display_detailed_literal(tout, lit); - tout << " <- "; display_deps(tout, dep);); + tout << " <- "; ctx.display_literals_verbose(tout, n, lits); display_deps(tout, dep);); justification* js = ctx.mk_justification( ext_theory_propagation_justification( @@ -1248,9 +1198,10 @@ void theory_seq::add_elim_string_axiom(expr* n) { zstring s; VERIFY(m_util.str.is_string(n, s)); SASSERT(s.length() > 0); - expr_ref result(m_util.str.mk_unit(m_util.str.mk_char(s, 0)), m); - for (unsigned i = 1; i < s.length(); ++i) { - result = m_util.str.mk_concat(result, m_util.str.mk_unit(m_util.str.mk_char(s, i))); + expr_ref result(m_util.str.mk_unit(m_util.str.mk_char(s, s.length()-1)), m); + for (unsigned i = s.length()-1; i > 0; ) { + --i; + result = m_util.str.mk_concat(m_util.str.mk_unit(m_util.str.mk_char(s, i)), result); } add_axiom(mk_eq(n, result, false)); m_rep.update(n, result, 0); @@ -1322,6 +1273,7 @@ void theory_seq::add_in_re_axiom(expr* n) { void theory_seq::propagate_in_re(expr* n, bool is_true) { + TRACE("seq", tout << mk_pp(n, m) << " <- " << (is_true?"true":"false") << "\n";); expr* e1, *e2; VERIFY(m_util.str.is_in_re(n, e1, e2)); eautomaton* a = get_automaton(e2); @@ -1349,6 +1301,7 @@ void theory_seq::propagate_in_re(expr* n, bool is_true) { propagate_lit(0, 1, &lit, lits[1]); } else { + TRACE("seq", ctx.display_literals_verbose(tout, lits.size(), lits.c_ptr());); ctx.mk_th_axiom(get_id(), lits.size(), lits.c_ptr()); } } @@ -1435,10 +1388,15 @@ void theory_seq::add_at_axiom(expr* e) { step(s, tail, re, i, j, t) -> s = t ++ tail */ void theory_seq::propagate_step(bool_var v, expr* step) { + context& ctx = get_context(); expr* re, *t, *s, *tail, *i, *j; VERIFY(is_step(step, s, tail, re, i, j, t)); expr_ref conc(m_util.str.mk_concat(m_util.str.mk_unit(t), tail), m); + expr_ref sr(s, m); propagate_eq(v, s, conc); + enode* n1 = ensure_enode(step); + enode* n2 = ctx.get_enode(m.mk_true()); + m_eqs.push_back(eq(sr, conc, m_dm.mk_leaf(enode_pair(n1, n2)))); } @@ -1479,12 +1437,13 @@ bool theory_seq::is_skolem(symbol const& s, expr* e) const { void theory_seq::propagate_eq(bool_var v, expr* e1, expr* e2) { context& ctx = get_context(); - SASSERT(ctx.e_internalized(e2)); enode* n1 = ensure_enode(e1); enode* n2 = ensure_enode(e2); if (n1->get_root() == n2->get_root()) { return; } + ctx.mark_as_relevant(n1); + ctx.mark_as_relevant(n2); TRACE("seq", tout << mk_pp(ctx.bool_var2enode(v)->get_owner(), m) << " => " << mk_pp(e1, m) << " = " << mk_pp(e2, m) << "\n";); @@ -1517,7 +1476,7 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { else if (is_true && m_util.str.is_contains(e, e1, e2)) { expr_ref f1 = mk_skolem(m_contains_left, e1, e2); expr_ref f2 = mk_skolem(m_contains_right, e1, e2); - f = m_util.str.mk_concat(m_util.str.mk_concat(f1, e2), f2); + f = m_util.str.mk_concat(f1, m_util.str.mk_concat(e2, f2)); propagate_eq(v, f, e1); } else if (is_accept(e)) { @@ -1723,7 +1682,7 @@ void theory_seq::add_accept2step(expr* acc) { if (!aut) return; if (m_util.str.is_empty(s)) return; eautomaton::moves mvs; - aut->get_moves_to(src, mvs); + aut->get_moves_from(src, mvs); expr_ref head(m), tail(m), emp(m), step(m); mk_decompose(s, emp, head, tail); literal_vector lits; @@ -1735,6 +1694,9 @@ void theory_seq::add_accept2step(expr* acc) { lits.push_back(mk_literal(step)); } TRACE("seq", ctx.display_literals_verbose(tout, lits.size(), lits.c_ptr()); tout << "\n";); + for (unsigned i = 0; i < lits.size(); ++i) { // TBD + ctx.mark_as_relevant(lits[i]); + } ctx.mk_th_axiom(get_id(), lits.size(), lits.c_ptr()); } @@ -1767,7 +1729,7 @@ void theory_seq::add_reject2reject(expr* rej) { if (!aut) return; if (m_util.str.is_empty(s)) return; eautomaton::moves mvs; - aut->get_moves_to(src, mvs); + aut->get_moves_from(src, mvs); expr_ref head(m), tail(m), emp(m), conc(m); mk_decompose(s, emp, head, tail); literal rej1 = ctx.get_literal(rej); diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h index f19599472..59b721fa7 100644 --- a/src/smt/theory_seq.h +++ b/src/smt/theory_seq.h @@ -27,20 +27,10 @@ Revision History: #include "scoped_vector.h" #include "scoped_ptr_vector.h" #include "automaton.h" +#include "seq_rewriter.h" namespace smt { - typedef automaton eautomaton; - class re2automaton { - ast_manager& m; - seq_util u; - eautomaton* re2aut(expr* e); - eautomaton* seq2aut(expr* e); - public: - re2automaton(ast_manager& m); - eautomaton* operator()(expr* e) { return re2aut(e); } - }; - class theory_seq : public theory { typedef scoped_dependency_manager enode_pair_dependency_manager; typedef enode_pair_dependency_manager::dependency enode_pair_dependency;