diff --git a/scripts/mk_project.py b/scripts/mk_project.py index eb7218d4b..08986d96b 100644 --- a/scripts/mk_project.py +++ b/scripts/mk_project.py @@ -16,11 +16,12 @@ def init_project_def(): add_lib('nlsat', ['polynomial', 'sat']) add_lib('hilbert', ['util'], 'math/hilbert') add_lib('simplex', ['util'], 'math/simplex') + add_lib('automata', ['util'], 'math/automata') add_lib('interval', ['util'], 'math/interval') add_lib('realclosure', ['interval'], 'math/realclosure') add_lib('subpaving', ['interval'], 'math/subpaving') add_lib('ast', ['util', 'polynomial']) - add_lib('rewriter', ['ast', 'polynomial'], 'ast/rewriter') + add_lib('rewriter', ['ast', 'polynomial', 'automata'], 'ast/rewriter') add_lib('normal_forms', ['rewriter'], 'ast/normal_forms') add_lib('model', ['rewriter']) add_lib('tactic', ['ast', 'model']) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index d7ac3ff67..41c20f599 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -22,6 +22,7 @@ Notes: #include"ast_pp.h" #include"ast_util.h" #include"uint_set.h" +#include"automaton.h" br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) { @@ -30,22 +31,38 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con switch(f->get_decl_kind()) { case OP_SEQ_UNIT: - case OP_SEQ_EMPTY: - - case OP_RE_PLUS: - case OP_RE_STAR: - case OP_RE_OPTION: - case OP_RE_RANGE: - case OP_RE_CONCAT: - case OP_RE_UNION: - case OP_RE_INTERSECT: - case OP_RE_LOOP: - case OP_RE_EMPTY_SET: - case OP_RE_FULL_SET: - case OP_RE_OF_PRED: - case _OP_SEQ_SKOLEM: return BR_FAILED; - + case OP_SEQ_EMPTY: + return BR_FAILED; + case OP_RE_PLUS: + SASSERT(num_args == 1); + return mk_re_plus(args[0], result); + case OP_RE_STAR: + SASSERT(num_args == 1); + return mk_re_star(args[0], result); + case OP_RE_OPTION: + SASSERT(num_args == 1); + return mk_re_opt(args[0], result); + case OP_RE_CONCAT: + SASSERT(num_args == 2); + return mk_re_concat(args[0], args[1], result); + case OP_RE_UNION: + SASSERT(num_args == 2); + return mk_re_union(args[0], args[1], result); + case OP_RE_RANGE: + return BR_FAILED; + case OP_RE_INTERSECT: + return BR_FAILED; + case OP_RE_LOOP: + return BR_FAILED; + case OP_RE_EMPTY_SET: + return BR_FAILED; + case OP_RE_FULL_SET: + return BR_FAILED; + case OP_RE_OF_PRED: + return BR_FAILED; + case _OP_SEQ_SKOLEM: + return BR_FAILED; case OP_SEQ_CONCAT: if (num_args == 1) { result = args[0]; @@ -83,10 +100,11 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con SASSERT(num_args == 3); return mk_seq_replace(args[0], args[1], args[2], result); case OP_SEQ_TO_RE: - return BR_FAILED; + SASSERT(num_args == 1); + return mk_str_to_regexp(args[0], result); case OP_SEQ_IN_RE: - return BR_FAILED; - + SASSERT(num_args == 2); + return mk_str_in_regexp(args[0], args[1], result); case OP_STRING_CONST: return BR_FAILED; case OP_STRING_ITOS: @@ -499,7 +517,10 @@ br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) { return BR_FAILED; } br_status seq_rewriter::mk_re_opt(expr* a, expr_ref& result) { - return BR_FAILED; + sort* s; + VERIFY(m_util.is_re(a, s)); + result = m_util.re.mk_union(m_util.re.mk_to_re(m_util.str.mk_empty(s)), a); + return BR_REWRITE1; } br_status seq_rewriter::mk_eq_core(expr * l, expr * r, expr_ref & result) { diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 5b5ed1c55..10e9c00a5 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -629,6 +629,11 @@ app* seq_util::str::mk_char(zstring const& s, unsigned idx) { return bvu.mk_numeral(s[idx], s.num_bits()); } +app* seq_util::str::mk_char(char ch) { + zstring s(ch, zstring::ascii); + return mk_char(s, 0); +} + bool seq_util::str::is_char(expr* n, zstring& c) const { if (u.is_char(n)) { c = zstring(to_app(n)->get_decl()->get_parameter(0).get_symbol().bare_str()); diff --git a/src/math/automata/automaton.cpp b/src/math/automata/automaton.cpp new file mode 100644 index 000000000..ab908525e --- /dev/null +++ b/src/math/automata/automaton.cpp @@ -0,0 +1,23 @@ +/*++ +Copyright (c) 2015 Microsoft Corporation + +Module Name: + + automaton.cpp + +Abstract: + + Symbolic Automaton, a la Margus Veanes Automata library. + +Author: + + Nikolaj Bjorner (nbjorner) 2015-12-23. + +Revision History: + + +--*/ + +#include "automaton.h" + +template class automaton; diff --git a/src/math/automata/automaton.h b/src/math/automata/automaton.h new file mode 100644 index 000000000..e28db8fa0 --- /dev/null +++ b/src/math/automata/automaton.h @@ -0,0 +1,295 @@ +/*++ +Copyright (c) 2015 Microsoft Corporation + +Module Name: + + automaton.h + +Abstract: + + Symbolic Automaton, a la Margus Veanes Automata library. + +Author: + + Nikolaj Bjorner (nbjorner) 2015-12-23. + +Revision History: + + +--*/ + +#ifndef AUTOMATON_H_ +#define AUTOMATON_H_ + + +#include "util.h" +#include "vector.h" +#include "uint_set.h" + +template +class automaton { + class move { + T* m_t; + unsigned m_src; + unsigned m_dst; + public: + move(unsigned s, unsigned d, T* t = 0): m_t(t), m_src(s), m_dst(d) {} + + unsigned dst() const { return m_dst; } + unsigned src() const { return m_src; } + T* t() const { return m_t; } + + bool is_epsilon() const { return m_t == 0; } + }; + typedef svector moves; + + + svector m_delta; + svector m_delta_inv; + unsigned m_init; + uint_set m_final_set; + unsigned_vector m_final_states; + bool m_is_epsilon_free; + bool m_is_deterministic; + + + // local data-structures + mutable uint_set m_visited; + mutable unsigned_vector m_todo; + + +public: + + // The empty automaton: + automaton(): + m_init(0), + m_is_epsilon_free(true), + m_is_deterministic(true) + { + m_delta.push_back(moves()); + m_delta_inv.push_back(moves()); + } + + + // create an automaton from initial state, final states, and moves + automaton(unsigned init, unsigned_vector const& final, moves const& mvs) { + m_is_epsilon_free = true; + m_is_deterministic = true; + m_init = init; + for (unsigned i = 0; i < final.size(); ++i) { + m_final_states.push_back(final[i]); + m_final_set.insert(final[i]); + } + for (unsigned i = 0; i < mvs.size(); ++i) { + move const& mv = mvs[i]; + unsigned n = std::max(mv.src(), mv.dst()); + if (n >= m_delta.size()) { + m_delta.resize(n+1, moves()); + m_delta_inv.resize(n+1, moves()); + } + m_delta[mv.src()].push_back(mv); + m_delta_inv[mv.dst()].push_back(mv); + m_is_deterministic &= m_delta[mv.src()].size() < 2; + m_is_epsilon_free &= !mv.is_epsilon(); + } + } + + // The automaton with a single state that is also final. + automaton(T* t, unsigned s = 0): + m_init(s) { + m_delta.resize(s+1, moves()); + m_delta_inv.resize(s+1, moves()); + m_final_set.insert(s); + m_final_states.push_back(s); + m_delta[s].push_back(move(s, s, t)); + m_delta_inv[s].push_back(move(s, s, t)); + } + + automaton(automaton const& other): + m_delta(other.m_delta), + m_delta_inv(other.m_delta_inv), + m_init(other.m_init), + m_final_set(other.m_final_set), + m_final_states(other.m_final_states), + m_is_epsilon_free(other.m_is_epsilon_free), + m_is_deterministic(other.m_is_deterministic) + {} + + // create the automaton that accepts the empty string only. + static automaton mk_epsilon() { + moves mvs; + unsigned_vector final; + final.push_back(0); + return automaton(0, final, mvs); + } + + // create the automaton with a single state on condition t. + static automaton mk_loop(T* t) { + moves mvs; + unsigned_vector final; + final.push_back(0); + mvs.push_back(move(0, 0, t)); + return automaton(0, final, mvs); + } + + // create the sum of disjoint automata + static automaton mk_union(automaton const& a, automaton const& b) { + moves mvs; + unsigned_vector final; + unsigned offset1 = 1; + unsigned offset2 = a.num_states() + 1; + mvs.push_back(move(0, a.init() + offset1, 0)); + mvs.push_back(move(0, b.init() + offset2, 0)); + append_moves(offset1, a, mvs); + append_moves(offset2, b, mvs); + append_final(offset1, a, final); + append_final(offset2, b, final); + return automaton(0, final, mvs); + } + + static automaton mk_reverse(automaton const& a) { + if (a.is_empty()) { + return automaton(); + } + moves mvs; + for (unsigned i = 0; i < a.m_delta.size(); ++i) { + moves const& mvs1 = a.m_delta[i]; + for (unsigned j = 0; j < mvs1.size(); ++j) { + move const& mv = mvs1[j]; + mvs.push_back(move(mv.dst(), mv.src(), mv.t())); + } + } + unsigned_vector final; + unsigned init; + final.push_back(a.init()); + if (a.m_final_states.size() == 1) { + init = a.m_final_states[0]; + } + else { + init = a.num_states(); + for (unsigned i = 0; i < a.m_final_states.size(); ++i) { + mvs.push_back(move(init, a.m_final_states[i])); + } + } + return automaton(init, final, mvs); + } + + bool is_sequence(unsigned& length) const { + if (is_final_state(m_init) && (out_degree(m_init) == 0 || (out_degree(m_init) == 1 && is_loop_state(m_init)))) { + length = 0; + return true; + } + if (is_empty() || in_degree(m_init) != 0 || out_degree(m_init) != 1) { + return false; + } + + length = 1; + unsigned s = get_move_from(m_init).dst(); + while (!is_final_state(s)) { + if (out_degree(s) != 1 || in_degree(s) != 1) { + return false; + } + s = get_move_from(s).dst(); + ++length; + } + return out_degree(s) == 0 || (out_degree(s) == 1 && is_loop_state(s)); + } + + unsigned init() const { return m_init; } + unsigned in_degree(unsigned state) const { return m_delta_inv[state].size(); } + unsigned out_degree(unsigned state) const { return m_delta[state].size(); } + move const& get_move_from(unsigned state) const { SASSERT(m_delta[state].size() == 1); return m_delta[state][0]; } + move const& get_move_to(unsigned state) const { SASSERT(m_delta_inv[state].size() == 1); return m_delta_inv[state][0]; } + moves const& get_moves_from(unsigned state) const { return m_delta[state]; } + moves const& get_moves_to(unsigned state) const { return m_delta_inv[state]; } + bool initial_state_is_source() const { return m_delta_inv[m_init].empty(); } + bool is_final_state(unsigned s) const { return m_final_set.contains(s); } + bool is_epsilon_free() const { return m_is_epsilon_free; } + bool is_deterministic() const { return m_is_deterministic; } + bool is_empty() const { return m_final_states.empty(); } + unsigned final_state() const { return m_final_states[0]; } + bool has_single_final_sink() const { return m_final_states.size() == 1 && m_delta[final_state()].empty(); } + unsigned num_states() const { return m_delta.size(); } + bool is_loop_state(unsigned s) const { + moves mvs; + get_moves_from(s, mvs); + for (unsigned i = 0; i < mvs.size(); ++i) { + if (s == mvs[i].dst()) return true; + } + return false; + } + + unsigned move_count() const { + unsigned result = 0; + for (unsigned i = 0; i < m_delta.size(); result += m_delta[i].size(), ++i) {} + return result; + } + void get_epsilon_closure(unsigned state, unsigned_vector& states) { + get_epsilon_closure(state, m_delta, states); + } + void get_inv_epsilon_closure(unsigned state, unsigned_vector& states) { + get_epsilon_closure(state, m_delta_inv, states); + } + void get_moves_from(unsigned state, moves& mvs) const { + get_moves(state, m_delta, mvs); + } + void get_moves_to(unsigned state, moves& mvs) { + get_moves(state, m_delta_inv, mvs); + } +private: + void get_moves(unsigned state, svector const& delta, moves& mvs) const { + unsigned_vector states; + get_epsilon_closure(state, delta, states); + for (unsigned i = 0; i < states.size(); ++i) { + state = states[i]; + moves const& mv1 = delta[state]; + for (unsigned j = 0; j < mv1.size(); ++j) { + if (!mv1[j].is_epsilon()) { + mvs.push_back(mv1[j]); + } + } + } + } + + void get_epsilon_closure(unsigned state, svector const& delta, unsigned_vector& states) const { + m_todo.push_back(state); + m_visited.insert(state); + while (!m_todo.empty()) { + state = m_todo.back(); + states.push_back(state); + m_todo.pop_back(); + moves const& mvs = delta[state]; + for (unsigned i = 0; i < mvs.size(); ++i) { + unsigned tgt = mvs[i].dst(); + if (mvs[i].is_epsilon() && !m_visited.contains(tgt)) { + m_visited.insert(tgt); + m_todo.push_back(tgt); + } + } + } + m_visited.reset(); + SASSERT(m_todo.empty()); + } + + static void append_moves(unsigned offset, automaton const& a, moves& mvs) { + for (unsigned i = 0; i < a.num_states(); ++i) { + moves const& mvs1 = a.m_delta[i]; + for (unsigned j = 0; j < mvs1.size(); ++j) { + move const& mv = mvs1[j]; + mvs.push_back(move(mv.src() + offset, mv.dst() + offset, mv.t())); + } + } + } + + static void append_final(unsigned offset, automaton const& a, unsigned_vector& final) { + for (unsigned i = 0; i < a.m_final_states.size(); ++i) { + final.push_back(a.m_final_states[i]+offset); + } + } + +}; + +typedef automaton uautomaton; + + +#endif diff --git a/src/smt/smt_context.cpp b/src/smt/smt_context.cpp index 26ca2e68a..738763c88 100644 --- a/src/smt/smt_context.cpp +++ b/src/smt/smt_context.cpp @@ -1382,7 +1382,8 @@ namespace smt { bool_var v = l.var(); bool_var_data & d = get_bdata(v); lbool val = get_assignment(v); - TRACE("propagate_atoms", tout << "propagating atom, #" << bool_var2expr(v)->get_id() << ", is_enode(): " << d.is_enode() << " " << l << "\n";); + TRACE("propagate_atoms", tout << "propagating atom, #" << bool_var2expr(v)->get_id() << ", is_enode(): " << d.is_enode() + << " tag: " << (d.is_eq()?"eq":"") << (d.is_theory_atom()?"th":"") << (d.is_quantifier()?"q":"") << " " << l << "\n";); SASSERT(val != l_undef); if (d.is_enode()) propagate_bool_var_enode(v); @@ -1404,6 +1405,7 @@ namespace smt { else if (d.is_theory_atom()) { theory * th = m_theories.get_plugin(d.get_theory()); SASSERT(th); + TRACE("seq", tout << d.get_theory() << "\n";); th->assign_eh(v, val == l_true); } else if (d.is_quantifier()) { diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 79779d088..1cae448b2 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -125,6 +125,7 @@ theory_seq::theory_seq(ast_manager& m): m_rep(m, m_dm), m_factory(0), m_ineqs(m), + m_in_re(m), m_exclude(m), m_axioms(m), m_axioms_head(0), @@ -379,6 +380,9 @@ bool theory_seq::is_solved() { if (!check_ineq_coherence()) { return false; } + if (!m_in_re.empty()) { + return false; + } SASSERT(check_length_coherence()); @@ -572,7 +576,6 @@ bool theory_seq::solve_nqs() { bool change = false; context & ctx = get_context(); for (unsigned i = 0; !ctx.inconsistent() && i < m_nqs.size(); ++i) { - TRACE("seq", tout << i << " " << m_nqs.size() << "\n";); if (!m_nqs[i].is_solved()) { change = solve_ne(i) || change; } @@ -637,6 +640,16 @@ bool theory_seq::solve_ne(unsigned idx) { literal lit(mk_eq(nl, nr, false)); m_trail_stack.push(push_lit(*this, idx, lit)); ctx.mark_as_relevant(lit); + switch (ctx.get_assignment(lit)) { + case l_false: + mark_solved(idx); + return false; + case l_true: + break; + case l_undef: + ++num_undef_lits; + break; + } } } m_trail_stack.push(push_dep(*this, idx, deps)); @@ -696,6 +709,8 @@ bool theory_seq::internalize_term(app* term) { if (m.is_bool(term)) { bool_var bv = ctx.mk_bool_var(term); ctx.set_var_theory(bv, get_id()); + ctx.mark_as_relevant(bv); + TRACE("seq", tout << mk_pp(term, m) << ": " << bv << "\n";); } else { enode* e = 0; @@ -751,6 +766,12 @@ void theory_seq::display(std::ostream & out) const { out << mk_pp(m_ineqs[i], m) << "\n"; } } + if (!m_in_re.empty()) { + out << "Regex\n"; + for (unsigned i = 0; i < m_in_re.size(); ++i) { + out << mk_pp(m_in_re[i], m) << "\n"; + } + } if (!m_rep.empty()) { out << "Solved equations:\n"; m_rep.display(out); @@ -917,7 +938,12 @@ expr_ref theory_seq::expand(expr* e0, enode_pair_dependency*& eqs) { enode* n = get_context().get_enode(e)->get_root(); result = n->get_owner(); if (!m.is_model_value(result)) { - result = m_mg->get_some_value(m.get_sort(result)); + if (m_util.is_char(result)) { + result = m_util.str.mk_char('#'); + } + else { + result = m_mg->get_some_value(m.get_sort(result)); + } } m_rep.update(e, result, 0); TRACE("seq", tout << mk_pp(e, m) << " |-> " << result << "\n";); @@ -1288,13 +1314,16 @@ expr_ref theory_seq::mk_skolem(symbol const& name, expr* e1, void theory_seq::propagate_eq(bool_var v, expr* e1, expr* e2) { context& ctx = get_context(); - TRACE("seq", - tout << mk_pp(ctx.bool_var2enode(v)->get_owner(), m) << " => " - << mk_pp(e1, m) << " = " << mk_pp(e2, m) << "\n";); SASSERT(ctx.e_internalized(e2)); enode* n1 = ensure_enode(e1); enode* n2 = ensure_enode(e2); + if (n1->get_root() == n2->get_root()) { + return; + } + TRACE("seq", + tout << mk_pp(ctx.bool_var2enode(v)->get_owner(), m) << " => " + << mk_pp(e1, m) << " = " << mk_pp(e2, m) << "\n";); literal lit(v); justification* js = ctx.mk_justification( @@ -1304,10 +1333,9 @@ void theory_seq::propagate_eq(bool_var v, expr* e1, expr* e2) { ctx.assign_eq(n1, n2, eq_justification(js)); } -void theory_seq::assign_eq(bool_var v, bool is_true) { +void theory_seq::assign_eh(bool_var v, bool is_true) { context & ctx = get_context(); - enode* n = ctx.bool_var2enode(v); - app* e = n->get_owner(); + expr* e = ctx.bool_var2expr(v); if (is_true) { expr* e1, *e2; expr_ref f(m); @@ -1327,8 +1355,10 @@ void theory_seq::assign_eq(bool_var v, bool is_true) { f = m_util.str.mk_concat(m_util.str.mk_concat(f1, e2), f2); propagate_eq(v, f, e1); } - else if (m_util.str.is_in_re(e, e1, e2)) { - // TBD + else if (m_util.str.is_in_re(e)) { + TRACE("seq", tout << "in re: " << mk_pp(e, m) << "\n";); + m_trail_stack.push(push_back_vector(m_in_re)); + m_in_re.push_back(e); } else { UNREACHABLE(); @@ -1403,4 +1433,22 @@ void theory_seq::relevant_eh(app* n) { m_util.str.is_string(n)) { enque_axiom(n); } + if (m_util.str.is_in_re(n) || + m_util.str.is_contains(n) || + m_util.str.is_suffix(n) || + m_util.str.is_prefix(n)) { + context& ctx = get_context(); + TRACE("seq", tout << mk_pp(n, m) << "\n";); + bool_var bv = ctx.get_bool_var(n); + switch (ctx.get_assignment(bv)) { + case l_false: + assign_eh(bv, false); + break; + case l_true: + assign_eh(bv, true); + break; + case l_undef: + break; + } + } } diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h index 21aabb599..43852a086 100644 --- a/src/smt/theory_seq.h +++ b/src/smt/theory_seq.h @@ -251,6 +251,7 @@ namespace smt { seq_factory* m_factory; // value factory expr_ref_vector m_ineqs; // inequalities to check solution against + expr_ref_vector m_in_re; // regular expression membership exclusion_table m_exclude; // set of asserted disequalities. expr_ref_vector m_axioms; // list of axioms to add. unsigned m_axioms_head; // index of first axiom to add. @@ -277,7 +278,7 @@ namespace smt { virtual void internalize_eq_eh(app * atom, bool_var v); virtual void new_eq_eh(theory_var, theory_var); virtual void new_diseq_eh(theory_var, theory_var); - virtual void assign_eq(bool_var v, bool is_true); + virtual void assign_eh(bool_var v, bool is_true); virtual bool can_propagate(); virtual void propagate(); virtual void push_scope_eh();