From 65d147106e1ddcadc7b7772e1ec45c34b5a3eccf Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 24 Dec 2015 12:01:59 -0800 Subject: [PATCH] automata Signed-off-by: Nikolaj Bjorner --- src/math/automata/automaton.h | 59 ++++++++++----- src/smt/theory_seq.cpp | 136 +++++++++++++++++++++++++++------- src/smt/theory_seq.h | 15 +++- src/util/scoped_ptr_vector.h | 1 + 4 files changed, 163 insertions(+), 48 deletions(-) diff --git a/src/math/automata/automaton.h b/src/math/automata/automaton.h index cc9fe2c02..606991f6a 100644 --- a/src/math/automata/automaton.h +++ b/src/math/automata/automaton.h @@ -78,8 +78,6 @@ private: unsigned m_init; uint_set m_final_set; unsigned_vector m_final_states; - bool m_is_epsilon_free; - bool m_is_deterministic; // local data-structures @@ -91,9 +89,7 @@ public: // The empty automaton: automaton(M& m): m(m), - m_init(0), - m_is_epsilon_free(true), - m_is_deterministic(true) + m_init(0) { m_delta.push_back(moves()); m_delta_inv.push_back(moves()); @@ -102,8 +98,6 @@ public: // create an automaton from initial state, final states, and moves automaton(M& m, unsigned init, unsigned_vector const& final, moves const& mvs): m(m) { - m_is_epsilon_free = true; - m_is_deterministic = true; m_init = init; for (unsigned i = 0; i < final.size(); ++i) { m_final_states.push_back(final[i]); @@ -118,17 +112,13 @@ public: } m_delta[mv.src()].push_back(mv); m_delta_inv[mv.dst()].push_back(mv); - m_is_deterministic &= m_delta[mv.src()].size() < 2; - m_is_epsilon_free &= !mv.is_epsilon(); } } // create an automaton that accepts a sequence. automaton(M& m, ptr_vector const& seq): m(m), - m_init(0), - m_is_epsilon_free(true), - m_is_deterministic(true) { + m_init(0) { m_delta.resize(seq.size()+1, moves()); m_delta_inv.resize(seq.size()+1, moves()); for (unsigned i = 0; i < seq.size(); ++i) { @@ -157,9 +147,7 @@ public: m_delta_inv(other.m_delta_inv), m_init(other.m_init), m_final_set(other.m_final_set), - m_final_states(other.m_final_states), - m_is_epsilon_free(other.m_is_epsilon_free), - m_is_deterministic(other.m_is_deterministic) + m_final_states(other.m_final_states) {} // create the automaton that accepts the empty string only. @@ -187,8 +175,8 @@ public: unsigned_vector final; unsigned offset1 = 1; unsigned offset2 = a.num_states() + 1; - mvs.push_back(move(m, 0, a.init() + offset1, 0)); - mvs.push_back(move(m, 0, b.init() + offset2, 0)); + mvs.push_back(move(m, 0, a.init() + offset1)); + mvs.push_back(move(m, 0, b.init() + offset2)); append_moves(offset1, a, mvs); append_moves(offset2, b, mvs); append_final(offset1, a, final); @@ -196,6 +184,23 @@ public: return alloc(automaton, m, 0, final, mvs); } + static automaton* mk_opt(automaton const& a) { + M& m = a.m; + moves mvs; + unsigned_vector final; + unsigned offset = 0; + unsigned init = a.init(); + if (!a.initial_state_is_source()) { + offset = 1; + init = 0; + mvs.push_back(move(m, 0, a.init() + offset)); + } + mvs.push_back(move(m, init, a.final_state() + offset)); + append_moves(offset, a, mvs); + append_final(offset, a, final); + return alloc(automaton, m, init, final, mvs); + } + // concatenate accepting languages static automaton* mk_concat(automaton const& a, automaton const& b) { SASSERT(&a.m == &b.m); @@ -276,6 +281,7 @@ public: // remove states that only have epsilon transitions. void compress() { + // TBD } @@ -309,8 +315,23 @@ public: moves const& get_moves_to(unsigned state) const { return m_delta_inv[state]; } bool initial_state_is_source() const { return m_delta_inv[m_init].empty(); } bool is_final_state(unsigned s) const { return m_final_set.contains(s); } - bool is_epsilon_free() const { return m_is_epsilon_free; } - bool is_deterministic() const { return m_is_deterministic; } + bool is_epsilon_free() const { + for (unsigned i = 0; i < m_delta.size(); ++i) { + moves const& mvs = m_delta[i]; + for (unsigned j = 0; j < mvs.size(); ++j) { + if (!mvs[j].t()) return false; + } + } + return true; + } + + bool is_deterministic() const { + for (unsigned i = 0; i < m_delta.size(); ++i) { + if (m_delta[i].size() >= 2) return false; + } + return true; + } + bool is_empty() const { return m_final_states.empty(); } unsigned final_state() const { return m_final_states[0]; } bool has_single_final_sink() const { return m_final_states.size() == 1 && m_delta[final_state()].empty(); } diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index b729ddebe..a379d9eed 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -26,6 +26,14 @@ Revision History: using namespace smt; +struct display_expr { + ast_manager& m; + display_expr(ast_manager& m): m(m) {} + std::ostream& display(std::ostream& out, expr* e) const { + return out << mk_pp(e, m); + } +}; + re2automaton::re2automaton(ast_manager& m): m(m), u(m) {} @@ -52,7 +60,7 @@ eautomaton* re2automaton::re2aut(expr* e) { return a.detach(); } else if (u.re.is_opt(e, e1) && (a = re2aut(e1))) { - a->add_init_to_final(); + a = eautomaton::mk_opt(*a); return a.detach(); } else if (u.re.is_range(e)) { @@ -200,7 +208,6 @@ theory_seq::theory_seq(ast_manager& m): m_rep(m, m_dm), m_factory(0), m_ineqs(m), - m_in_re(m), m_exclude(m), m_axioms(m), m_axioms_head(0), @@ -219,6 +226,7 @@ theory_seq::theory_seq(ast_manager& m): m_right_sym = "seq.right"; m_contains_left_sym = "seq.contains.left"; m_contains_right_sym = "seq.contains.right"; + m_accept_sym = "aut.accept"; } theory_seq::~theory_seq() { @@ -455,7 +463,7 @@ bool theory_seq::is_solved() { if (!check_ineq_coherence()) { return false; } - if (!m_in_re.empty()) { + if (!m_re2aut.empty()) { return false; } @@ -595,17 +603,15 @@ bool theory_seq::is_var(expr* a) { } bool theory_seq::is_left_select(expr* a, expr*& b) { - return m_util.is_skolem(a) && - to_app(a)->get_decl()->get_parameter(0).get_symbol() == m_left_sym && (b = to_app(a)->get_arg(0), true); + return is_skolem(m_left_sym, a) && (b = to_app(a)->get_arg(0), true); } bool theory_seq::is_right_select(expr* a, expr*& b) { - return m_util.is_skolem(a) && - to_app(a)->get_decl()->get_parameter(0).get_symbol() == m_right_sym && (b = to_app(a)->get_arg(0), true); + return is_skolem(m_right_sym, a) && (b = to_app(a)->get_arg(0), true); } bool theory_seq::is_head_elem(expr* e) const { - return m_util.is_skolem(e) && to_app(e)->get_decl()->get_parameter(0).get_symbol() == symbol("seq.head.elem"); + return is_skolem(symbol("seq.head.elem"), e); } void theory_seq::add_solution(expr* l, expr* r, enode_pair_dependency* deps) { @@ -841,10 +847,13 @@ void theory_seq::display(std::ostream & out) const { out << mk_pp(m_ineqs[i], m) << "\n"; } } - if (!m_in_re.empty()) { + if (!m_re2aut.empty()) { out << "Regex\n"; - for (unsigned i = 0; i < m_in_re.size(); ++i) { - out << mk_pp(m_in_re[i], m) << "\n"; + obj_map::iterator it = m_re2aut.begin(), end = m_re2aut.end(); + for (; it != end; ++it) { + out << mk_pp(it->m_key, m) << "\n"; + display_expr disp(m); + it->m_value->display(out, disp); } } if (!m_rep.empty()) { @@ -1089,6 +1098,9 @@ void theory_seq::deque_axiom(expr* n) { add_elim_string_axiom(n); // add_length_string_axiom(n); } + else if (m_util.str.is_in_re(n)) { + add_in_re_axiom(n); + } } @@ -1281,6 +1293,76 @@ void theory_seq::add_length_axiom(expr* n) { } } +// the empty sequence is accepted only in the final states. +// membership holds iff the initial state holds. +void theory_seq::add_in_re_axiom(expr* n) { + expr* e1, *e2; + context& ctx = get_context(); + VERIFY(m_util.str.is_in_re(n, e1, e2)); + eautomaton* a = get_automaton(e2); + if (!a) return; + + expr_ref emp(m_util.str.mk_empty(m.get_sort(e1)), m); + for (unsigned i = 0; i < a->num_states(); ++i) { + expr_ref acc = mk_accept(emp, e2, m_autil.mk_int(i)); + literal lit = mk_literal(acc); + add_axiom(a->is_final_state(i)?lit:~lit); + } + unsigned_vector states; + a->get_epsilon_closure(a->init(), states); + literal_vector lits; + literal lit = mk_literal(n); + ctx.mark_as_relevant(lit); + lits.push_back(~lit); + for (unsigned i = 0; i < states.size(); ++i) { + expr_ref acc = mk_accept(e1, e2, m_autil.mk_int(a->init())); + literal lit2 = mk_literal(acc); + lits.push_back(lit2); + add_axiom(~lit2, lit); + } + ctx.mk_th_axiom(get_id(), lits.size(), lits.c_ptr()); +} + +eautomaton* theory_seq::get_automaton(expr* re) { + eautomaton* result = 0; + if (m_re2aut.find(re, result)) { + return result; + } + result = re2automaton(m)(re); + if (result) { + display_expr disp(m); + TRACE("seq", result->display(tout, disp);); + } + if (result) { + m_automata.push_back(result); + m_trail_stack.push(push_back_vector >(m_automata)); + } + m_re2aut.insert(re, result); + m_trail_stack.push(insert_obj_map(m_re2aut, re)); + return result; +} + +expr_ref theory_seq::mk_accept(expr* s, expr* re, expr* state) { + return expr_ref(mk_skolem(m_accept_sym, s, re, state, m.mk_bool_sort()), m); +} + + +bool theory_seq::is_accept(expr* acc, expr*& s, expr*& re, unsigned& i, eautomaton*& aut) { + if (is_accept(acc)) { + rational r; + s = to_app(acc)->get_arg(0); + re = to_app(acc)->get_arg(1); + VERIFY(m_autil.is_numeral(to_app(acc)->get_arg(2), r)); + SASSERT(r.is_unsigned()); + i = r.get_unsigned(); + aut = m_re2aut[re]; + return true; + } + else { + return false; + } +} + expr_ref theory_seq::mk_sub(expr* a, expr* b) { expr_ref result(m_autil.mk_sub(a, b), m); m_rewrite(result); @@ -1387,6 +1469,11 @@ expr_ref theory_seq::mk_skolem(symbol const& name, expr* e1, return expr_ref(m_util.mk_skolem(name, len, es, range), m); } +bool theory_seq::is_skolem(symbol const& s, expr* e) const { + return m_util.is_skolem(e) && to_app(e)->get_decl()->get_parameter(0).get_symbol() == s; +} + + void theory_seq::propagate_eq(bool_var v, expr* e1, expr* e2) { context& ctx = get_context(); @@ -1408,18 +1495,14 @@ void theory_seq::propagate_eq(bool_var v, expr* e1, expr* e2) { ctx.assign_eq(n1, n2, eq_justification(js)); } -struct display_expr { - ast_manager& m; - display_expr(ast_manager& m): m(m) {} - std::ostream& display(std::ostream& out, expr* e) const { - return out << mk_pp(e, m); - } -}; void theory_seq::assign_eh(bool_var v, bool is_true) { context & ctx = get_context(); expr* e = ctx.bool_var2expr(v); - if (is_true) { + if (is_accept(e)) { + // TBD + } + else if (is_true) { expr* e1, *e2; expr_ref f(m); if (m_util.str.is_prefix(e, e1, e2)) { @@ -1439,15 +1522,11 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { propagate_eq(v, f, e1); } else if (m_util.str.is_in_re(e, e1, e2)) { - TRACE("seq", tout << "in re: " << mk_pp(e, m) << "\n";); - m_trail_stack.push(push_back_vector(m_in_re)); - m_in_re.push_back(e); - scoped_ptr a = re2automaton(m)(e2); - if (a) { - display_expr disp(m); - TRACE("seq", a->display(tout, disp);); - } + // Predicate: accept(e1, e2, state) + // seq.in.re(e1,e2) <-> accept(e1, e2, init) + // accept("", e2, state) <-> state is final in a + // e = head.elem(e)*tail(e) -> accept(e, e2, state) <-> \/ accept(tail(e), e2, state') & label(t) = head.elem(e) for t : state->state' in a } else { UNREACHABLE(); @@ -1519,7 +1598,8 @@ void theory_seq::relevant_eh(app* n) { m_util.str.is_concat(n) || m_util.str.is_empty(n) || m_util.str.is_unit(n) || - m_util.str.is_string(n)) { + m_util.str.is_string(n) || + m_util.str.is_in_re(n)) { enque_axiom(n); } if (m_util.str.is_in_re(n) || diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h index 23eac1c8b..ab0038933 100644 --- a/src/smt/theory_seq.h +++ b/src/smt/theory_seq.h @@ -25,6 +25,7 @@ Revision History: #include "th_rewriter.h" #include "ast_trail.h" #include "scoped_vector.h" +#include "scoped_ptr_vector.h" #include "automaton.h" namespace smt { @@ -263,7 +264,6 @@ namespace smt { seq_factory* m_factory; // value factory expr_ref_vector m_ineqs; // inequalities to check solution against - expr_ref_vector m_in_re; // regular expression membership exclusion_table m_exclude; // set of asserted disequalities. expr_ref_vector m_axioms; // list of axioms to add. unsigned m_axioms_head; // index of first axiom to add. @@ -283,6 +283,11 @@ namespace smt { symbol m_contains_right_sym; symbol m_left_sym; // split variable left part symbol m_right_sym; // split variable right part + symbol m_accept_sym; + + // maintain automata with regular expressions. + scoped_ptr_vector m_automata; + obj_map m_re2aut; virtual final_check_status final_check_eh(); virtual bool internalize_atom(app*, bool); @@ -361,15 +366,23 @@ namespace smt { void add_length_string_axiom(expr* n); void add_elim_string_axiom(expr* n); void add_at_axiom(expr* n); + void add_in_re_axiom(expr* n); literal mk_literal(expr* n); void tightest_prefix(expr* s, expr* x, literal lit, literal lit2 = null_literal); expr_ref mk_sub(expr* a, expr* b); enode* ensure_enode(expr* a); expr_ref mk_skolem(symbol const& s, expr* e1, expr* e2 = 0, expr* e3 = 0, sort* range = 0); + bool is_skolem(symbol const& s, expr* e) const; void set_incomplete(app* term); + // automata utilities + eautomaton* get_automaton(expr* e); + expr_ref mk_accept(expr* s, expr* re, expr* state); + bool is_accept(expr* acc) const { return is_skolem(m_accept_sym, acc); } + bool is_accept(expr* acc, expr*& s, expr*& re, unsigned& i, eautomaton*& aut); + // diagnostics void display_equations(std::ostream& out) const; void display_disequations(std::ostream& out) const; diff --git a/src/util/scoped_ptr_vector.h b/src/util/scoped_ptr_vector.h index 6e1c26f7d..d1ff89733 100644 --- a/src/util/scoped_ptr_vector.h +++ b/src/util/scoped_ptr_vector.h @@ -30,6 +30,7 @@ public: ~scoped_ptr_vector() { reset(); } void reset() { std::for_each(m_vector.begin(), m_vector.end(), delete_proc()); m_vector.reset(); } void push_back(T * ptr) { m_vector.push_back(ptr); } + void pop_back() { SASSERT(!empty()); set(size()-1, 0); m_vector.pop_back(); } T * operator[](unsigned idx) const { return m_vector[idx]; } void set(unsigned idx, T * ptr) { if (m_vector[idx] == ptr)