overhaul of regular expression membership solving. Use iterative deepening and propagation, coallesce intersections

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2025-08-07 03:31:23 +00:00 · 2018-11-24 15:26:39 -08:00 · 2018-11-24 15:26:39 -08:00 · 6ddbc9cd38
commit 6ddbc9cd38
parent d61d9d4ce3
5 changed files with 200 additions and 252 deletions
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@ -663,24 +663,28 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
            return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, info);
        }

-    case OP_SEQ_UNIT:
    case OP_RE_PLUS:
    case OP_RE_STAR:
    case OP_RE_OPTION:
    case OP_RE_RANGE:
    case OP_RE_OF_PRED:
+    case OP_RE_COMPLEMENT:
+        m_has_re = true;
+        // fall-through
+    case OP_SEQ_UNIT:
    case OP_STRING_ITOS:
    case OP_STRING_STOI:
-    case OP_RE_COMPLEMENT:
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));

    case _OP_REGEXP_FULL_CHAR:
+        m_has_re = true;
        if (!range) range = m_re;
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_CHAR_SET));

    case OP_RE_FULL_CHAR_SET:
+        m_has_re = true;
        if (!range) range = m_re;
        if (range == m_re) {
            match(*m_sigs[k], arity, domain, range, rng);
@ -689,15 +693,18 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));

    case OP_RE_FULL_SEQ_SET:
+        m_has_re = true;
        if (!range) range = m_re;
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));        

    case _OP_REGEXP_EMPTY:
+        m_has_re = true;
        if (!range) range = m_re;
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(symbol("re.nostr"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_EMPTY_SET));

    case OP_RE_EMPTY_SET:
+        m_has_re = true;
        if (!range) range = m_re;
        if (range == m_re) {
            match(*m_sigs[k], arity, domain, range, rng);
@ -706,6 +713,7 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));

    case OP_RE_LOOP:
+        m_has_re = true;
        switch (arity) {
        case 1:
            match(*m_sigs[k], arity, domain, range, rng);
@ -728,6 +736,7 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
        }
        
    case _OP_RE_UNROLL:
+        m_has_re = true;
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));

@ -741,6 +750,7 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
    case OP_RE_UNION:
    case OP_RE_CONCAT:
    case OP_RE_INTERSECT:
+        m_has_re = true;
        return mk_assoc_fun(k, arity, domain, range, k, k);

    case OP_SEQ_CONCAT:
@ -792,13 +802,17 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
        return mk_str_fun(k, arity, domain, range, OP_SEQ_CONTAINS);

    case OP_SEQ_TO_RE:
+        m_has_re = true;
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP);
    case _OP_STRING_TO_REGEXP:
+        m_has_re = true;
        return mk_str_fun(k, arity, domain, range, OP_SEQ_TO_RE);

    case OP_SEQ_IN_RE:
+        m_has_re = true;
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_IN_REGEXP);
    case _OP_STRING_IN_REGEXP:
+        m_has_re = true;
        return mk_str_fun(k, arity, domain, range, OP_SEQ_IN_RE);

    case OP_SEQ_AT:
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@ -146,6 +146,7 @@ class seq_decl_plugin : public decl_plugin {
    sort*            m_string;
    sort*            m_char;
    sort*            m_re;
+    bool             m_has_re;

    void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng);

@ -197,6 +198,8 @@ public:
    app* mk_string(symbol const& s);
    app* mk_string(zstring const& s);

+    bool has_re() const { return m_has_re; }
+
 };

 class seq_util {
@ -221,6 +224,8 @@ public:
    app* mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range);
    bool is_skolem(expr const* e) const { return is_app_of(e, m_fid, _OP_SEQ_SKOLEM); }

+    bool has_re() const { return seq.has_re(); }
+
    class str {
        seq_util&    u;
        ast_manager& m;
--- a/src/cmd_context/cmd_context.cpp
+++ b/src/cmd_context/cmd_context.cpp
@ -1750,20 +1750,25 @@ struct contains_underspecified_op_proc {
    struct found {};
    family_id m_array_fid;
    datatype_util m_dt;
+    seq_util m_seq;
+    family_id m_seq_id;
    
-    contains_underspecified_op_proc(ast_manager & m):m_array_fid(m.mk_family_id("array")), m_dt(m) {}
+    contains_underspecified_op_proc(ast_manager & m):m_array_fid(m.mk_family_id("array")), m_dt(m), m_seq(m), m_seq_id(m_seq.get_family_id()) {}
    void operator()(var * n)        {}
    void operator()(app * n)        {
        if (m_dt.is_accessor(n->get_decl())) 
            throw found();
-        if (n->get_family_id() != m_array_fid)
-            return;
-        decl_kind k = n->get_decl_kind();
-        if (k == OP_AS_ARRAY ||
-            k == OP_STORE ||
-            k == OP_ARRAY_MAP ||
-            k == OP_CONST_ARRAY)
+        if (n->get_family_id() == m_array_fid) {
+            decl_kind k = n->get_decl_kind();
+            if (k == OP_AS_ARRAY ||
+                k == OP_STORE ||
+                k == OP_ARRAY_MAP ||
+                k == OP_CONST_ARRAY)
+                throw found();
+        }
+        if (n->get_family_id() == m_seq_id && m_seq.is_re(n)) {
            throw found();
+        }
    }
    void operator()(quantifier * n) {}
 };
--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@ -217,7 +217,8 @@ theory_seq::theory_seq(ast_manager& m, theory_seq_params const & params):
    m_ls(m), m_rs(m),
    m_lhs(m), m_rhs(m),
    m_res(m),
-    m_atoms_qhead(0),
+    m_max_unfolding_depth(1),
+    m_max_unfolding_lit(null_literal),
    m_new_solution(false),
    m_new_propagation(false),
    m_mk_aut(m) {
@ -324,11 +325,6 @@ final_check_status theory_seq::final_check_eh() {
        TRACE("seq", tout << ">>extensionality\n";);
        return FC_CONTINUE;
    }
-    if (propagate_automata()) {
-        ++m_stats.m_propagate_automata;
-        TRACE("seq", tout << ">>propagate_automata\n";);
-        return FC_CONTINUE;
-    }
    if (is_solved()) {
        TRACE("seq", tout << ">>is_solved\n";);
        return FC_DONE;
@ -895,8 +891,8 @@ void theory_seq::len_offset(expr* e, rational val) {
    if (m_autil.is_add(e, l1, l2) && m_autil.is_mul(l2, l21, l22) &&
        m_autil.is_numeral(l21, fact) && fact.is_minus_one()) {
        if (ctx.e_internalized(l1) && ctx.e_internalized(l22)) {
-            enode* r1 = ctx.get_enode(l1)->get_root(), *n1 = r1;
-            enode* r2 = ctx.get_enode(l22)->get_root(), *n2 = r2;
+            enode* r1 = get_root(l1), *n1 = r1;
+            enode* r2 = get_root(l22), *n2 = r2;
            expr *e1 = nullptr, *e2 = nullptr;
            do {
                if (m_util.str.is_length(n1->get_owner(), e1))
@ -1093,13 +1089,13 @@ bool theory_seq::find_better_rep(expr_ref_vector const& ls, expr_ref_vector cons
    if (!ctx.e_internalized(len_r_fst))
        return false;
    else
-        root2 = ctx.get_enode(len_r_fst)->get_root();
+        root2 = get_root(len_r_fst);

    // Offset = 0, No change
    if (l_fst) {
        expr_ref len_l_fst = mk_len(l_fst);
        if (ctx.e_internalized(len_l_fst)) {
-            enode * root1 = ctx.get_enode(len_l_fst)->get_root();
+            enode * root1 = get_root(len_l_fst);
            if (root1 == root2) {
                TRACE("seq", tout << "(" << mk_pp(l_fst, m) << ", " << mk_pp(r_fst,m) << ")\n";);
                return false;
@ -1125,7 +1121,7 @@ bool theory_seq::find_better_rep(expr_ref_vector const& ls, expr_ref_vector cons
                    if (nl_fst && nl_fst != r_fst) {
                        expr_ref len_nl_fst = mk_len(nl_fst);
                        if (ctx.e_internalized(len_nl_fst)) {
-                            enode * root1 = ctx.get_enode(len_nl_fst)->get_root();
+                            enode * root1 = get_root(len_nl_fst);
                            if (root1 == root2) {
                                res.reset();
                                res.append(e.rs().size(), e.rs().c_ptr());
@ -1142,7 +1138,7 @@ bool theory_seq::find_better_rep(expr_ref_vector const& ls, expr_ref_vector cons
    if (l_fst) {
        expr_ref len_l_fst = mk_len(l_fst);
        if (ctx.e_internalized(len_l_fst)) {
-            enode * root1 = ctx.get_enode(len_l_fst)->get_root();
+            enode * root1 = get_root(len_l_fst);
            obj_map<enode, int> tmp;
            int offset;
            if (!m_autil.is_numeral(root1->get_owner()) && !m_autil.is_numeral(root2->get_owner())) {
@ -3363,7 +3359,6 @@ bool theory_seq::internalize_term(app* term) {
        mk_var(e);
        return true;
    }
-    TRACE("seq_verbose", tout << mk_pp(term, m) << "\n";);
    for (auto arg : *term) {
        mk_var(ensure_enode(arg));
    }
@ -3743,7 +3738,6 @@ std::ostream& theory_seq::display_deps(std::ostream& out, dependency* dep) const
 void theory_seq::collect_statistics(::statistics & st) const {
    st.update("seq num splits", m_stats.m_num_splits);
    st.update("seq num reductions", m_stats.m_num_reductions);
-    st.update("seq unfold def", m_stats.m_propagate_automata);
    st.update("seq length coherence", m_stats.m_check_length_coherence);
    st.update("seq branch", m_stats.m_branch_variable);
    st.update("seq solve !=", m_stats.m_solve_nqs);
@ -3905,8 +3899,8 @@ model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) {
    context& ctx = get_context();
    expr* e1, *e2, *e3;
    if (m.is_ite(e, e1, e2, e3) && ctx.e_internalized(e2) && ctx.e_internalized(e3) &&
-        (ctx.get_enode(e2)->get_root() == n->get_root() ||
-         ctx.get_enode(e3)->get_root() == n->get_root())) {
+        (get_root(e2) == n->get_root() ||
+         get_root(e3) == n->get_root())) {
        if (ctx.get_enode(e2)->get_root() == n->get_root()) {
            return mk_value(ctx.get_enode(e2), mg);
        }
@ -4562,20 +4556,43 @@ void theory_seq::propagate_in_re(expr* n, bool is_true) {
        e3 = m_util.re.mk_complement(re);
        lit.neg();
    }
+
+    literal_vector lits;
+    
+    enode_pair_vector eqs;
+    for (unsigned i = 0; i < m_s_in_re.size(); ++i) {
+        auto const& entry = m_s_in_re[i];
+        if (entry.m_active && get_root(entry.m_s) == get_root(s)) {
+            m_trail_stack.push(vector_value_trail<theory_seq, s_in_re, true>(m_s_in_re, i));
+            m_s_in_re[i].m_active = false;
+            e3 = m_util.re.mk_inter(entry.m_re, e3);
+            lits.push_back(entry.m_lit);
+            eqs.push_back(enode_pair(ensure_enode(entry.m_s), ensure_enode(s)));
+        }
+    }
+    if (!lits.empty()) {
+        TRACE("seq", tout << "creating intersection " << e3 << "\n";);
+        lits.push_back(lit);
+        literal inter = mk_literal(m_util.re.mk_in_re(s, e3));
+        justification* js = 
+            ctx.mk_justification(
+                ext_theory_propagation_justification(
+                    get_id(), ctx.get_region(), lits.size(), lits.c_ptr(), eqs.size(), eqs.c_ptr(), inter));
+        ctx.assign(inter, js);
+        return;
+    }    
+
    eautomaton* a = get_automaton(e3);
    if (!a) return;

+    m_s_in_re.push_back(s_in_re(lit, s, e3, a));
+    m_trail_stack.push(push_back_vector<theory_seq, vector<s_in_re>>(m_s_in_re));

    expr_ref len = mk_len(s);
-    for (unsigned i = 0; i < a->num_states(); ++i) {
-        literal acc = mk_accept(s, len, e3, i);
-        add_axiom(a->is_final_state(i)?acc:~acc);
-    }

    expr_ref zero(m_autil.mk_int(0), m);
    unsigned_vector states;
    a->get_epsilon_closure(a->init(), states);
-    literal_vector lits;
    lits.push_back(~lit);
    
    for (unsigned st : states) {
@ -4966,25 +4983,6 @@ void theory_seq::add_at_axiom(expr* e) {
    add_axiom(~i_ge_len_s, mk_eq(e, emp, false));
 }

-/**
-   step(s, idx, re, i, j, t) -> nth(s, idx) == t & len(s) > idx
-*/
-void theory_seq::propagate_step(literal lit, expr* step) {
-    SASSERT(get_context().get_assignment(lit) == l_true);
-    expr* re = nullptr, *acc = nullptr, *s = nullptr, *idx = nullptr, *i = nullptr, *j = nullptr;
-    VERIFY(is_step(step, s, idx, re, i, j, acc));
-    TRACE("seq", tout << mk_pp(step, m) << " -> " << mk_pp(acc, m) << "\n";);
-    propagate_lit(nullptr, 1, &lit, mk_simplified_literal(acc));
-    rational lo;
-    rational _idx;
-    if (lower_bound(s, lo) && lo.is_unsigned() && m_autil.is_numeral(idx, _idx) && lo >= _idx) {
-        // skip
-    }
-    else {
-        propagate_lit(nullptr, 1, &lit, ~mk_literal(m_autil.mk_le(mk_len(s), idx)));
-    }
-    ensure_nth(lit, s, idx);
-}

 /*
    lit => s = (nth s 0) ++ (nth s 1) ++ ... ++ (nth s idx) ++ (tail s idx)
@ -5264,18 +5262,12 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
    }
    else if (is_accept(e)) {
        if (is_true) {
-            propagate_acc_rej_length(lit, e);
-            if (add_accept2step(e, change)) {
-                add_atom(e);
-            }
+            propagate_accept(lit, e);
        }
    }
    else if (is_step(e)) {
        if (is_true) {
            propagate_step(lit, e);
-            if (add_step2accept(e, change)) {
-                add_atom(e);
-            }
        }
    }
    else if (is_eq(e, e1, e2)) {
@ -5290,6 +5282,10 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
        // propagate equalities
    }
    else if (is_skolem(symbol("seq.is_digit"), e)) {
+        // no-op
+    }
+    else if (is_max_unfolding(e)) {
+        // no-op
    }
    else {
        TRACE("seq", tout << mk_pp(e, m) << "\n";);
@ -5297,11 +5293,6 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
    }
 }

-void theory_seq::add_atom(expr* e) {
-    m_trail_stack.push(push_back_vector<theory_seq, ptr_vector<expr> >(m_atoms));
-    m_atoms.push_back(e);
-}
-
 void theory_seq::new_eq_eh(theory_var v1, theory_var v2) {
    enode* n1 = get_enode(v1);
    enode* n2 = get_enode(v2);
@ -5326,13 +5317,31 @@ void theory_seq::new_eq_eh(dependency* deps, enode* n1, enode* n2) {
        enforce_length_coherence(n1, n2);
    }
    else if (n1 != n2 && m_util.is_re(n1->get_owner())) {
-        // ignore
-        // eautomaton* a1 = get_automaton(n1->get_owner());
-        // eautomaton* a2 = get_automaton(n2->get_owner());
-        // eautomaton* b1 = mk_difference(*a1, *a2);
-        // eautomaton* b2 = mk_difference(*a2, *a1);
-        // eautomaton* c = mk_union(*b1, *b2);
-        // then some emptiness check.
+        // create an expression for the symmetric difference and imply it is empty.
+        enode_pair_vector eqs;
+        literal_vector lits;
+        if (!linearize(deps, eqs, lits)) 
+            return;
+        context& ctx = get_context();
+        eqs.push_back(enode_pair(n1, n2));
+        expr_ref r1(n1->get_owner(), m);
+        expr_ref r2(n2->get_owner(), m);
+        ctx.get_rewriter()(r1);
+        ctx.get_rewriter()(r2);
+        if (r1 == r2) {
+            return;
+        }
+#if 0
+        expr* d1 = m_util.re.mk_inter(r1, m_util.re.mk_complement(r2));
+        expr* d2 = m_util.re.mk_inter(r2, m_util.re.mk_complement(r1));
+        expr_ref diff(m_util.re.mk_union(d1, d2), m);
+        lit = mk_literal(m_util.re.mk_is_empty(diff));
+        justification* js =
+            ctx.mk_justification(
+                ext_theory_propagation_justification(
+                    get_id(), ctx.get_region(), lits.size(), lits.c_ptr(), eqs.size(), eqs.c_ptr(), lit));
+        ctx.assign(lit, js);        
+#endif
    }
 }

@ -5369,7 +5378,6 @@ void theory_seq::push_scope_eh() {
    m_eqs.push_scope();
    m_nqs.push_scope();
    m_ncs.push_scope();
-    m_atoms_lim.push_back(m_atoms.size());
 }

 void theory_seq::pop_scope_eh(unsigned num_scopes) {
@ -5382,8 +5390,6 @@ void theory_seq::pop_scope_eh(unsigned num_scopes) {
    m_eqs.pop_scope(num_scopes);
    m_nqs.pop_scope(num_scopes);
    m_ncs.pop_scope(num_scopes);
-    m_atoms.resize(m_atoms_lim[m_atoms_lim.size()-num_scopes]);
-    m_atoms_lim.shrink(m_atoms_lim.size()-num_scopes);
    m_rewrite.reset();    
    if (ctx.get_base_level() > ctx.get_scope_level() - num_scopes) {
        m_replay.reset();
@ -5430,10 +5436,7 @@ eautomaton* theory_seq::get_automaton(expr* re) {
        m_mk_aut.set_solver(alloc(seq_expr_solver, m, get_context().get_fparams()));
    }
    result = m_mk_aut(re);
-    if (result) {
-        display_expr disp(m);
-        TRACE("seq", result->display(tout, disp););
-    }
+    CTRACE("seq", result, result->display(tout, display_expr(m)););
    m_automata.push_back(result);
    m_re2aut.insert(re, result);
    m_res.push_back(re);
@ -5446,8 +5449,8 @@ literal theory_seq::mk_accept(expr* s, expr* idx, expr* re, expr* state) {
    return mk_literal(m_util.mk_skolem(m_accept, args.size(), args.c_ptr(), m.mk_bool_sort()));
 }

-bool theory_seq::is_acc_rej(symbol const& ar, expr* e, expr*& s, expr*& idx, expr*& re, unsigned& i, eautomaton*& aut) {
-    if (is_skolem(ar, e)) {
+bool theory_seq::is_accept(expr* e, expr*& s, expr*& idx, expr*& re, unsigned& i, eautomaton*& aut) {
+    if (is_accept(e)) {
        rational r;
        s  = to_app(e)->get_arg(0);
        idx = to_app(e)->get_arg(1);
@ -5483,169 +5486,109 @@ bool theory_seq::is_step(expr* e, expr*& s, expr*& idx, expr*& re, expr*& i, exp
    }
 }

-expr_ref theory_seq::mk_step(expr* s, expr* idx, expr* re, unsigned i, unsigned j, expr* acc) {
-    SASSERT(m.is_bool(acc));
+expr_ref theory_seq::mk_step(expr* s, expr* idx, expr* re, unsigned i, unsigned j, expr* t) {
    expr_ref_vector args(m);
    args.push_back(s).push_back(idx).push_back(re);
    args.push_back(m_autil.mk_int(i));
    args.push_back(m_autil.mk_int(j));
-    args.push_back(acc);
+    args.push_back(t);
    return expr_ref(m_util.mk_skolem(m_aut_step, args.size(), args.c_ptr(), m.mk_bool_sort()), m);
 }

-/*
-   acc(s, idx, re, i) -> len(s) >= idx    if i is final
-
-   acc(s, idx, re, i) -> len(s) > idx     if i is non-final
+/**
+   step(s, idx, re, i, j, t) -> nth(s, idx) == t & len(s) > idx
+   step(s, idx, re, i, j, t) -> accept(s, idx + 1, re, j)
 */
-void theory_seq::propagate_acc_rej_length(literal lit, expr* e) {
-    expr *s = nullptr, *idx = nullptr, *re = nullptr;
-    eautomaton* aut = nullptr;
-    unsigned src = 0;
-    VERIFY(is_accept(e, s, idx, re, src, aut));
-    if (m_util.str.is_length(idx)) return;
-    SASSERT(m_autil.is_numeral(idx));
+void theory_seq::propagate_step(literal lit, expr* step) {
    SASSERT(get_context().get_assignment(lit) == l_true);
-    if (aut->is_sink_state(src)) {
-        propagate_lit(nullptr, 1, &lit, false_literal);
-    }
-    else if (aut->is_final_state(src)) {
-        propagate_lit(nullptr, 1, &lit, mk_literal(m_autil.mk_ge(mk_len(s), idx)));
+    expr* re = nullptr, *s = nullptr, *t = nullptr, *idx = nullptr, *i = nullptr, *j = nullptr;
+    VERIFY(is_step(step, s, idx, re, i, j, t));
+    
+    TRACE("seq", tout << mk_pp(step, m) << " -> " << mk_pp(t, m) << "\n";);
+    propagate_lit(nullptr, 1, &lit, mk_literal(t));
+
+    rational lo;
+    rational _idx;
+    if (lower_bound(s, lo) && lo.is_unsigned() && m_autil.is_numeral(idx, _idx) && lo >= _idx) {
+        // skip
    }
    else {
        propagate_lit(nullptr, 1, &lit, ~mk_literal(m_autil.mk_le(mk_len(s), idx)));
    }
+    ensure_nth(lit, s, idx);
+
+    expr_ref idx1(m_autil.mk_int(_idx + 1), m);
+    propagate_lit(nullptr, 1, &lit, mk_accept(s, idx1, re, j));
 }

 /**
   acc(s, idx, re, i) ->  \/ step(s, idx, re, i, j, t)                if i is non-final
-   acc(s, idx, re, i) -> len(s) <= idx \/ step(s, idx, re, i, j, t)   if i is final
-*/
-bool theory_seq::add_accept2step(expr* acc, bool& change) {
-    context& ctx = get_context();
-
-    TRACE("seq", tout << mk_pp(acc, m) << "\n";);
-    SASSERT(ctx.get_assignment(acc) == l_true);
+   acc(s, idx, re, i) -> len(s) <= idx \/ step(s, idx, re, i, j, t)   if i is final   
+   acc(s, idx, re, i) -> len(s) >= idx    if i is final
+   acc(s, idx, re, i) -> len(s) > idx     if i is non-final
+   acc(s, idx, re, i) -> idx < max_unfolding
+ */
+void theory_seq::propagate_accept(literal lit, expr* acc) {
    expr *e = nullptr, *idx = nullptr, *re = nullptr;
-    expr_ref step(m);
-    unsigned src;
+    unsigned src = 0;
+    rational _idx;
    eautomaton* aut = nullptr;
    VERIFY(is_accept(acc, e, idx, re, src, aut));
-    if (!aut || m_util.str.is_length(idx)) {
-        return false;
+    VERIFY(aut);
+    if (aut->is_sink_state(src)) {
+        propagate_lit(nullptr, 1, &lit, false_literal);
+        return;
+    }
+    VERIFY(m_autil.is_numeral(idx, _idx));
+    if (_idx.get_unsigned() > m_max_unfolding_depth && m_max_unfolding_lit != null_literal) {
+        propagate_lit(nullptr, 1, &lit, ~m_max_unfolding_lit);
+        return;
    }
-    SASSERT(m_autil.is_numeral(idx));

    expr_ref len = mk_len(e);
+
    literal_vector lits;
-    lits.push_back(~ctx.get_literal(acc));
+    lits.push_back(~lit);
    if (aut->is_final_state(src)) {
        lits.push_back(mk_literal(m_autil.mk_le(len, idx)));
-        switch (ctx.get_assignment(lits.back())) {
-        case l_true:            
-            return false;
-        case l_undef:
-            change = true;
-            ctx.force_phase(lits.back());
-            return true;
-        default:
-            break;
-        }
+        propagate_lit(nullptr, 1, &lit, mk_literal(m_autil.mk_ge(len, idx)));
+    }
+    else {
+        propagate_lit(nullptr, 1, &lit, ~mk_literal(m_autil.mk_le(len, idx)));
    }
-
    eautomaton::moves mvs;
    aut->get_moves_from(src, mvs);
-    bool has_undef = false;
-    int start = ctx.get_random_value();
    TRACE("seq", tout << mvs.size() << "\n";);
-    for (unsigned i = 0; i < mvs.size(); ++i) {
-        unsigned j = (i + start) % mvs.size();
-        eautomaton::move mv = mvs[j];
+    for (auto const& mv : mvs) {
        expr_ref nth = mk_nth(e, idx);
-        expr_ref acc = mv.t()->accept(nth);
-        TRACE("seq", tout << j << ": " << acc << "\n";);
-        step = mk_step(e, idx, re, src, mv.dst(), acc);
-        literal slit = mk_literal(step);
-        literal tlit = mk_literal(acc);
-        add_axiom(~slit, tlit);
-        lits.push_back(slit);
-        switch (ctx.get_assignment(slit)) {
-        case l_true:
-            return false;
-        case l_undef:
-            ctx.mark_as_relevant(slit);
-            // ctx.force_phase(slit);
-            // return true;
-            has_undef = true;
-            break;            
-        default:
-            break;
-        }
+        expr_ref t = mv.t()->accept(nth);
+        get_context().get_rewriter()(t);
+        literal step = mk_literal(mk_step(e, idx, re, src, mv.dst(), t));
+        lits.push_back(step);
    }
-    change = true;
-    if (has_undef && mvs.size() == 1) {
-        TRACE("seq", tout << "has single move\n";);
-        literal lit = lits.back();
-        lits.pop_back();
-        for (unsigned i = 0; i < lits.size(); ++i) {
-            lits[i].neg();
-        }
-        propagate_lit(nullptr, lits.size(), lits.c_ptr(), lit);
-        return false;
-    }
-    if (has_undef) {
-        TRACE("seq", tout << "has undef\n";);
-        return true;
-    }
-    TRACE("seq", ctx.display_literals_verbose(tout, lits) << "\n";);
-    for (unsigned i = 0; i < lits.size(); ++i) {
-        SASSERT(ctx.get_assignment(lits[i]) == l_false);
-        lits[i].neg();
-    }
-    set_conflict(nullptr, lits);
-    return false;
+    get_context().mk_th_axiom(get_id(), lits.size(), lits.c_ptr());
 }

-
-/**
-   step(s, idx, re, i, j, t) => t
-   acc(s, idx, re, i) & step(s, idx, re, i, j, t) => acc(s, idx + 1, re, j)
-*/
-
-bool theory_seq::add_step2accept(expr* step, bool& change) {
-    context& ctx = get_context();
-    SASSERT(ctx.get_assignment(step) == l_true);
-    expr* re = nullptr, *t = nullptr, *s = nullptr, *idx = nullptr, *i = nullptr, *j = nullptr;
-    VERIFY(is_step(step, s, idx, re, i, j, t));
-    literal acc1 = mk_accept(s, idx,  re, i);
-    switch (ctx.get_assignment(acc1)) {
-    case l_false:
-        break;
-    case l_undef:
-        change = true;
-        return true;
-    case l_true: {
-        change = true;
-        rational r;
-        VERIFY(m_autil.is_numeral(idx, r) && r.is_unsigned());
-        expr_ref idx1(m_autil.mk_int(r.get_unsigned() + 1), m);
-        literal acc2 = mk_accept(s, idx1, re, j);
-        literal_vector lits;
-        lits.push_back(acc1);
-        lits.push_back(ctx.get_literal(step));
-        lits.push_back(~acc2);
-        switch (ctx.get_assignment(acc2)) {
-        case l_undef:
-            propagate_lit(nullptr, 2, lits.c_ptr(), acc2);
-            break;
-        case l_true:
-            break;
-        case l_false:
-            set_conflict(nullptr, lits);
-            break;
-        }
-        break;
+void theory_seq::add_theory_assumptions(expr_ref_vector & assumptions) {
+    TRACE("seq", tout << "add_theory_assumption " << m_util.has_re() << "\n";);
+    if (m_util.has_re()) {
+        expr_ref dlimit = mk_max_unfolding_depth();
+        m_max_unfolding_lit = mk_literal(dlimit);
+        TRACE("seq", tout << "add_theory_assumption " << dlimit << " " << assumptions << "\n";);
+        assumptions.push_back(dlimit);
    }
+}
+
+bool theory_seq::should_research(expr_ref_vector & unsat_core) {
+    if (!m_util.has_re()) {
+        return false;
+    }
+    for (auto & e : unsat_core) {
+        if (is_max_unfolding(e)) {
+            m_max_unfolding_depth = (3 * m_max_unfolding_depth) / 2 + 1;
+            IF_VERBOSE(1, verbose_stream() << "(smt.seq :increase-depth " << m_max_unfolding_depth << ")\n");
+            return true;
+        }
    }
    return false;
 }
@ -5683,8 +5626,7 @@ void theory_seq::propagate_not_prefix(expr* e) {
 /*
  !suffix(e1,e2) => e1 != ""
  !suffix(e1,e2) => len(e1) > len(e2) or e1 = ycx & e2 = zdx & c != d
- */
-
+*/

 void theory_seq::propagate_not_suffix(expr* e) {
    context& ctx = get_context();
@ -5732,34 +5674,6 @@ bool theory_seq::canonizes(bool sign, expr* e) {
 }


-bool theory_seq::propagate_automata() {
-    context& ctx = get_context();
-    if (m_atoms_qhead == m_atoms.size()) {
-        return false;
-    }
-    m_trail_stack.push(value_trail<theory_seq, unsigned>(m_atoms_qhead));
-    ptr_vector<expr> re_add;
-    bool change = false;
-    while (m_atoms_qhead < m_atoms.size() && !ctx.inconsistent()) {
-        expr* e = m_atoms[m_atoms_qhead];
-        TRACE("seq", tout << mk_pp(e, m) << "\n";);
-        bool reQ = false;
-        if (is_accept(e)) {
-            reQ = add_accept2step(e, change);
-        }
-        else if (is_step(e)) {
-            reQ = add_step2accept(e, change);
-        }
-        if (reQ) {
-            re_add.push_back(e);
-            change = true;
-        }
-        ++m_atoms_qhead;
-    }
-    m_atoms.append(re_add);
-    return change || get_context().inconsistent();
-}
-
 void theory_seq::get_concat(expr* e, ptr_vector<expr>& concats) {
    expr* e1 = nullptr, *e2 = nullptr;
    while (true) {
--- a/src/smt/theory_seq.h
+++ b/src/smt/theory_seq.h
@ -289,6 +289,16 @@ namespace smt {
            }
        };

+        struct s_in_re {
+            literal     m_lit;
+            expr*       m_s;
+            expr*       m_re;
+            eautomaton* m_aut;
+            bool        m_active;
+            s_in_re(literal l, expr*s, expr* re, eautomaton* aut):
+                m_lit(l), m_s(s), m_re(re), m_aut(aut), m_active(true) {}
+        };
+
        void erase_index(unsigned idx, unsigned i);

        struct stats {
@ -353,11 +363,10 @@ namespace smt {
        scoped_ptr_vector<eautomaton>  m_automata;
        obj_map<expr, eautomaton*>     m_re2aut;
        expr_ref_vector                m_res;
+        unsigned                       m_max_unfolding_depth;
+        literal                        m_max_unfolding_lit;
+        vector<s_in_re>                m_s_in_re;

-        // queue of asserted atoms
-        ptr_vector<expr>               m_atoms;
-        unsigned_vector                m_atoms_lim;
-        unsigned                       m_atoms_qhead;
        bool                           m_new_solution;     // new solution added
        bool                           m_new_propagation;  // new propagation to core
        re2automaton                   m_mk_aut;
@ -378,6 +387,8 @@ namespace smt {
        void pop_scope_eh(unsigned num_scopes) override;
        void restart_eh() override;
        void relevant_eh(app* n) override;
+        bool should_research(expr_ref_vector &) override;
+        void add_theory_assumptions(expr_ref_vector & assumptions) override;
        theory* mk_fresh(context* new_ctx) override { return alloc(theory_seq, new_ctx->get_manager(), m_params); }
        char const * get_name() const override { return "seq"; }
        theory_var mk_var(enode* n) override;
@ -579,7 +590,7 @@ namespace smt {
        expr_ref mk_add(expr* a, expr* b);
        expr_ref mk_len(expr* s) const { return expr_ref(m_util.str.mk_length(s), m); }
        enode* ensure_enode(expr* a);
-        
+        enode* get_root(expr* a) { return ensure_enode(a)->get_root(); }
        dependency* mk_join(dependency* deps, literal lit);
        dependency* mk_join(dependency* deps, literal_vector const& lits);

@ -604,25 +615,24 @@ namespace smt {
        literal mk_accept(expr* s, expr* idx, expr* re, expr* state);
        literal mk_accept(expr* s, expr* idx, expr* re, unsigned i) { return mk_accept(s, idx, re, m_autil.mk_int(i)); }
        bool is_accept(expr* acc) const {  return is_skolem(m_accept, acc); }
-        bool is_accept(expr* acc, expr*& s, expr*& idx, expr*& re, unsigned& i, eautomaton*& aut) {
-            return is_acc_rej(m_accept, acc, s, idx, re, i, aut);
-        }
-        bool is_acc_rej(symbol const& ar, expr* e, expr*& s, expr*& idx, expr*& re, unsigned& i, eautomaton*& aut);
-        expr_ref mk_step(expr* s, expr* tail, expr* re, unsigned i, unsigned j, expr* acc);
+        bool is_accept(expr* acc, expr*& s, expr*& idx, expr*& re, unsigned& i, eautomaton*& aut);
+        expr_ref mk_step(expr* s, expr* tail, expr* re, unsigned i, unsigned j, expr* t);
        bool is_step(expr* e, expr*& s, expr*& tail, expr*& re, expr*& i, expr*& j, expr*& t) const;
        bool is_step(expr* e) const;
-        void propagate_step(literal lit, expr* n);
-        bool add_accept2step(expr* acc, bool& change);       
-        bool add_step2accept(expr* step, bool& change);
+        bool is_max_unfolding(expr* e) const { return is_skolem(symbol("seq.max_unfolding_depth"), e); }
+        expr_ref mk_max_unfolding_depth() { 
+            return mk_skolem(symbol("seq.max_unfolding_depth"), 
+                             m_autil.mk_int(m_max_unfolding_depth), 
+                             nullptr, nullptr, nullptr, m.mk_bool_sort());
+        }
        void propagate_not_prefix(expr* e);
        void propagate_not_suffix(expr* e);
        void ensure_nth(literal lit, expr* s, expr* idx);
        bool canonizes(bool sign, expr* e);
        void propagate_non_empty(literal lit, expr* s);
        bool propagate_is_conc(expr* e, expr* conc);
-        void propagate_acc_rej_length(literal lit, expr* acc_rej);
-        bool propagate_automata();
-        void add_atom(expr* e);
+        void propagate_step(literal lit, expr* n);
+        void propagate_accept(literal lit, expr* e);
        void new_eq_eh(dependency* dep, enode* n1, enode* n2);

        // diagnostics