Merge branch 'master' of https://github.com/z3prover/z3

2025-08-15 07:15:26 +00:00 · 2018-05-01 07:13:05 -07:00 · 2018-05-01 07:13:05 -07:00 · fd5159bf18
commit fd5159bf18
parent 371880da04 b8193a0ae6
137 changed files with 2942 additions and 859 deletions
--- a/src/smt/params/smt_params_helper.pyg
+++ b/src/smt/params/smt_params_helper.pyg
@ -79,6 +79,12 @@ def_module_params(module_name='smt',
                          ('theory_aware_branching', BOOL, False, 'Allow the context to use extra information from theory solvers regarding literal branching prioritization.'),
                          ('str.finite_overlap_models', BOOL, False, 'attempt a finite model search for overlapping variables instead of completely giving up on the arrangement'),
                          ('str.overlap_priority', DOUBLE, -0.1, 'theory-aware priority for overlapping variable cases; use smt.theory_aware_branching=true'),
+                          ('str.regex_automata', BOOL, True, 'use automata-based reasoning for regular expressions (Z3str3 only)'),
+                          ('str.regex_automata_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex automata heuristics'),
+                          ('str.regex_automata_intersection_difficulty_threshold', UINT, 1000, 'difficulty threshold for regex intersection heuristics'),
+                          ('str.regex_automata_failed_automaton_threshold', UINT, 10, 'number of failed automaton construction attempts after which a full automaton is automatically built'),
+                          ('str.regex_automata_failed_intersection_threshold', UINT, 10, 'number of failed automaton intersection attempts after which intersection is always computed'),
+                          ('str.regex_automata_length_attempt_threshold', UINT, 10, 'number of length/path constraint attempts before checking unsatisfiability of regex terms'),
                          ('core.minimize', BOOL, False, 'minimize unsat core produced by SMT context'),
                          ('core.extend_patterns', BOOL, False, 'extend unsat core with literals that trigger (potential) quantifier instances'),
                          ('core.extend_patterns.max_distance', UINT, UINT_MAX, 'limits the distance of a pattern-extended unsat core'),
--- a/src/smt/params/theory_str_params.cpp
+++ b/src/smt/params/theory_str_params.cpp
@ -31,4 +31,10 @@ void theory_str_params::updt_params(params_ref const & _p) {
    m_UseBinarySearch = p.str_use_binary_search();
    m_BinarySearchInitialUpperBound = p.str_binary_search_start();
    m_OverlapTheoryAwarePriority = p.str_overlap_priority();
+    m_RegexAutomata = p.str_regex_automata();
+    m_RegexAutomata_DifficultyThreshold = p.str_regex_automata_difficulty_threshold();
+    m_RegexAutomata_IntersectionDifficultyThreshold = p.str_regex_automata_intersection_difficulty_threshold();
+    m_RegexAutomata_FailedAutomatonThreshold = p.str_regex_automata_failed_automaton_threshold();
+    m_RegexAutomata_FailedIntersectionThreshold = p.str_regex_automata_failed_intersection_threshold();
+    m_RegexAutomata_LengthAttemptThreshold = p.str_regex_automata_length_attempt_threshold();
 }
--- a/src/smt/params/theory_str_params.h
+++ b/src/smt/params/theory_str_params.h
@ -80,6 +80,43 @@ struct theory_str_params {

    double m_OverlapTheoryAwarePriority;

+    /*
+     * If RegexAutomata is set to true,
+     * Z3str3 will use automata-based methods to reason about
+     * regular expression constraints.
+     */
+    bool m_RegexAutomata;
+
+    /*
+     * RegexAutomata_DifficultyThreshold is the lowest difficulty above which Z3str3
+     * will not eagerly construct an automaton for a regular expression term.
+     */
+    unsigned m_RegexAutomata_DifficultyThreshold;
+
+    /*
+     * RegexAutomata_IntersectionDifficultyThreshold is the lowest difficulty above which Z3str3
+     * will not eagerly intersect automata to check unsatisfiability.
+     */
+    unsigned m_RegexAutomata_IntersectionDifficultyThreshold;
+
+    /*
+     * RegexAutomata_FailedAutomatonThreshold is the number of failed attempts to build an automaton
+     * after which a full automaton (i.e. with no length information) will be built regardless of difficulty.
+     */
+    unsigned m_RegexAutomata_FailedAutomatonThreshold;
+
+    /*
+     * RegexAutomata_FailedIntersectionThreshold is the number of failed attempts to perform automaton
+     * intersection after which intersection will always be performed regardless of difficulty.
+     */
+    unsigned m_RegexAutomata_FailedIntersectionThreshold;
+
+    /*
+     * RegexAutomata_LengthAttemptThreshold is the number of attempts to satisfy length/path constraints
+     * after which we begin checking unsatisfiability of a regex term.
+     */
+    unsigned m_RegexAutomata_LengthAttemptThreshold;
+
    theory_str_params(params_ref const & p = params_ref()):
        m_StrongArrangements(true),
        m_AggressiveLengthTesting(false),
@ -91,7 +128,13 @@ struct theory_str_params {
        m_FiniteOverlapModels(false),
        m_UseBinarySearch(false),
        m_BinarySearchInitialUpperBound(64),
-        m_OverlapTheoryAwarePriority(-0.1)
+        m_OverlapTheoryAwarePriority(-0.1),
+        m_RegexAutomata(true),
+        m_RegexAutomata_DifficultyThreshold(1000),
+        m_RegexAutomata_IntersectionDifficultyThreshold(1000),
+        m_RegexAutomata_FailedAutomatonThreshold(10),
+        m_RegexAutomata_FailedIntersectionThreshold(10),
+        m_RegexAutomata_LengthAttemptThreshold(10)
    {
        updt_params(p);
    }
--- a/src/smt/smt_context.cpp
+++ b/src/smt/smt_context.cpp
@ -3922,7 +3922,7 @@ namespace smt {
 #if 0
            {
                static unsigned counter = 0;
-                static uint64 total = 0;
+                static uint64_t total = 0;
                static unsigned max = 0;
                counter++;
                total += num_lits;
--- a/src/smt/smt_types.h
+++ b/src/smt/smt_types.h
@ -21,6 +21,7 @@ Revision History:

 #include "util/list.h"
 #include "util/vector.h"
+#include "util/hashtable.h"
 #include "util/lbool.h"

 class model;
--- a/src/smt/theory_datatype.cpp
+++ b/src/smt/theory_datatype.cpp
@ -36,6 +36,41 @@ namespace smt {
        theory_id get_from_theory() const override { return null_theory_id; }
    };

+    theory_datatype::final_check_st::final_check_st(theory_datatype * th) : th(th) { // guard ctor: begin a final check with fresh occurs-check state
+        SASSERT(th->m_to_unmark.empty());   // no enode may still be recorded as marked "on stack"
+        SASSERT(th->m_to_unmark2.empty());  // no enode may still be recorded as marked "cycle free"
+        th->m_used_eqs.reset();             // drop equalities collected for an earlier conflict
+        th->m_stack.reset();                // empty the DFS work stack used by occurs_check
+        th->m_parent.reset();               // forget parent links used to explain a cycle
+    }
+
+    theory_datatype::final_check_st::~final_check_st() { // guard dtor: undo marks and clear all occurs-check state
+        unmark_enodes(th->m_to_unmark.size(), th->m_to_unmark.c_ptr());    // presumably clears the mark set in oc_mark_on_stack -- helper not visible here
+        unmark_enodes2(th->m_to_unmark2.size(), th->m_to_unmark2.c_ptr()); // presumably clears the mark2 set in oc_mark_cycle_free -- helper not visible here
+        th->m_to_unmark.reset();   // lists must be empty again for the next guard's SASSERTs
+        th->m_to_unmark2.reset();
+        th->m_used_eqs.reset();    // same three containers the constructor resets
+        th->m_stack.reset();
+        th->m_parent.reset();
+    }   
+    
+    void theory_datatype::oc_mark_on_stack(enode * n) { // flag n's class as "on stack"; oc_on_stack() reads this mark
+        n = n->get_root();        // the mark is placed on the root enode, not on n itself
+        n->set_mark();
+        m_to_unmark.push_back(n); // recorded so the final_check_st destructor can pass it to unmark_enodes
+    }
+
+    void theory_datatype::oc_mark_cycle_free(enode * n) { // flag n's class as "cycle free"; oc_cycle_free() reads this mark
+        n = n->get_root();         // the mark is placed on the root enode, not on n itself
+        n->set_mark2();            // second mark bit, separate from the "on stack" mark
+        m_to_unmark2.push_back(n); // recorded so the final_check_st destructor can pass it to unmark_enodes2
+    }
+
+    void theory_datatype::oc_push_stack(enode * n) { // schedule n for the occurs_check DFS, which pops from the back of m_stack
+        m_stack.push_back(std::make_pair(EXIT, n));  // pushed first: popped only after ENTER and everything ENTER pushes
+        m_stack.push_back(std::make_pair(ENTER, n)); // pushed last: the next entry popped
+    }
+

    theory* theory_datatype::mk_fresh(context* new_ctx) { 
        return alloc(theory_datatype, new_ctx->get_manager(), m_params); 
@ -389,10 +424,11 @@ namespace smt {
    final_check_status theory_datatype::final_check_eh() {
        int num_vars = get_num_vars();
        final_check_status r = FC_DONE;
+        final_check_st _guard(this); // RAII for managing state
        for (int v = 0; v < num_vars; v++) {
            if (v == static_cast<int>(m_find.find(v))) {
                enode * node = get_enode(v);
-                if (occurs_check(node)) {
+                if (!oc_cycle_free(node) && occurs_check(node)) {
                    // conflict was detected... 
                    // return...
                    return FC_CONTINUE;
@ -410,6 +446,73 @@ namespace smt {
        return r;
    }

+    // Assuming `app` is equal to a constructor term, return the constructor enode (var_data::m_constructor of app's class); both preconditions are only SASSERT-checked
+    inline enode * theory_datatype::oc_get_cstor(enode * app) {
+        theory_var v = app->get_root()->get_th_var(get_id()); // theory variable attached to the root of app's class
+        SASSERT(v != null_theory_var);                       // caller must pass an enode that has a datatype theory variable
+        v = m_find.find(v);                                  // map through the theory's union-find before indexing m_var_data
+        var_data * d = m_var_data[v];
+        SASSERT(d->m_constructor);                           // caller must guarantee a constructor is known
+        return d->m_constructor;
+    }
+
+    // explain the cycle root -> ... -> app -> root by appending the equalities that justify it to m_used_eqs
+    void theory_datatype::occurs_check_explain(enode * app, enode * root) {
+        TRACE("datatype", tout << "occurs_check_explain " << mk_bounded_pp(app->get_owner(), get_manager()) << " <-> " << mk_bounded_pp(root->get_owner(), get_manager()) << "\n";);
+        enode* app_parent = nullptr;
+
+        // first: explain that root=v, given that app=cstor(...,v,...)
+        for (enode * arg : enode::args(oc_get_cstor(app))) {
+            // found an argument which is equal to root
+            if (arg->get_root() == root->get_root()) {
+                if (arg != root) // identical enodes need no equality
+                    m_used_eqs.push_back(enode_pair(arg, root));
+                break; // only the first matching argument is used
+            }
+        }
+
+        // now explain app=cstor(..,v,..) where v=root, and recurse with parent of app
+        while (app->get_root() != root->get_root()) { // walk up parent links until reaching root's class
+            enode * app_cstor = oc_get_cstor(app);
+            if (app != app_cstor) // record app = its constructor term unless they are the same enode
+                m_used_eqs.push_back(enode_pair(app, app_cstor));
+            app_parent = m_parent[app->get_root()]; // parent stored by occurs_check_enter, keyed by class root
+            app = app_parent;
+        }
+        
+        SASSERT(app->get_root() == root->get_root());
+        if (app != root) // close the cycle: the enode reached by the walk equals root
+            m_used_eqs.push_back(enode_pair(app, root));
+    }
+
+    // start exploring subgraph below `app`; returns true only when a cycle was found and explained via occurs_check_explain
+    bool theory_datatype::occurs_check_enter(enode * app) {
+        oc_mark_on_stack(app); // app's class now counts as "on stack" for oc_on_stack()
+        theory_var v = app->get_root()->get_th_var(get_id());
+        if (v != null_theory_var) { // enodes without a datatype theory variable have nothing to explore
+            v = m_find.find(v);
+            var_data * d = m_var_data[v];
+            if (d->m_constructor) { // children are only known when a constructor is recorded
+                for (enode * arg : enode::args(d->m_constructor)) {
+                    if (oc_cycle_free(arg)) { // already marked cycle free: skip
+                        continue;
+                    }
+                    if (oc_on_stack(arg)) {
+                        // arg was explored before app, and is still on the stack: cycle
+                        occurs_check_explain(app, arg);
+                        return true;
+                    }
+                    // explore `arg` (with parent `app`)
+                    if (m_util.is_datatype(get_manager().get_sort(arg->get_owner()))) { // arguments of non-datatype sort are not explored
+                        m_parent.insert(arg->get_root(), app); // keyed by arg's root; read back in occurs_check_explain
+                        oc_push_stack(arg);
+                    }
+                }
+            }
+        }
+        return false;
+    }
+
    /**
       \brief Check if n can be reached starting from n and following equalities and constructors.
       For example, occurs_check(a1) returns true in the following set of equalities:
@ -418,17 +521,39 @@ namespace smt {
       a3 = cons(v3, a1)
    */
    bool theory_datatype::occurs_check(enode * n) {
-        TRACE("datatype", tout << "occurs check: #" << n->get_owner_id() << "\n";);
-        m_to_unmark.reset();
-        m_used_eqs.reset();
-        m_main   = n;
-        bool res = occurs_check_core(m_main);
-        unmark_enodes(m_to_unmark.size(), m_to_unmark.c_ptr());
+        TRACE("datatype", tout << "occurs check: #" << n->get_owner_id() << " " << mk_bounded_pp(n->get_owner(), get_manager()) << "\n";);
+        m_stats.m_occurs_check++;
+
+        bool res = false;
+        oc_push_stack(n);
+
+        // DFS traversal from `n`. Look at top element and explore it.
+        while (!res && !m_stack.empty()) {
+            stack_op op = m_stack.back().first;
+            enode * app = m_stack.back().second;
+            m_stack.pop_back();
+
+            if (oc_cycle_free(app)) continue;
+
+            TRACE("datatype", tout << "occurs check loop: #" << app->get_owner_id() << " " << mk_bounded_pp(app->get_owner(), get_manager()) << (op==ENTER?" enter":" exit")<< "\n";);
+
+            switch (op) {
+            case ENTER:
+              res = occurs_check_enter(app);
+              break;
+
+            case EXIT:
+              oc_mark_cycle_free(app);
+              break;
+            }
+        }
+
        if (res) {
+            // m_used_eqs should contain conflict
            context & ctx = get_context();
            region & r    = ctx.get_region();
            ctx.set_conflict(ctx.mk_justification(ext_theory_conflict_justification(get_id(), r, 0, nullptr, m_used_eqs.size(), m_used_eqs.c_ptr())));
-            TRACE("occurs_check",
+            TRACE("datatype",
                  tout << "occurs_check: true\n";
                  for (enode_pair const& p : m_used_eqs) {
                      tout << "eq: #" << p.first->get_owner_id() << " #" << p.second->get_owner_id() << "\n";
@ -437,48 +562,6 @@ namespace smt {
        }
        return res;
    }
-
-    /**
-       \brief Auxiliary method for occurs_check.
-       TODO: improve performance.
-    */
-    bool theory_datatype::occurs_check_core(enode * app) {
-        if (app->is_marked())
-            return false;
-        
-        m_stats.m_occurs_check++;
-        app->set_mark();
-        m_to_unmark.push_back(app);
-        
-        TRACE("datatype", tout << "occurs check_core: #" << app->get_owner_id() << " #" << m_main->get_owner_id() << "\n";);
-
-        theory_var v = app->get_root()->get_th_var(get_id());
-        if (v != null_theory_var) {
-            v = m_find.find(v);
-            var_data * d = m_var_data[v];
-            if (d->m_constructor) {
-                if (app != d->m_constructor)
-                    m_used_eqs.push_back(enode_pair(app, d->m_constructor));
-                unsigned num_args = d->m_constructor->get_num_args();
-                for (unsigned i = 0; i < num_args; i++) {
-                    enode * arg = d->m_constructor->get_arg(i);
-                    if (arg->get_root() == m_main->get_root()) {
-                        if (arg != m_main)
-                            m_used_eqs.push_back(enode_pair(arg, m_main));
-                        return true;
-                    }
-                    if (m_util.is_datatype(get_manager().get_sort(arg->get_owner())) && occurs_check_core(arg))
-                        return true;
-                }
-                if (app != d->m_constructor) {
-                    SASSERT(m_used_eqs.back().first  == app);
-                    SASSERT(m_used_eqs.back().second == d->m_constructor);
-                    m_used_eqs.pop_back();
-                }
-            }
-        }
-        return false;
-    }
        
    void theory_datatype::reset_eh() {
        m_trail_stack.reset();
--- a/src/smt/theory_datatype.h
+++ b/src/smt/theory_datatype.h
@ -26,7 +26,6 @@ Revision History:
 #include "smt/proto_model/datatype_factory.h"

 namespace smt {
-    
    class theory_datatype : public theory {
        typedef trail_stack<theory_datatype> th_trail_stack;
        typedef union_find<theory_datatype>  th_union_find;
@ -73,11 +72,36 @@ namespace smt {
        void propagate_recognizer(theory_var v, enode * r);
        void sign_recognizer_conflict(enode * c, enode * r);

-        ptr_vector<enode>    m_to_unmark;
-        enode_pair_vector    m_used_eqs;
-        enode *              m_main;
+        typedef enum { ENTER, EXIT } stack_op;
+        typedef map<enode*, enode*, obj_ptr_hash<enode>, ptr_eq<enode> > parent_tbl;
+        typedef std::pair<stack_op, enode*> stack_entry;
+
+        ptr_vector<enode>     m_to_unmark;
+        ptr_vector<enode>     m_to_unmark2;
+        enode_pair_vector     m_used_eqs; // conflict, if any
+        parent_tbl            m_parent; // parent explanation for occurs_check
+        svector<stack_entry>  m_stack; // stack for DFS for occurs_check
+
+        void oc_mark_on_stack(enode * n); // sets the mark that oc_on_stack() tests
+        bool oc_on_stack(enode * n) const { return n->get_root()->is_marked(); } // tests the mark on n's root
+
+        void oc_mark_cycle_free(enode * n); // sets the mark2 that oc_cycle_free() tests
+        bool oc_cycle_free(enode * n) const { return n->get_root()->is_marked2(); } // tests mark2 on n's root
+
+        void oc_push_stack(enode * n); // pushes entries for n onto m_stack
+
+        // class for managing state of final_check: a scope guard whose constructor and destructor reset the occurs-check containers of `th`
+        class final_check_st {
+            theory_datatype * th; // theory whose state is managed; stored as a raw pointer
+        public:
+            final_check_st(theory_datatype * th); // asserts the mark lists are empty and resets m_used_eqs, m_stack, m_parent
+            ~final_check_st();                    // unmarks the recorded enodes and resets the same containers plus both mark lists
+        };
+
+        enode * oc_get_cstor(enode * n);
        bool occurs_check(enode * n);
-        bool occurs_check_core(enode * n);
+        bool occurs_check_enter(enode * n);
+        void occurs_check_explain(enode * top, enode * root);

        void mk_split(theory_var v);

--- a/src/smt/theory_dl.cpp
+++ b/src/smt/theory_dl.cpp
@ -182,7 +182,7 @@ namespace smt {
                
                if (n->get_decl() != v) {
                    expr* rep = m().mk_app(r, n);
-                    uint64 vl;
+                    uint64_t vl;
                    if (u().is_numeral_ext(n, vl)) {
                        assert_cnstr(m().mk_eq(rep, mk_bv_constant(vl, s)));
                    }
@ -237,12 +237,12 @@ namespace smt {
            return true;
        }

-        app* mk_bv_constant(uint64 val, sort* s) {
+        app* mk_bv_constant(uint64_t val, sort* s) {
            return b().mk_numeral(rational(val, rational::ui64()), 64);
        }

        app* max_value(sort* s) {
-            uint64 sz;
+            uint64_t sz;
            VERIFY(u().try_get_size(s, sz));
            SASSERT(sz > 0);
            return mk_bv_constant(sz-1, s);
--- a/src/smt/theory_pb.cpp
+++ b/src/smt/theory_pb.cpp
@ -446,7 +446,16 @@ namespace smt {
            expr* arg = atom->get_arg(i);
            literal l = compile_arg(arg);
            numeral c = m_util.get_coeff(atom, i);
-            args.push_back(std::make_pair(l, c));
+            switch (ctx.get_assignment(l)) {
+            case l_true: 
+                k -= c;
+                break;
+            case l_false:
+                break;
+            default:
+                args.push_back(std::make_pair(l, c));
+                break;
+            }
        }
        if (m_util.is_at_most_k(atom) || m_util.is_le(atom)) {
            // turn W <= k into -W >= -k
@ -458,7 +467,7 @@ namespace smt {
        else {
            SASSERT(m_util.is_at_least_k(atom) || m_util.is_ge(atom) || m_util.is_eq(atom));
        }
-        TRACE("pb", display(tout, *c););        
+        TRACE("pb", display(tout, *c, true););        
        //app_ref fml1(m), fml2(m);
        //fml1 = c->to_expr(ctx, m);
        c->unique();
--- a/src/smt/theory_seq.cpp
+++ b/src/smt/theory_seq.cpp
@ -134,8 +134,7 @@ void theory_seq::solution_map::pop_scope(unsigned num_scopes) {
    if (num_scopes == 0) return;
    m_cache.reset();
    unsigned start = m_limit[m_limit.size() - num_scopes];
-    for (unsigned i = m_updates.size(); i > start; ) {
-        --i;
+    for (unsigned i = m_updates.size(); i-- > start; ) {
        if (m_updates[i] == INS) {
            m_map.remove(m_lhs[i].get());
        }
@ -436,8 +435,8 @@ bool theory_seq::is_unit_eq(expr_ref_vector const& ls, expr_ref_vector const& rs
    if (ls.empty() || !is_var(ls[0])) {
        return false;
    }
-    for (unsigned i = 0; i < rs.size(); ++i) {
-        if (!m_util.str.is_unit(rs[i])) {
+    for (expr* r : rs) {
+        if (!m_util.str.is_unit(r)) {
            return false;
        }
    }
@ -482,8 +481,7 @@ void theory_seq::branch_unit_variable(dependency* dep, expr* X, expr_ref_vector

 bool theory_seq::branch_variable_mb() {
    bool change = false;
-    for (unsigned i = 0; i < m_eqs.size(); ++i) {
-        eq const& e = m_eqs[i];
+    for (eq const& e : m_eqs) {
        vector<rational> len1, len2;
        if (!is_complex(e)) {
            continue;
@ -1473,7 +1471,7 @@ bool theory_seq::add_solution(expr* l, expr* r, dependency* deps)  {
    if (l == r) {
        return false;
    }
-    TRACE("seq", tout << mk_pp(l, m) << " ==> " << mk_pp(r, m) << "\n";);
+    TRACE("seq", tout << mk_pp(l, m) << " ==> " << mk_pp(r, m) << "\n"; display_deps(tout, deps););
    m_new_solution = true;
    m_rep.update(l, r, deps);
    enode* n1 = ensure_enode(l);
@ -1513,7 +1511,9 @@ bool theory_seq::solve_eq(expr_ref_vector const& l, expr_ref_vector const& r, de
    change = canonize(r, rs, dep2) || change;
    deps = m_dm.mk_join(dep2, deps);
    TRACE("seq", tout << l << " = " << r << " ==> ";
-          tout << ls << " = " << rs << "\n";);
+          tout << ls << " = " << rs << "\n";
+          display_deps(tout, deps);
+          );
    if (!ctx.inconsistent() && simplify_eq(ls, rs, deps)) {
        return true;
    }
@ -2224,63 +2224,7 @@ void theory_seq::internalize_eq_eh(app * atom, bool_var v) {
 }

 bool theory_seq::internalize_atom(app* a, bool) {
-#if 1
    return internalize_term(a);
-#else
-    if (is_skolem(m_eq, a)) {
-        return internalize_term(a);
-    }
-    context & ctx   = get_context();
-    bool_var bv = ctx.mk_bool_var(a);
-    ctx.set_var_theory(bv, get_id());
-    ctx.mark_as_relevant(bv);
-
-    expr* e1, *e2;
-    if (m_util.str.is_in_re(a, e1, e2)) {        
-        return internalize_term(to_app(e1)) && internalize_re(e2);
-    }
-    if (m_util.str.is_contains(a, e1, e2) ||
-        m_util.str.is_prefix(a, e1, e2) ||
-        m_util.str.is_suffix(a, e1, e2)) {
-        return internalize_term(to_app(e1)) && internalize_term(to_app(e2));        
-    }
-    if (is_accept(a) || is_reject(a) || is_step(a) || is_skolem(symbol("seq.is_digit"), a)) {
-        return true;
-    }
-    UNREACHABLE();
-    return internalize_term(a);
-#endif
-}
-
-bool theory_seq::internalize_re(expr* e) {
-    expr* e1, *e2;
-    unsigned lc, uc;
-    if (m_util.re.is_to_re(e, e1)) {
-        return internalize_term(to_app(e1));
-    }
-    if (m_util.re.is_star(e, e1) ||
-        m_util.re.is_plus(e, e1) ||
-        m_util.re.is_opt(e, e1) ||
-        m_util.re.is_loop(e, e1, lc) ||
-        m_util.re.is_loop(e, e1, lc, uc) ||
-        m_util.re.is_complement(e, e1)) {
-        return internalize_re(e1);
-    }
-    if (m_util.re.is_union(e, e1, e2) ||
-        m_util.re.is_intersection(e, e1, e2) ||
-        m_util.re.is_concat(e, e1, e2)) {
-        return internalize_re(e1) && internalize_re(e2);
-    }
-    if (m_util.re.is_full_seq(e) ||
-        m_util.re.is_full_char(e) ||
-        m_util.re.is_empty(e)) {
-        return true;
-    }
-    if (m_util.re.is_range(e, e1, e2)) {
-        return internalize_term(to_app(e1)) && internalize_term(to_app(e2));
-    }
-    UNREACHABLE();
-    return internalize_term(to_app(e));
 }

 bool theory_seq::internalize_term(app* term) {
@ -2344,8 +2288,8 @@ void theory_seq::add_int_string(expr* e) {

 bool theory_seq::check_int_string() {
    bool change = false;
-    for (unsigned i = 0; i < m_int_string.size(); ++i) {
-        expr* e = m_int_string[i].get(), *n;
+    for (expr * e : m_int_string) {
+        expr* n = nullptr;
        if (m_util.str.is_itos(e) && add_itos_val_axiom(e)) {
            change = true;
        }
@ -2358,9 +2302,21 @@ bool theory_seq::check_int_string() {

 void theory_seq::add_stoi_axiom(expr* e) {
    TRACE("seq", tout << mk_pp(e, m) << "\n";);
-    SASSERT(m_util.str.is_stoi(e));
-    literal l = mk_simplified_literal(m_autil.mk_ge(e, arith_util(m).mk_int(-1)));
+    expr* s = nullptr;
+    VERIFY (m_util.str.is_stoi(e, s));
+
+    // stoi(s) >= -1
+    literal l = mk_simplified_literal(m_autil.mk_ge(e, m_autil.mk_int(-1)));
    add_axiom(l);    
+    
+    // stoi(s) >= 0 <=> s in (0-9)+
+    expr_ref num_re(m);
+    num_re = m_util.re.mk_range(m_util.str.mk_string(symbol("0")), m_util.str.mk_string(symbol("9")));
+    num_re = m_util.re.mk_plus(num_re);
+    app_ref in_re(m_util.re.mk_in_re(s, num_re), m);
+    literal ge0 = mk_simplified_literal(m_autil.mk_ge(e, m_autil.mk_int(0)));
+    add_axiom(~ge0, mk_literal(in_re));
+    add_axiom(ge0, ~mk_literal(in_re));
 }

 bool theory_seq::add_stoi_val_axiom(expr* e) {
@ -2404,8 +2360,9 @@ bool theory_seq::add_stoi_val_axiom(expr* e) {
            lits.push_back(~is_digit(ith_char));
            nums.push_back(digit2int(ith_char));
        }        
-        for (unsigned i = sz, c = 1; i-- > 0; c *= 10) {
-            coeff = m_autil.mk_int(c);
+        rational c(1);
+        for (unsigned i = sz; i-- > 0; c *= rational(10)) {
+            coeff = m_autil.mk_numeral(c, true);
            nums[i] = m_autil.mk_mul(coeff, nums[i].get());
        }
        num = m_autil.mk_add(nums.size(), nums.c_ptr());
@ -2674,7 +2631,12 @@ void theory_seq::init_model(expr_ref_vector const& es) {
    }
 }

+void theory_seq::finalize_model(model_generator& mg) { // mg is not used in this body
+    m_rep.pop_scope(1); // pops one scope of m_rep; init_model(model_generator&) pushes one via m_rep.push_scope()
+}
+
 void theory_seq::init_model(model_generator & mg) {
+    m_rep.push_scope();
    m_factory = alloc(seq_factory, get_manager(), get_family_id(), mg.get_model());
    mg.register_factory(m_factory);
    for (ne const& n : m_nqs) {
@ -3428,8 +3390,8 @@ void theory_seq::add_itos_length_axiom(expr* len) {

 void theory_seq::propagate_in_re(expr* n, bool is_true) {
    TRACE("seq", tout << mk_pp(n, m) << " <- " << (is_true?"true":"false") << "\n";);
-    expr* e1 = nullptr, *e2 = nullptr;
-    VERIFY(m_util.str.is_in_re(n, e1, e2));
+    expr* s = nullptr, *re = nullptr;
+    VERIFY(m_util.str.is_in_re(n, s, re));

    expr_ref tmp(n, m);
    m_rewrite(tmp);
@ -3450,21 +3412,21 @@ void theory_seq::propagate_in_re(expr* n, bool is_true) {
        return;
    }

-    expr_ref e3(e2, m);
+    expr_ref e3(re, m);
    context& ctx = get_context();
    literal lit = ctx.get_literal(n);
    if (!is_true) {
-        e3 = m_util.re.mk_complement(e2);
+        e3 = m_util.re.mk_complement(re);
        lit.neg();
    }
    eautomaton* a = get_automaton(e3);
    if (!a) return;


-    expr_ref len(m_util.str.mk_length(e1), m);
+    expr_ref len(m_util.str.mk_length(s), m);
    for (unsigned i = 0; i < a->num_states(); ++i) {
-        literal acc = mk_accept(e1, len, e3, i);
-        literal rej = mk_reject(e1, len, e3, i);
+        literal acc = mk_accept(s, len, e3, i);
+        literal rej = mk_reject(s, len, e3, i);
        add_axiom(a->is_final_state(i)?acc:~acc);
        add_axiom(a->is_final_state(i)?~rej:rej);
    }
@ -3475,8 +3437,8 @@ void theory_seq::propagate_in_re(expr* n, bool is_true) {
    literal_vector lits;
    lits.push_back(~lit);
    
-    for (unsigned i = 0; i < states.size(); ++i) {
-        lits.push_back(mk_accept(e1, zero, e3, states[i]));
+    for (unsigned st : states) {
+        lits.push_back(mk_accept(s, zero, e3, st));
    }
    if (lits.size() == 2) {
        propagate_lit(nullptr, 1, &lit, lits[1]);
@ -3527,8 +3489,8 @@ static bool get_arith_value(context& ctx, theory_id afid, expr* e, expr_ref& v)
 bool theory_seq::get_num_value(expr* e, rational& val) const {
    context& ctx = get_context();
    expr_ref _val(m);
-	if (!ctx.e_internalized(e))
-		return false;
+    if (!ctx.e_internalized(e))
+        return false;
    enode* next = ctx.get_enode(e), *n = next;
    do { 
        if (get_arith_value(ctx, m_autil.get_family_id(), next->get_owner(), _val) && m_autil.is_numeral(_val, val) && val.is_int()) {
@ -3925,8 +3887,8 @@ theory_seq::dependency* theory_seq::mk_join(dependency* deps, literal lit) {
 }

 theory_seq::dependency* theory_seq::mk_join(dependency* deps, literal_vector const& lits) {
-    for (unsigned i = 0; i < lits.size(); ++i) {
-        deps = mk_join(deps, lits[i]);
+    for (literal l : lits) {
+        deps = mk_join(deps, l);
    } 
    return deps;
 }
@ -4131,53 +4093,15 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) {
    TRACE("seq", tout << "new disequality " << get_context().get_scope_level() << ": " << eq << "\n";);
    m_rewrite(eq);
    if (!m.is_false(eq)) {
-
        literal lit = mk_eq(e1, e2, false);

-
        if (m_util.str.is_empty(e2)) {
            std::swap(e1, e2);
        }

-        if (false && m_util.str.is_empty(e1)) {
-            expr_ref head(m), tail(m), conc(m);
-            mk_decompose(e2, head, tail);
-            conc = mk_concat(head, tail);
-            propagate_eq(~lit, e2, conc, true);
-        }
-#if 0
-
-        // (e1 = "" & e2 = xdz) or (e2 = "" & e1 = xcy) or (e1 = xcy & e2 = xdz & c != d) or (e1 = x & e2 = xdz) or (e2 = x & e1 = xcy)
-        // e1 = "" or e1 = xcy or e1 = x
-        // e2 = "" or e2 = xdz or e2 = x
-        // e1 = xcy or e2 = xdz
-        // c != d
-
-        sort* char_sort = 0;
-        expr_ref emp(m);
-        VERIFY(m_util.is_seq(m.get_sort(e1), char_sort));
-        emp = m_util.str.mk_empty(m.get_sort(e1));
-
-        expr_ref x = mk_skolem(symbol("seq.ne.x"), e1, e2);
-        expr_ref y = mk_skolem(symbol("seq.ne.y"), e1, e2);
-        expr_ref z = mk_skolem(symbol("seq.ne.z"), e1, e2);
-        expr_ref c = mk_skolem(symbol("seq.ne.c"), e1, e2, 0, char_sort);
-        expr_ref d = mk_skolem(symbol("seq.ne.d"), e1, e2, 0, char_sort);
-        literal e1_is_emp = mk_seq_eq(e1, emp);
-        literal e2_is_emp = mk_seq_eq(e2, emp);
-        literal e1_is_xcy = mk_seq_eq(e1, mk_concat(x, m_util.str.mk_unit(c), y));
-        literal e2_is_xdz = mk_seq_eq(e2, mk_concat(x, m_util.str.mk_unit(d), z));
-        add_axiom(lit, e1_is_emp, e1_is_xcy, mk_seq_eq(e1, x));
-        add_axiom(lit, e2_is_emp, e2_is_xdz, mk_seq_eq(e2, x));
-        add_axiom(lit, e1_is_xcy, e2_is_xdz);
-        add_axiom(lit, ~mk_eq(c, d, false));
-#else
-        else {
-            dependency* dep = m_dm.mk_leaf(assumption(~lit));
-            m_nqs.push_back(ne(e1, e2, dep));
-            solve_nqs(m_nqs.size() - 1);
-        }
-#endif
+        dependency* dep = m_dm.mk_leaf(assumption(~lit));
+        m_nqs.push_back(ne(e1, e2, dep));
+        solve_nqs(m_nqs.size() - 1);        
    }
 }

@ -4508,8 +4432,7 @@ bool theory_seq::add_reject2reject(expr* rej, bool& change) {
    ensure_nth(~len_le_idx, s, idx);
    literal_vector eqs;
    bool has_undef = false;
-    for (unsigned i = 0; i < mvs.size(); ++i) {
-        eautomaton::move const& mv = mvs[i];
+    for (eautomaton::move const& mv : mvs) {
        literal eq = mk_literal(mv.t()->accept(nth));
        switch (ctx.get_assignment(eq)) {
        case l_false:
--- a/src/smt/theory_seq.h
+++ b/src/smt/theory_seq.h
@ -64,14 +64,14 @@ namespace smt {
        // + a cache for normalization.
        class solution_map {
            enum map_update { INS, DEL };
-            ast_manager&                      m;
+            ast_manager&           m;
            dependency_manager&    m_dm;
-            eqdep_map_t                       m_map;            
-            eval_cache                        m_cache;
-            expr_ref_vector                   m_lhs, m_rhs;
+            eqdep_map_t            m_map;            
+            eval_cache             m_cache;
+            expr_ref_vector        m_lhs, m_rhs;
            ptr_vector<dependency> m_deps;
-            svector<map_update>               m_updates;
-            unsigned_vector                   m_limit;
+            svector<map_update>    m_updates;
+            unsigned_vector        m_limit;

            void add_trail(map_update op, expr* l, expr* r, dependency* d);
        public:
@ -362,6 +362,7 @@ namespace smt {
        void collect_statistics(::statistics & st) const override;
        model_value_proc * mk_value(enode * n, model_generator & mg) override;
        void init_model(model_generator & mg) override;
+        void finalize_model(model_generator & mg) override;
        void init_search_eh() override;

        void init_model(expr_ref_vector const& es);
@ -389,7 +390,6 @@ namespace smt {
                           vector<rational> const& ll, vector<rational> const& rl);
        bool set_empty(expr* x);
        bool is_complex(eq const& e);
-        bool internalize_re(expr* e);

        bool check_extensionality();
        bool check_contains();
--- a/src/smt/theory_str.cpp
+++ b/src/smt/theory_str.cpp
--- a/src/smt/theory_str.h
+++ b/src/smt/theory_str.h
@ -20,9 +20,11 @@
 #include "util/trail.h"
 #include "util/union_find.h"
 #include "util/scoped_ptr_vector.h"
+#include "util/hashtable.h"
 #include "ast/ast_pp.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/rewriter/th_rewriter.h"
+#include "ast/rewriter/seq_rewriter.h"
 #include "ast/seq_decl_plugin.h"
 #include "smt/smt_theory.h"
 #include "smt/params/theory_str_params.h"
@ -36,6 +38,7 @@
 namespace smt {

 typedef hashtable<symbol, symbol_hash_proc, symbol_eq_proc> symbol_set;
+typedef int_hashtable<int_hash, default_eq<int> > integer_set;

 class str_value_factory : public value_factory {
    seq_util u;
@ -148,6 +151,70 @@ public:
    bool matches(zstring input);
 };

+// Pairs a regex term with an automaton and a polarity flag, together with
+// optional lower/upper bound assumptions (stored as rationals).
+// Each bound is only meaningful while its corresponding "assume" flag is set.
+// This class does not own re_term or aut; it only stores the raw pointers.
+class regex_automaton_under_assumptions {
+protected:
+    expr * re_term;          // regex term this entry refers to (not owned)
+    eautomaton * aut;        // automaton associated with re_term (not owned)
+    bool polarity;           // polarity flag supplied at construction
+
+    bool assume_lower_bound; // true iff lower_bound currently holds an assumed value
+    rational lower_bound;
+
+    bool assume_upper_bound; // true iff upper_bound currently holds an assumed value
+    rational upper_bound;
+public:
+    // Creates an empty entry: no term, no automaton, polarity false, no bounds assumed.
+    regex_automaton_under_assumptions() :
+        re_term(nullptr), aut(nullptr), polarity(false),
+        assume_lower_bound(false), assume_upper_bound(false) {}
+
+    // Creates an entry for the given term/automaton/polarity with no bounds assumed.
+    regex_automaton_under_assumptions(expr * re_term, eautomaton * aut, bool polarity) :
+        re_term(re_term), aut(aut), polarity(polarity),
+        assume_lower_bound(false), assume_upper_bound(false) {}
+
+    // Records lb as the assumed lower bound. The argument is only read.
+    void set_lower_bound(rational const & lb) {
+        lower_bound = lb;
+        assume_lower_bound = true;
+    }
+    // Drops the lower bound assumption; the stored value is left as it is.
+    void unset_lower_bound() {
+        assume_lower_bound = false;
+    }
+
+    // Records ub as the assumed upper bound. The argument is only read.
+    void set_upper_bound(rational const & ub) {
+        upper_bound = ub;
+        assume_upper_bound = true;
+    }
+    // Drops the upper bound assumption; the stored value is left as it is.
+    void unset_upper_bound() {
+        assume_upper_bound = false;
+    }
+
+    // If a lower bound is assumed, copies it into lb and returns true.
+    // Otherwise returns false and leaves lb untouched.
+    bool get_lower_bound(rational & lb) const {
+        if (!assume_lower_bound) {
+            return false;
+        }
+        lb = lower_bound;
+        return true;
+    }
+
+    // If an upper bound is assumed, copies it into ub and returns true.
+    // Otherwise returns false and leaves ub untouched.
+    bool get_upper_bound(rational & ub) const {
+        if (!assume_upper_bound) {
+            return false;
+        }
+        ub = upper_bound;
+        return true;
+    }
+
+    eautomaton * get_automaton() const { return aut; }
+    expr * get_regex_term() const { return re_term; }
+    bool get_polarity() const { return polarity; }
+
+    virtual ~regex_automaton_under_assumptions() {
+        // don't free re_term or aut;
+        // they are managed separately
+    }
+};
+
 class theory_str : public theory {
    struct T_cut
    {
@ -250,6 +317,8 @@ protected:

    str_value_factory * m_factory;

+    re2automaton m_mk_aut;
+
    // Unique identifier appended to unused variables to ensure that model construction
    // does not introduce equalities when they weren't enforced.
    unsigned m_unused_id;
@ -267,6 +336,10 @@ protected:
    // enode lists for library-aware/high-level string terms (e.g. substr, contains)
    ptr_vector<enode> m_library_aware_axiom_todo;

+    // list of axioms that are re-asserted every time the scope is popped
+    expr_ref_vector m_persisted_axioms;
+    expr_ref_vector m_persisted_axiom_todo;
+
    // hashtable of all exprs for which we've already set up term-specific axioms --
    // this prevents infinite recursive descent with respect to axioms that
    // include an occurrence of the term for which axioms are being generated
@ -320,7 +393,31 @@ protected:
    // TBD: do a curried map for determinism.
    std::map<std::pair<expr*, zstring>, expr*> regex_in_bool_map;
    obj_map<expr, std::set<zstring> > regex_in_var_reg_str_map;
+
+    // regex automata
+    scoped_ptr_vector<eautomaton> m_automata;
+    ptr_vector<eautomaton> regex_automata;
+    obj_hashtable<expr> regex_terms;
+    obj_map<expr, ptr_vector<expr> > regex_terms_by_string; // S --> [ (str.in.re S *) ]
+    obj_map<expr, svector<regex_automaton_under_assumptions> > regex_automaton_assumptions; // RegEx --> [ aut+assumptions ]
    obj_map<expr, nfa> regex_nfa_cache; // Regex term --> NFA
+    obj_hashtable<expr> regex_terms_with_path_constraints; // set of string terms which have had path constraints asserted in the current scope
+    obj_hashtable<expr> regex_terms_with_length_constraints; // set of regex terms which have had length constraints asserted in the current scope
+    obj_map<expr, expr*> regex_term_to_length_constraint; // (str.in.re S R) -> (length constraint over S wrt. R)
+    obj_map<expr, ptr_vector<expr> > regex_term_to_extra_length_vars; // extra length vars used in regex_term_to_length_constraint entries
+
+    // keep track of the last lower/upper bound we saw for each string term
+    // so we don't perform duplicate work
+    obj_map<expr, rational> regex_last_lower_bound;
+    obj_map<expr, rational> regex_last_upper_bound;
+
+    // each counter maps a (str.in.re) expression to an integer.
+    // use helper functions regex_inc_counter() and regex_get_counter() to access
+    obj_map<expr, unsigned> regex_length_attempt_count;
+    obj_map<expr, unsigned> regex_fail_count;
+    obj_map<expr, unsigned> regex_intersection_fail_count;
+
+    obj_map<expr, ptr_vector<expr> > string_chars; // S --> [S_0, S_1, ...] for character terms S_i

    svector<char> char_set;
    std::map<char, int>  charSetLookupTable;
@ -439,14 +536,32 @@ protected:
    void instantiate_axiom_str_to_int(enode * e);
    void instantiate_axiom_int_to_str(enode * e);

+    void add_persisted_axiom(expr * a);
+
    expr * mk_RegexIn(expr * str, expr * regexp);
    void instantiate_axiom_RegexIn(enode * e);
    app * mk_unroll(expr * n, expr * bound);
-
    void process_unroll_eq_const_str(expr * unrollFunc, expr * constStr);
    void unroll_str2reg_constStr(expr * unrollFunc, expr * eqConstStr);
    void process_concat_eq_unroll(expr * concat, expr * unroll);

+    // regex automata and length-aware regex
+    unsigned estimate_regex_complexity(expr * re);
+    unsigned estimate_regex_complexity_under_complement(expr * re);
+    unsigned estimate_automata_intersection_difficulty(eautomaton * aut1, eautomaton * aut2);
+    bool check_regex_length_linearity(expr * re);
+    bool check_regex_length_linearity_helper(expr * re, bool already_star);
+    expr_ref infer_all_regex_lengths(expr * lenVar, expr * re, expr_ref_vector & freeVariables);
+    void check_subterm_lengths(expr * re, integer_set & lens);
+    void find_automaton_initial_bounds(expr * str_in_re, eautomaton * aut);
+    bool refine_automaton_lower_bound(eautomaton * aut, rational current_lower_bound, rational & refined_lower_bound);
+    bool refine_automaton_upper_bound(eautomaton * aut, rational current_upper_bound, rational & refined_upper_bound);
+    expr_ref generate_regex_path_constraints(expr * stringTerm, eautomaton * aut, rational lenVal, expr_ref & characterConstraints);
+    void aut_path_add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond);
+    expr_ref aut_path_rewrite_constraint(expr * cond, expr * ch_var);
+    void regex_inc_counter(obj_map<expr, unsigned> & counter_map, expr * key);
+    unsigned regex_get_counter(obj_map<expr, unsigned> & counter_map, expr * key);
+
    void set_up_axioms(expr * ex);
    void handle_equality(expr * lhs, expr * rhs);

@ -535,6 +650,7 @@ protected:
            std::map<expr*, std::map<expr*, int> > & concat_eq_concat_map,
            std::map<expr*, std::set<expr*> > & unrollGroupMap);

+    bool term_appears_as_subterm(expr * needle, expr * haystack);
    void classify_ast_by_type(expr * node, std::map<expr*, int> & varMap,
            std::map<expr*, int> & concatMap, std::map<expr*, int> & unrollMap);
    void classify_ast_by_type_in_positive_context(std::map<expr*, int> & varMap,
@ -623,6 +739,7 @@ protected:
    void new_diseq_eh(theory_var, theory_var) override;

    theory* mk_fresh(context*) override { return alloc(theory_str, get_manager(), m_params); }
+    void init(context * ctx) override;
    void init_search_eh() override;
    void add_theory_assumptions(expr_ref_vector & assumptions) override;
    lbool validate_unsat_core(expr_ref_vector & unsat_core) override;