From 58b57b26324c74c160c7680ecae613a6859c6064 Mon Sep 17 00:00:00 2001
From: Nikolaj Bjorner <nbjorner@microsoft.com>
Date: Fri, 27 Feb 2026 17:26:10 -0800
Subject: [PATCH] Add Phase 2: core data structures for theory_nseq

- nseq_constraint.h: constraint types (nseq_eq, nseq_ne, nseq_mem,
  nseq_pred) with dependency tracking via scoped_dependency_manager
- nseq_state.h/cpp: backtrackable state management with scoped_vector
  collections, axiom queue, length tracking, linearization for
  conflict/propagation justifications, statistics, display
- theory_nseq.h/cpp: full internalization (term/atom/bool), equality
  and disequality handling with union-find, literal assignment dispatch
  (prefix/suffix/contains/in_re), axiom management, propagation
  helpers (propagate_eq/lit, set_conflict), scope push/pop wired
  to nseq_state

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/smt/CMakeLists.txt    |   1 +
 src/smt/nseq_constraint.h | 122 +++++++++++++++
 src/smt/nseq_state.cpp    | 142 +++++++++++++++++
 src/smt/nseq_state.h      | 126 +++++++++++++++
 src/smt/theory_nseq.cpp   | 320 ++++++++++++++++++++++++++++++++++++--
 src/smt/theory_nseq.h     |  31 ++++
 6 files changed, 726 insertions(+), 16 deletions(-)
 create mode 100644 src/smt/nseq_constraint.h
 create mode 100644 src/smt/nseq_state.cpp
 create mode 100644 src/smt/nseq_state.h

diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt
index f5c35c441..768bb3595 100644
--- a/src/smt/CMakeLists.txt
+++ b/src/smt/CMakeLists.txt
@@ -69,6 +69,7 @@ z3_add_component(smt
     theory_recfun.cpp
     theory_seq.cpp
     theory_nseq.cpp
+    nseq_state.cpp
     theory_sls.cpp
     theory_special_relations.cpp    
     theory_user_propagator.cpp
diff --git a/src/smt/nseq_constraint.h b/src/smt/nseq_constraint.h
new file mode 100644
index 000000000..d12db0a76
--- /dev/null
+++ b/src/smt/nseq_constraint.h
@@ -0,0 +1,122 @@
+/*++
+Copyright (c) 2025 Microsoft Corporation
+
+Module Name:
+
+    nseq_constraint.h
+
+Abstract:
+
+    Constraint types for the Nielsen-based string theory solver.
+    Defines word equations, disequalities, regex memberships,
+    and string predicate constraints with dependency tracking.
+
+Author:
+
+    Clemens Eisenhofer
+    Nikolaj Bjorner (nbjorner) 2025-2-28
+
+--*/
+#pragma once
+
+#include "ast/seq_decl_plugin.h"
+#include "ast/arith_decl_plugin.h"
+#include "util/scoped_vector.h"
+#include "util/dependency.h"
+#include "smt/smt_enode.h"
+
+namespace smt {
+
+    // Dependency tracking for conflict explanations.
+    // Each assumption is either an enode equality or a literal.
+    struct nseq_assumption {
+        enode* n1;
+        enode* n2;
+        literal lit;
+        nseq_assumption(enode* n1, enode* n2) : n1(n1), n2(n2), lit(null_literal) {}
+        nseq_assumption(literal lit) : n1(nullptr), n2(nullptr), lit(lit) {}
+    };
+
+    typedef scoped_dependency_manager<nseq_assumption> nseq_dep_manager;
+    typedef nseq_dep_manager::dependency nseq_dependency;
+
+    // A word equation: lhs_1 · lhs_2 · ... = rhs_1 · rhs_2 · ...
+    // Each side is a concatenation of string expressions (variables, constants, units).
+    class nseq_eq {
+        unsigned          m_id;
+        expr_ref_vector   m_lhs;
+        expr_ref_vector   m_rhs;
+        nseq_dependency*  m_dep;
+    public:
+        nseq_eq(unsigned id, expr_ref_vector& lhs, expr_ref_vector& rhs, nseq_dependency* dep)
+            : m_id(id), m_lhs(lhs), m_rhs(rhs), m_dep(dep) {}
+
+        unsigned id() const { return m_id; }
+        expr_ref_vector const& lhs() const { return m_lhs; }
+        expr_ref_vector const& rhs() const { return m_rhs; }
+        nseq_dependency* dep() const { return m_dep; }
+    };
+
+    // A disequality constraint: lhs != rhs
+    // with decomposed sub-equations and justification literals.
+    class nseq_ne {
+    public:
+        typedef std::pair<expr_ref_vector, expr_ref_vector> decomposed_eq;
+    private:
+        expr_ref                 m_lhs;
+        expr_ref                 m_rhs;
+        vector<decomposed_eq>    m_eqs;
+        literal_vector           m_lits;
+        nseq_dependency*         m_dep;
+    public:
+        nseq_ne(expr_ref const& l, expr_ref const& r, nseq_dependency* dep)
+            : m_lhs(l), m_rhs(r), m_dep(dep) {
+            expr_ref_vector ls(l.get_manager()); ls.push_back(l);
+            expr_ref_vector rs(r.get_manager()); rs.push_back(r);
+            m_eqs.push_back(std::make_pair(ls, rs));
+        }
+
+        expr_ref const& l() const { return m_lhs; }
+        expr_ref const& r() const { return m_rhs; }
+        vector<decomposed_eq> const& eqs() const { return m_eqs; }
+        literal_vector const& lits() const { return m_lits; }
+        nseq_dependency* dep() const { return m_dep; }
+    };
+
+    // A regex membership constraint: str.in_re(s, r)
+    // Tracks the string expression, regex, and sign.
+    class nseq_mem {
+        expr_ref          m_str;
+        expr_ref          m_regex;
+        bool              m_sign;     // true = in_re, false = not in_re
+        nseq_dependency*  m_dep;
+    public:
+        nseq_mem(expr_ref const& s, expr_ref const& r, bool sign, nseq_dependency* dep)
+            : m_str(s), m_regex(r), m_sign(sign), m_dep(dep) {}
+
+        expr* str() const { return m_str; }
+        expr* regex() const { return m_regex; }
+        bool sign() const { return m_sign; }
+        nseq_dependency* dep() const { return m_dep; }
+    };
+
+    // String predicate constraints: contains, prefix, suffix
+    enum nseq_pred_kind { NSEQ_CONTAINS, NSEQ_PREFIX, NSEQ_SUFFIX };
+
+    class nseq_pred {
+        nseq_pred_kind    m_kind;
+        expr_ref          m_arg1;     // the haystack or the full string
+        expr_ref          m_arg2;     // the needle or the prefix/suffix
+        bool              m_sign;     // true = positive, false = negated
+        nseq_dependency*  m_dep;
+    public:
+        nseq_pred(nseq_pred_kind kind, expr_ref const& a1, expr_ref const& a2, bool sign, nseq_dependency* dep)
+            : m_kind(kind), m_arg1(a1), m_arg2(a2), m_sign(sign), m_dep(dep) {}
+
+        nseq_pred_kind kind() const { return m_kind; }
+        expr* arg1() const { return m_arg1; }
+        expr* arg2() const { return m_arg2; }
+        bool sign() const { return m_sign; }
+        nseq_dependency* dep() const { return m_dep; }
+    };
+}
diff --git a/src/smt/nseq_state.cpp b/src/smt/nseq_state.cpp
new file mode 100644
index 000000000..55a9b1930
--- /dev/null
+++ b/src/smt/nseq_state.cpp
@@ -0,0 +1,142 @@
+/*++
+Copyright (c) 2025 Microsoft Corporation
+
+Module Name:
+
+    nseq_state.cpp
+
+Abstract:
+
+    State management for the Nielsen-based string theory solver.
+
+Author:
+
+    Clemens Eisenhofer
+    Nikolaj Bjorner (nbjorner) 2025-2-28
+
+--*/
+
+#include "ast/ast_pp.h"
+#include "ast/ast_ll_pp.h"
+#include "smt/nseq_state.h"
+
+namespace smt {
+
+    nseq_state::nseq_state(ast_manager& m, seq_util& u)
+        : m(m),
+          m_util(u),
+          m_eq_id(0),
+          m_axioms(m),
+          m_axioms_head(0),
+          m_length_apps(m) {
+    }
+
+    void nseq_state::push_scope() {
+        m_dm.push_scope();
+        m_trail.push_scope();
+        m_trail.push(value_trail<unsigned>(m_axioms_head));
+        m_eqs.push_scope();
+        m_neqs.push_scope();
+        m_mems.push_scope();
+        m_preds.push_scope();
+    }
+
+    void nseq_state::pop_scope(unsigned num_scopes) {
+        m_trail.pop_scope(num_scopes);
+        m_dm.pop_scope(num_scopes);
+        m_eqs.pop_scope(num_scopes);
+        m_neqs.pop_scope(num_scopes);
+        m_mems.pop_scope(num_scopes);
+        m_preds.pop_scope(num_scopes);
+    }
+
+    void nseq_state::reset() {
+        m_axioms.reset();
+        m_axiom_set.reset();
+        m_axioms_head = 0;
+        m_has_length.reset();
+        m_length_apps.reset();
+        m_eq_id = 0;
+    }
+
+    unsigned nseq_state::add_eq(expr* l, expr* r, nseq_dependency* dep) {
+        expr_ref_vector lhs(m), rhs(m);
+        m_util.str.get_concat_units(l, lhs);
+        m_util.str.get_concat_units(r, rhs);
+        unsigned id = m_eq_id++;
+        m_eqs.push_back(nseq_eq(id, lhs, rhs, dep));
+        m_stats.m_num_eqs++;
+        return id;
+    }
+
+    void nseq_state::add_ne(expr* l, expr* r, nseq_dependency* dep) {
+        expr_ref el(l, m), er(r, m);
+        m_neqs.push_back(nseq_ne(el, er, dep));
+        m_stats.m_num_neqs++;
+    }
+
+    void nseq_state::add_mem(expr* s, expr* re, bool sign, nseq_dependency* dep) {
+        expr_ref es(s, m), ere(re, m);
+        m_mems.push_back(nseq_mem(es, ere, sign, dep));
+        m_stats.m_num_memberships++;
+    }
+
+    void nseq_state::add_pred(nseq_pred_kind kind, expr* a1, expr* a2, bool sign, nseq_dependency* dep) {
+        expr_ref ea1(a1, m), ea2(a2, m);
+        m_preds.push_back(nseq_pred(kind, ea1, ea2, sign, dep));
+        m_stats.m_num_predicates++;
+    }
+
+    bool nseq_state::enqueue_axiom(expr* e) {
+        if (m_axiom_set.contains(e))
+            return false;
+        m_axiom_set.insert(e);
+        m_axioms.push_back(e);
+        return true;
+    }
+
+    void nseq_state::add_length(expr* len_app, expr* e, trail_stack& ts) {
+        if (m_has_length.contains(e))
+            return;
+        m_length_apps.push_back(len_app);
+        m_has_length.insert(e);
+        ts.push(push_back_vector<expr_ref_vector>(m_length_apps));
+        ts.push(insert_obj_trail<expr>(m_has_length, e));
+    }
+
+    void nseq_state::linearize(nseq_dependency* dep, enode_pair_vector& eqs, literal_vector& lits) const {
+        if (!dep)
+            return;
+        svector<nseq_assumption> assumptions;
+        m_dm.linearize(dep, assumptions);
+        for (auto const& a : assumptions) {
+            if (a.n1 != nullptr)
+                eqs.push_back(enode_pair(a.n1, a.n2));
+            else if (a.lit != null_literal)
+                lits.push_back(a.lit);
+        }
+    }
+
+    std::ostream& nseq_state::display(std::ostream& out) const {
+        if (!m_eqs.empty()) {
+            out << "equations:\n";
+            for (auto const& eq : m_eqs) {
+                out << "  [" << eq.id() << "] ";
+                for (auto* e : eq.lhs()) out << mk_bounded_pp(e, m, 2) << " ";
+                out << "= ";
+                for (auto* e : eq.rhs()) out << mk_bounded_pp(e, m, 2) << " ";
+                out << "\n";
+            }
+        }
+        if (!m_neqs.empty()) {
+            out << "disequalities: " << m_neqs.size() << "\n";
+        }
+        if (!m_mems.empty()) {
+            out << "memberships: " << m_mems.size() << "\n";
+        }
+        if (!m_preds.empty()) {
+            out << "predicates: " << m_preds.size() << "\n";
+        }
+        return out;
+    }
+}
diff --git a/src/smt/nseq_state.h b/src/smt/nseq_state.h
new file mode 100644
index 000000000..ce11f556b
--- /dev/null
+++ b/src/smt/nseq_state.h
@@ -0,0 +1,126 @@
+/*++
+Copyright (c) 2025 Microsoft Corporation
+
+Module Name:
+
+    nseq_state.h
+
+Abstract:
+
+    State management for the Nielsen-based string theory solver.
+    Tracks word equations, disequalities, regex memberships,
+    and string predicates with backtrackable scope management.
+
+Author:
+
+    Clemens Eisenhofer
+    Nikolaj Bjorner (nbjorner) 2025-2-28
+
+--*/
+#pragma once
+
+#include "ast/seq_decl_plugin.h"
+#include "ast/arith_decl_plugin.h"
+#include "ast/ast_trail.h"
+#include "util/scoped_vector.h"
+#include "util/union_find.h"
+#include "util/obj_hashtable.h"
+#include "smt/smt_theory.h"
+#include "smt/nseq_constraint.h"
+
+namespace smt {
+
+    class theory_nseq;
+
+    // Union-find for tracking equivalence classes of string variables.
+    typedef union_find<theory_nseq> nseq_union_find;
+
+    // Statistics for the nseq solver.
+    struct nseq_stats {
+        unsigned m_num_eqs;
+        unsigned m_num_neqs;
+        unsigned m_num_memberships;
+        unsigned m_num_predicates;
+        unsigned m_num_conflicts;
+        unsigned m_num_propagations;
+        unsigned m_num_splits;
+        unsigned m_num_axioms;
+        nseq_stats() { reset(); }
+        void reset() { memset(this, 0, sizeof(nseq_stats)); }
+    };
+
+    // Core state for the nseq solver.
+    // All collections use scoped_vector for automatic backtracking.
+    class nseq_state {
+        ast_manager&             m;
+        seq_util&                m_util;
+        nseq_dep_manager         m_dm;
+
+        // Constraint stores (backtrackable)
+        unsigned                 m_eq_id;
+        scoped_vector<nseq_eq>   m_eqs;
+        scoped_vector<nseq_ne>   m_neqs;
+        scoped_vector<nseq_mem>  m_mems;
+        scoped_vector<nseq_pred> m_preds;
+
+        // Axiom queue
+        expr_ref_vector          m_axioms;
+        obj_hashtable<expr>      m_axiom_set;
+        unsigned                 m_axioms_head;
+
+        // Length tracking
+        obj_hashtable<expr>      m_has_length;
+        expr_ref_vector          m_length_apps;
+
+        // Trail for undo
+        trail_stack              m_trail;
+
+        nseq_stats               m_stats;
+
+    public:
+        nseq_state(ast_manager& m, seq_util& u);
+
+        // Scope management
+        void push_scope();
+        void pop_scope(unsigned num_scopes);
+        void reset();
+
+        // Dependency manager access
+        nseq_dep_manager& dm() { return m_dm; }
+        nseq_dependency* mk_dep(enode* n1, enode* n2) { return m_dm.mk_leaf(nseq_assumption(n1, n2)); }
+        nseq_dependency* mk_dep(literal lit) { return m_dm.mk_leaf(nseq_assumption(lit)); }
+        nseq_dependency* mk_join(nseq_dependency* a, nseq_dependency* b) { return m_dm.mk_join(a, b); }
+
+        // Add constraints
+        unsigned add_eq(expr* l, expr* r, nseq_dependency* dep);
+        void add_ne(expr* l, expr* r, nseq_dependency* dep);
+        void add_mem(expr* s, expr* re, bool sign, nseq_dependency* dep);
+        void add_pred(nseq_pred_kind kind, expr* a1, expr* a2, bool sign, nseq_dependency* dep);
+
+        // Axiom management
+        bool enqueue_axiom(expr* e);
+        bool has_pending_axioms() const { return m_axioms_head < m_axioms.size(); }
+        unsigned axioms_head() const { return m_axioms_head; }
+        void set_axioms_head(unsigned h) { m_axioms_head = h; }
+        expr_ref_vector const& axioms() const { return m_axioms; }
+
+        // Length tracking
+        bool has_length(expr* e) const { return m_has_length.contains(e); }
+        void add_length(expr* len_app, expr* e, trail_stack& ts);
+
+        // Accessors
+        scoped_vector<nseq_eq> const& eqs() const { return m_eqs; }
+        scoped_vector<nseq_ne> const& neqs() const { return m_neqs; }
+        scoped_vector<nseq_mem> const& mems() const { return m_mems; }
+        scoped_vector<nseq_pred> const& preds() const { return m_preds; }
+        trail_stack& trail() { return m_trail; }
+        nseq_stats& stats() { return m_stats; }
+        nseq_stats const& stats() const { return m_stats; }
+
+        // Linearize dependencies for conflict/propagation
+        void linearize(nseq_dependency* dep, enode_pair_vector& eqs, literal_vector& lits) const;
+
+        // Display
+        std::ostream& display(std::ostream& out) const;
+    };
+}
diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index 6ce242d26..9e28b7fc8 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -17,6 +17,8 @@ Author:
 
 --*/
 
+#include "ast/ast_pp.h"
+#include "ast/ast_trail.h"
 #include "smt/theory_nseq.h"
 #include "smt/smt_context.h"
 
@@ -30,84 +32,369 @@ namespace smt {
           m_rewrite(ctx.get_manager()),
           m_sk(ctx.get_manager(), m_rewrite),
           m_arith_value(ctx.get_manager()),
-          m_has_seq(false) {
+          m_state(ctx.get_manager(), m_util),
+          m_find(*this),
+          m_has_seq(false),
+          m_new_propagation(false) {
     }
 
     theory_nseq::~theory_nseq() {
     }
 
     void theory_nseq::init() {
+        m_arith_value.init(&ctx);
     }
 
+    // -------------------------------------------------------
+    // Final check
+    // -------------------------------------------------------
+
     final_check_status theory_nseq::final_check_eh(unsigned) {
-        if (m_has_seq)
-            return FC_GIVEUP;
-        return FC_DONE;
+        if (!m_has_seq)
+            return FC_DONE;
+
+        TRACE(seq, display(tout << "final_check level=" << ctx.get_scope_level() << "\n"););
+
+        // Process pending axioms
+        if (m_state.has_pending_axioms()) {
+            unsigned head = m_state.axioms_head();
+            auto const& axioms = m_state.axioms();
+            for (unsigned i = head; i < axioms.size(); ++i)
+                deque_axiom(axioms[i]);
+            m_state.set_axioms_head(axioms.size());
+            return FC_CONTINUE;
+        }
+
+        // TODO: implement Nielsen transformation-based solving
+        // TODO: implement regex membership checking
+        // TODO: implement length/Parikh reasoning
+        return FC_GIVEUP;
     }
 
-    bool theory_nseq::internalize_atom(app* atom, bool) {
-        if (!m_has_seq) {
-            get_context().push_trail(value_trail<bool>(m_has_seq));
-            m_has_seq = true;
-        }
-        return false;
+    // -------------------------------------------------------
+    // Internalization
+    // -------------------------------------------------------
+
+    bool theory_nseq::internalize_atom(app* a, bool) {
+        return internalize_term(a);
     }
 
     bool theory_nseq::internalize_term(app* term) {
         if (!m_has_seq) {
-            get_context().push_trail(value_trail<bool>(m_has_seq));
+            ctx.push_trail(value_trail<bool>(m_has_seq));
             m_has_seq = true;
         }
-        return false;
+
+        if (m_util.str.is_in_re(term))
+            mk_var(ensure_enode(term->get_arg(0)));
+
+        if (m_util.str.is_length(term))
+            mk_var(ensure_enode(term->get_arg(0)));
+
+        if (ctx.e_internalized(term)) {
+            mk_var(ctx.get_enode(term));
+            return true;
+        }
+
+        if (m.is_bool(term) &&
+            (m_util.str.is_in_re(term) || m_sk.is_skolem(term))) {
+            bool_var bv = ctx.mk_bool_var(term);
+            ctx.set_var_theory(bv, get_id());
+            ctx.mark_as_relevant(bv);
+            return true;
+        }
+
+        for (auto arg : *term)
+            mk_var(ensure_enode(arg));
+
+        if (m.is_bool(term)) {
+            bool_var bv = ctx.mk_bool_var(term);
+            ctx.set_var_theory(bv, get_id());
+            ctx.mark_as_relevant(bv);
+        }
+
+        enode* e = nullptr;
+        if (ctx.e_internalized(term))
+            e = ctx.get_enode(term);
+        else
+            e = ctx.mk_enode(term, false, m.is_bool(term), true);
+        mk_var(e);
+
+        if (!ctx.relevancy())
+            relevant_eh(term);
+
+        return true;
     }
 
     void theory_nseq::internalize_eq_eh(app* atom, bool_var v) {
     }
 
+    theory_var theory_nseq::mk_var(enode* n) {
+        expr* o = n->get_expr();
+        if (!m_util.is_seq(o) && !m_util.is_re(o))
+            return null_theory_var;
+        if (is_attached_to_var(n))
+            return n->get_th_var(get_id());
+        theory_var v = theory::mk_var(n);
+        m_find.mk_var();
+        ctx.attach_th_var(n, this, v);
+        ctx.mark_as_relevant(n);
+        return v;
+    }
+
+    void theory_nseq::apply_sort_cnstr(enode* n, sort* s) {
+        mk_var(n);
+    }
+
+    // -------------------------------------------------------
+    // Equality and disequality callbacks
+    // -------------------------------------------------------
+
     void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) {
+        enode* n1 = get_enode(v1);
+        enode* n2 = get_enode(v2);
+        expr* o1 = n1->get_expr();
+        expr* o2 = n2->get_expr();
+        if (!m_util.is_seq(o1) && !m_util.is_re(o1))
+            return;
+
+        nseq_dependency* dep = m_state.mk_dep(n1, n2);
+        if (m_util.is_seq(o1)) {
+            if (m_find.find(v1) != m_find.find(v2))
+                m_find.merge(v1, v2);
+            m_state.add_eq(o1, o2, dep);
+            TRACE(seq, tout << "new eq: " << mk_bounded_pp(o1, m) << " = " << mk_bounded_pp(o2, m) << "\n";);
+        }
     }
 
     void theory_nseq::new_diseq_eh(theory_var v1, theory_var v2) {
+        enode* n1 = get_enode(v1);
+        enode* n2 = get_enode(v2);
+        expr* e1 = n1->get_expr();
+        expr* e2 = n2->get_expr();
+        if (!m_util.is_seq(e1))
+            return;
+        if (n1->get_root() == n2->get_root())
+            return;
+
+        literal lit = mk_eq(e1, e2, false);
+        nseq_dependency* dep = m_state.mk_dep(~lit);
+        m_state.add_ne(e1, e2, dep);
+        TRACE(seq, tout << "new diseq: " << mk_bounded_pp(e1, m) << " != " << mk_bounded_pp(e2, m) << "\n";);
     }
 
+    // -------------------------------------------------------
+    // Assignment callback
+    // -------------------------------------------------------
+
     void theory_nseq::assign_eh(bool_var v, bool is_true) {
+        expr* e = ctx.bool_var2expr(v);
+        expr* e1 = nullptr, *e2 = nullptr;
+        literal lit(v, !is_true);
+        TRACE(seq, tout << (is_true ? "" : "not ") << mk_bounded_pp(e, m) << "\n";);
+
+        if (m_util.str.is_prefix(e, e1, e2)) {
+            nseq_dependency* dep = m_state.mk_dep(lit);
+            m_state.add_pred(NSEQ_PREFIX, e1, e2, is_true, dep);
+        }
+        else if (m_util.str.is_suffix(e, e1, e2)) {
+            nseq_dependency* dep = m_state.mk_dep(lit);
+            m_state.add_pred(NSEQ_SUFFIX, e1, e2, is_true, dep);
+        }
+        else if (m_util.str.is_contains(e, e1, e2)) {
+            nseq_dependency* dep = m_state.mk_dep(lit);
+            m_state.add_pred(NSEQ_CONTAINS, e1, e2, is_true, dep);
+        }
+        else if (m_util.str.is_in_re(e, e1, e2)) {
+            nseq_dependency* dep = m_state.mk_dep(lit);
+            m_state.add_mem(e1, e2, is_true, dep);
+        }
     }
 
+    // -------------------------------------------------------
+    // Propagation
+    // -------------------------------------------------------
+
     bool theory_nseq::can_propagate() {
-        return false;
+        return m_state.has_pending_axioms();
     }
 
     void theory_nseq::propagate() {
+        unsigned head = m_state.axioms_head();
+        auto const& axioms = m_state.axioms();
+        for (unsigned i = head; i < axioms.size(); ++i)
+            deque_axiom(axioms[i]);
+        m_state.set_axioms_head(axioms.size());
     }
 
+    // -------------------------------------------------------
+    // Scope management
+    // -------------------------------------------------------
+
     void theory_nseq::push_scope_eh() {
         theory::push_scope_eh();
+        m_state.push_scope();
     }
 
     void theory_nseq::pop_scope_eh(unsigned num_scopes) {
+        m_state.pop_scope(num_scopes);
         theory::pop_scope_eh(num_scopes);
+        m_rewrite.reset();
     }
 
     void theory_nseq::restart_eh() {
     }
 
     void theory_nseq::relevant_eh(app* n) {
+        if (m_util.str.is_length(n))
+            add_length(n);
+
+        // Enqueue axioms for operations that need reduction
+        if (m_util.str.is_index(n)   ||
+            m_util.str.is_replace(n) ||
+            m_util.str.is_extract(n) ||
+            m_util.str.is_at(n)      ||
+            m_util.str.is_nth_i(n)   ||
+            m_util.str.is_itos(n)    ||
+            m_util.str.is_stoi(n)    ||
+            m_util.str.is_from_code(n) ||
+            m_util.str.is_to_code(n))
+            enque_axiom(n);
     }
 
-    theory_var theory_nseq::mk_var(enode* n) {
-        return theory::mk_var(n);
+    // -------------------------------------------------------
+    // Axiom management
+    // -------------------------------------------------------
+
+    void theory_nseq::enque_axiom(expr* e) {
+        m_state.enqueue_axiom(e);
     }
 
-    void theory_nseq::apply_sort_cnstr(enode* n, sort* s) {
+    void theory_nseq::deque_axiom(expr* e) {
+        // TODO: generate axioms for string operations
+        TRACE(seq, tout << "deque axiom: " << mk_bounded_pp(e, m) << "\n";);
     }
 
+    void theory_nseq::add_axiom(literal l1, literal l2, literal l3, literal l4, literal l5) {
+        literal_vector lits;
+        if (l1 != null_literal) lits.push_back(l1);
+        if (l2 != null_literal) lits.push_back(l2);
+        if (l3 != null_literal) lits.push_back(l3);
+        if (l4 != null_literal) lits.push_back(l4);
+        if (l5 != null_literal) lits.push_back(l5);
+        ctx.mk_th_axiom(get_id(), lits);
+        m_state.stats().m_num_axioms++;
+    }
+
+    // -------------------------------------------------------
+    // Propagation helpers
+    // -------------------------------------------------------
+
+    bool theory_nseq::propagate_eq(nseq_dependency* dep, enode* n1, enode* n2) {
+        if (n1->get_root() == n2->get_root())
+            return false;
+        enode_pair_vector eqs;
+        literal_vector lits;
+        m_state.linearize(dep, eqs, lits);
+        TRACE(seq, tout << "propagate eq: " << mk_bounded_pp(n1->get_expr(), m) << " = " << mk_bounded_pp(n2->get_expr(), m) << "\n";);
+        justification* js = ctx.mk_justification(
+            ext_theory_eq_propagation_justification(get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data(), n1, n2));
+        ctx.assign_eq(n1, n2, eq_justification(js));
+        m_state.stats().m_num_propagations++;
+        m_new_propagation = true;
+        return true;
+    }
+
+    bool theory_nseq::propagate_eq(nseq_dependency* dep, expr* e1, expr* e2, bool add_to_eqs) {
+        enode* n1 = ensure_enode(e1);
+        enode* n2 = ensure_enode(e2);
+        return propagate_eq(dep, n1, n2);
+    }
+
+    bool theory_nseq::propagate_lit(nseq_dependency* dep, literal lit) {
+        if (ctx.get_assignment(lit) == l_true)
+            return false;
+        enode_pair_vector eqs;
+        literal_vector lits;
+        m_state.linearize(dep, eqs, lits);
+        justification* js = ctx.mk_justification(
+            ext_theory_propagation_justification(get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data(), lit));
+        ctx.assign(lit, js);
+        m_state.stats().m_num_propagations++;
+        m_new_propagation = true;
+        return true;
+    }
+
+    void theory_nseq::set_conflict(nseq_dependency* dep, literal_vector const& extra_lits) {
+        enode_pair_vector eqs;
+        literal_vector lits;
+        lits.append(extra_lits);
+        m_state.linearize(dep, eqs, lits);
+        TRACE(seq, tout << "conflict: " << lits << "\n";);
+        ctx.set_conflict(
+            ctx.mk_justification(
+                ext_theory_conflict_justification(get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data())));
+        m_state.stats().m_num_conflicts++;
+    }
+
+    // -------------------------------------------------------
+    // Utility helpers
+    // -------------------------------------------------------
+
+    void theory_nseq::add_length(expr* l) {
+        expr* e = nullptr;
+        VERIFY(m_util.str.is_length(l, e));
+        m_state.add_length(l, e, m_state.trail());
+    }
+
+    literal theory_nseq::mk_literal(expr* e) {
+        expr_ref _e(e, m);
+        if (!ctx.e_internalized(e))
+            ctx.internalize(e, false);
+        return ctx.get_literal(e);
+    }
+
+    literal theory_nseq::mk_eq_empty(expr* e, bool phase) {
+        expr_ref emp(m_util.str.mk_empty(e->get_sort()), m);
+        literal lit = mk_eq(e, emp, false);
+        ctx.force_phase(phase ? lit : ~lit);
+        return lit;
+    }
+
+    expr_ref theory_nseq::mk_len(expr* s) {
+        return expr_ref(m_util.str.mk_length(s), m);
+    }
+
+    expr_ref theory_nseq::mk_concat(expr_ref_vector const& es, sort* s) {
+        SASSERT(!es.empty());
+        return expr_ref(m_util.str.mk_concat(es.size(), es.data(), s), m);
+    }
+
+    // -------------------------------------------------------
+    // Display and statistics
+    // -------------------------------------------------------
+
     void theory_nseq::display(std::ostream& out) const {
         out << "theory_nseq\n";
+        m_state.display(out);
     }
 
     void theory_nseq::collect_statistics(::statistics& st) const {
+        auto const& s = m_state.stats();
+        st.update("nseq eqs", s.m_num_eqs);
+        st.update("nseq neqs", s.m_num_neqs);
+        st.update("nseq memberships", s.m_num_memberships);
+        st.update("nseq predicates", s.m_num_predicates);
+        st.update("nseq conflicts", s.m_num_conflicts);
+        st.update("nseq propagations", s.m_num_propagations);
+        st.update("nseq splits", s.m_num_splits);
+        st.update("nseq axioms", s.m_num_axioms);
     }
 
+    // -------------------------------------------------------
+    // Model generation (stub)
+    // -------------------------------------------------------
+
     model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) {
         return nullptr;
     }
@@ -119,3 +406,4 @@ namespace smt {
     void theory_nseq::finalize_model(model_generator& mg) {
     }
 }
+
diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h
index 791d91038..3125c9e2d 100644
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@@ -27,6 +27,7 @@ Author:
 #include "smt/smt_theory.h"
 #include "smt/smt_arith_value.h"
 #include "smt/smt_model_generator.h"
+#include "smt/nseq_state.h"
 
 namespace smt {
 
@@ -37,8 +38,12 @@ namespace smt {
         th_rewriter      m_rewrite;
         seq::skolem      m_sk;
         arith_value      m_arith_value;
+        nseq_state       m_state;
+        nseq_union_find  m_find;
         bool             m_has_seq;
+        bool             m_new_propagation;
 
+        // Theory interface
         final_check_status final_check_eh(unsigned) override;
         bool internalize_atom(app* atom, bool) override;
         bool internalize_term(app*) override;
@@ -62,9 +67,35 @@ namespace smt {
         theory_var mk_var(enode* n) override;
         void apply_sort_cnstr(enode* n, sort* s) override;
 
+        // Axiom management
+        void enque_axiom(expr* e);
+        void deque_axiom(expr* e);
+        void add_axiom(literal l1, literal l2 = null_literal, literal l3 = null_literal,
+                        literal l4 = null_literal, literal l5 = null_literal);
+
+        // Propagation helpers
+        bool propagate_eq(nseq_dependency* dep, enode* n1, enode* n2);
+        bool propagate_eq(nseq_dependency* dep, expr* e1, expr* e2, bool add_to_eqs = true);
+        bool propagate_lit(nseq_dependency* dep, literal lit);
+        void set_conflict(nseq_dependency* dep, literal_vector const& lits = literal_vector());
+
+        // Helpers
+        void add_length(expr* l);
+        literal mk_literal(expr* e);
+        literal mk_eq_empty(expr* e, bool phase = true);
+        expr_ref mk_len(expr* s);
+        expr_ref mk_concat(expr_ref_vector const& es, sort* s);
+
     public:
         theory_nseq(context& ctx);
         ~theory_nseq() override;
         void init() override;
+
+        // union_find callbacks
+        trail_stack& get_trail_stack() { return m_state.trail(); }
+        void merge_eh(theory_var, theory_var, theory_var, theory_var) {}
+        void after_merge_eh(theory_var, theory_var, theory_var, theory_var) {}
+        void unmerge_eh(theory_var, theory_var) {}
     };
 }
+