first end-pass. Atomic

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2026-07-08 00:06:21 +00:00 · 2026-03-04 02:05:26 -08:00 · 2026-03-04 02:05:26 -08:00 · 5aa3713d19
commit 5aa3713d19
parent 13f9fec339
15 changed files with 6160 additions and 209 deletions
--- a/src/ast/euf/euf_sgraph.cpp
+++ b/src/ast/euf/euf_sgraph.cpp
@ -355,6 +355,22 @@ namespace euf {
        if (n)
            return n;

+        // decompose non-empty string constants into character chains
+        // so that Nielsen graph can do prefix matching on them
+        zstring s;
+        if (m_seq.str.is_string(e, s) && !s.empty()) {
+            snode* result = mk_char(s[s.length() - 1]);
+            for (unsigned i = s.length() - 1; i-- > 0; )
+                result = mk_concat(mk_char(s[i]), result);
+            // register the original string expression as an alias
+            unsigned eid = e->get_id();
+            m_expr2snode.reserve(eid + 1, nullptr);
+            m_expr2snode[eid] = result;
+            m_alias_trail.push_back(eid);
+            mk_enode(e);
+            return result;
+        }
+
        snode_kind k = classify(e);

        if (!is_app(e))
@ -400,6 +416,7 @@ namespace euf {

    // Open a new backtracking scope: remember the current size of the node
    // vector and of the alias trail, bump the scope counter, and push a
    // matching scope on the underlying e-graph.
    void sgraph::push() {
        m_scopes.push_back(m_nodes.size());
+        m_alias_trail_lim.push_back(m_alias_trail.size());
        ++m_num_scopes;
        m_egraph.push();
    }
@ -420,6 +437,15 @@ namespace euf {
        }
        m_nodes.shrink(old_sz);
        m_scopes.shrink(new_lvl);
+        // undo alias entries (string constant decompositions)
+        unsigned alias_old = m_alias_trail_lim[new_lvl];
+        for (unsigned i = m_alias_trail.size(); i-- > alias_old; ) {
+            unsigned eid = m_alias_trail[i];
+            if (eid < m_expr2snode.size())
+                m_expr2snode[eid] = nullptr;
+        }
+        m_alias_trail.shrink(alias_old);
+        m_alias_trail_lim.shrink(new_lvl);
        m_num_scopes = new_lvl;
        m_egraph.pop(num_scopes);
    }
@ -520,6 +546,25 @@ namespace euf {
        expr* ch = nullptr;
        if (m_seq.str.is_unit(elem_expr, ch))
            elem_expr = ch;
+
+        // If elem is a regex predicate (e.g., re.allchar from compute_minterms),
+        // extract a representative character for the derivative.
+        sort* seq_sort = nullptr, *ele_sort = nullptr;
+        if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) {
+            if (ele_sort != elem_expr->get_sort()) {
+                expr* lo = nullptr, *hi = nullptr;
+                if (m_seq.re.is_full_char(elem_expr)) {
+                    // re.allchar represents the entire alphabet; computing a derivative
+                    // w.r.t. a single character would be imprecise and could incorrectly
+                    // report fail. Return nullptr to prevent incorrect pruning.
+                    return nullptr;
+                }
+                else if (m_seq.re.is_range(elem_expr, lo, hi) && lo)
+                    elem_expr = lo;
+                else
+                    return nullptr;
+            }
+        }
        expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
        if (!result)
            return nullptr;
--- a/src/ast/euf/euf_sgraph.h
+++ b/src/ast/euf/euf_sgraph.h
@ -97,6 +97,10 @@ namespace euf {
        // maps expression id to snode
        ptr_vector<snode> m_expr2snode;

+        // trail of alias entries (string constant → decomposed snode) for pop
+        unsigned_vector  m_alias_trail;       // expression ids
+        unsigned_vector  m_alias_trail_lim;   // scope boundaries
+
        snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
        snode_kind classify(expr* e) const;
        void compute_metadata(snode* n);
--- a/src/smt/nseq_model.cpp
+++ b/src/smt/nseq_model.cpp
@ -7,7 +7,8 @@ Module Name:

 Abstract:

-    Implementation of nseq_model.
+    Implementation of nseq_model: model construction for the
+    Nielsen-based string solver.

 Author:

@ -15,3 +16,292 @@ Author:

 --*/
 #include "smt/nseq_model.h"
+#include "smt/theory_nseq.h"
+#include "smt/nseq_regex.h"
+#include "smt/nseq_state.h"
+#include "smt/smt_context.h"
+#include "smt/smt_model_generator.h"
+#include "smt/proto_model/proto_model.h"
+#include "ast/ast_pp.h"
+
+namespace smt {
+
+    // Bind the solver components used during model construction. The trail
+    // that pins generated expressions is created against the same manager.
+    nseq_model::nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
+                           seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex) :
+        m_th(th),
+        m(m),
+        m_seq(seq),
+        m_rewriter(rw),
+        m_sg(sg),
+        m_regex(regex),
+        m_trail(m) {
+    }
+
+    // Phase 1 of model construction: clear per-model state, allocate a
+    // seq_factory and register it with the model generator, seed the factory
+    // with the expressions already present in the root equations, collect the
+    // per-variable regex constraints, and finally harvest variable
+    // assignments when the root node is marked satisfied.
+    void nseq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state) {
+        // start from a clean slate
+        m_var_regex.reset();
+        m_var_values.reset();
+        m_trail.reset();
+
+        m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
+        mg.register_factory(m_factory);
+
+        register_existing_values(nielsen);
+        collect_var_regex_constraints(state);
+
+        // NOTE(review): assignments are read from the root node itself;
+        // confirm that the root carries the equations of the satisfying leaf.
+        seq::nielsen_node const* root = nielsen.root();
+        if (!root || !root->is_satisfied())
+            return;
+        extract_assignments(root);
+    }
+
+    // Phase 2 of model construction: build the model_value_proc for enode n.
+    //
+    // Returns nullptr when n's expression is neither a sequence, a regex,
+    // nor an underspecified nth term (or when no fresh element value can be
+    // produced for an nth_u term). Every expression wrapped in an
+    // expr_wrapper_proc is first pushed on m_trail so that it stays alive
+    // for the duration of model construction.
+    model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
+        app* e = n->get_expr();
+        if (!m_seq.is_seq(e) && !m_seq.is_re(e) && !m_seq.str.is_nth_u(e))
+            return nullptr;
+
+        // For regex-sorted enodes, return the expression itself as a model value.
+        // Regexes are interpreted as themselves in the model.
+        if (m_seq.is_re(e)) {
+            m_trail.push_back(e);
+            return alloc(expr_wrapper_proc, e);
+        }
+
+        // For nth_u (underspecified nth), return a fresh value of the element sort.
+        if (m_seq.str.is_nth_u(e)) {
+            sort* srt = e->get_sort();
+            expr* val = m_factory->get_fresh_value(srt);
+            if (val) {
+                m_trail.push_back(val);
+                return alloc(expr_wrapper_proc, to_app(val));
+            }
+            return nullptr;
+        }
+
+        // look up snode for this expression
+        euf::snode* sn = m_sg.find(e);
+        expr_ref val(m);
+        if (sn)
+            val = snode_to_value(sn);
+
+        if (!val) {
+            // no assignment found — generate fresh value
+            val = m_factory->get_fresh_value(e->get_sort());
+        }
+
+        if (val) {
+            m_trail.push_back(val);
+            m_factory->add_trail(val);
+            return alloc(expr_wrapper_proc, to_app(val));
+        }
+
+        // last resort: the empty sequence of e's sort. Pin it on the trail
+        // before wrapping it, exactly like every other value handed out
+        // above; otherwise nothing holds a reference to the new term.
+        expr_ref empty(m_seq.str.mk_empty(e->get_sort()), m);
+        m_trail.push_back(empty);
+        return alloc(expr_wrapper_proc, to_app(empty.get()));
+    }
+
+    // Phase 3 of model construction: forget the factory pointer and drop all
+    // per-model tables and pinned expressions. The factory is not freed here
+    // (presumably it is owned by the model generator it was registered with).
+    void nseq_model::finalize(model_generator& mg) {
+        m_factory = nullptr;
+        m_var_regex.reset();
+        m_var_values.reset();
+        m_trail.reset();
+    }
+
+    // Walk the string equations of `node` and, for every side that is a
+    // variable without a recorded value, record the value of the opposite
+    // side. The left-hand side is considered before the right-hand side.
+    void nseq_model::extract_assignments(seq::nielsen_node const* node) {
+        if (!node)
+            return;
+
+        // bind `var` to the value of `other`, unless `var` is not a variable,
+        // already has a value, or `other` has no value
+        auto bind = [this](euf::snode* var, euf::snode* other) {
+            if (!var->is_var() || m_var_values.contains(var->id()))
+                return;
+            expr_ref val = snode_to_value(other);
+            if (!val)
+                return;
+            m_trail.push_back(val);
+            m_var_values.insert(var->id(), val);
+        };
+
+        for (auto const& eq : node->str_eqs()) {
+            if (eq.m_lhs && eq.m_rhs) {
+                bind(eq.m_lhs, eq.m_rhs);
+                bind(eq.m_rhs, eq.m_lhs);
+            }
+        }
+    }
+
+    // Turn an snode tree into a value expression, substituting the values of
+    // variables (computed on demand through get_var_value).
+    //
+    // Returns a null expr_ref when n is null or when no expression can be
+    // produced for it. For a concatenation, a side without a value is
+    // dropped and the other side is returned on its own (best effort).
+    expr_ref nseq_model::snode_to_value(euf::snode* n) {
+        if (!n)
+            return expr_ref(m);
+
+        if (n->is_empty()) {
+            // build the empty sequence in the node's own sort when the node
+            // carries an expression; fall back to the string sort otherwise
+            sort* srt = m_seq.str.mk_string_sort();
+            if (expr* e = n->get_expr())
+                srt = e->get_sort();
+            return expr_ref(m_seq.str.mk_empty(srt), m);
+        }
+
+        if (n->is_char() || n->is_unit()) {
+            expr* e = n->get_expr();
+            return e ? expr_ref(e, m) : expr_ref(m);
+        }
+
+        if (n->is_var())
+            return expr_ref(get_var_value(n), m);
+
+        if (n->is_concat()) {
+            expr_ref lhs = snode_to_value(n->arg(0));
+            expr_ref rhs = snode_to_value(n->arg(1));
+            if (lhs && rhs)
+                return expr_ref(m_seq.str.mk_concat(lhs, rhs), m);
+            if (lhs) return lhs;
+            if (rhs) return rhs;
+            return expr_ref(m);
+        }
+
+        // fallback: use the underlying expression
+        expr* e = n->get_expr();
+        return e ? expr_ref(e, m) : expr_ref(m);
+    }
+
+    // Build a string intended to be accepted by `regex`.
+    //
+    // Strategy: a nullable regex is witnessed by the empty string; otherwise
+    // pick a candidate first character, take the derivative, and recurse on
+    // it. Candidates whose derivative could not be computed (nullptr) or is
+    // the empty language are skipped, since no string starting with them can
+    // be justified. `depth` bounds the recursion.
+    //
+    // When no candidate works, or the depth bound is hit, a fresh factory
+    // value is returned (the empty string if the factory yields nothing);
+    // that value is not guaranteed to satisfy the regex. A null `regex`
+    // yields the empty string.
+    expr_ref nseq_model::generate_regex_witness(euf::snode* regex, unsigned depth) {
+        sort* srt = m_seq.str.mk_string_sort();
+        if (!regex)
+            return expr_ref(m_seq.str.mk_empty(srt), m);
+
+        // last-resort value shared by the depth cut-off and the final fallback
+        auto fresh_or_empty = [&]() {
+            expr* fresh = m_factory->get_fresh_value(srt);
+            return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(srt), m);
+        };
+
+        // depth bound to prevent stack overflow on deep regexes
+        if (depth > 1000)
+            return fresh_or_empty();
+
+        // nullable regex: empty string is a valid witness
+        if (m_regex.is_nullable(regex))
+            return expr_ref(m_seq.str.mk_empty(srt), m);
+
+        // collect first-position characters
+        euf::snode_vector chars;
+        m_regex.collect_first_chars(regex, chars);
+
+        // try the candidates in order; take the first one with a usable derivative
+        for (euf::snode* c : chars) {
+            if (!c || !c->get_expr())
+                continue;
+            euf::snode* deriv = m_regex.derivative(regex, c);
+            if (!deriv || deriv->is_fail())
+                continue;
+            expr_ref tail = generate_regex_witness(deriv, depth + 1);
+            if (tail)
+                return expr_ref(m_seq.str.mk_concat(c->get_expr(), tail), m);
+        }
+
+        // fallback: return fresh value from factory (may not satisfy the regex,
+        // but avoids returning empty string which definitely doesn't satisfy non-nullable regex)
+        return fresh_or_empty();
+    }
+
+    // Register the expression of every side of every root string equation
+    // with the factory. Does nothing when the graph has no root.
+    void nseq_model::register_existing_values(seq::nielsen_graph& nielsen) {
+        seq::nielsen_node const* root = nielsen.root();
+        if (!root)
+            return;
+
+        // register one equation side, if it exists and has an expression
+        auto note = [this](euf::snode* side) {
+            if (side && side->get_expr())
+                m_factory->register_value(side->get_expr());
+        };
+
+        for (auto const& eq : root->str_eqs()) {
+            note(eq.m_lhs);
+            note(eq.m_rhs);
+        }
+    }
+
+    // Return the value recorded for `var`. When there is none, ask
+    // mk_fresh_value for one, pin it on the trail, and cache it under the
+    // variable's id. Returns nullptr if no value could be produced.
+    expr* nseq_model::get_var_value(euf::snode* var) {
+        expr* cached = nullptr;
+        if (m_var_values.find(var->id(), cached))
+            return cached;
+
+        // unconstrained or regex-constrained variable
+        expr* fresh = mk_fresh_value(var);
+        if (!fresh)
+            return nullptr;
+        m_trail.push_back(fresh);
+        m_var_values.insert(var->id(), fresh);
+        return fresh;
+    }
+
+    // Produce a value for a variable that has no assignment. If a regex
+    // constraint was collected for it, try a regex witness first; the witness
+    // is pinned on the trail and registered with the factory. Otherwise, or
+    // if no witness was produced, ask the factory for a fresh value of the
+    // variable's sort (string sort when the variable has no expression).
+    expr* nseq_model::mk_fresh_value(euf::snode* var) {
+        euf::snode* re = nullptr;
+        bool const constrained = m_var_regex.find(var->id(), re) && re;
+        if (constrained) {
+            expr_ref witness = generate_regex_witness(re);
+            if (witness) {
+                m_trail.push_back(witness);
+                m_factory->register_value(witness);
+                return witness;
+            }
+        }
+
+        // plain fresh value
+        sort* srt = m_seq.str.mk_string_sort();
+        if (var->get_expr())
+            srt = var->get_expr()->get_sort();
+        return m_factory->get_fresh_value(srt);
+    }
+
+    // Fill m_var_regex from the memberships in `state`. Only memberships
+    // whose string side is a variable snode are considered. The first regex
+    // seen for a variable is stored as is; each further one replaces the
+    // stored entry by re.inter(stored, new), provided both regexes have
+    // expressions and the sgraph returns a node for the intersection.
+    void nseq_model::collect_var_regex_constraints(nseq_state const& state) {
+        for (auto const& mem : state.str_mems()) {
+            bool const usable = mem.m_str && mem.m_regex && mem.m_str->is_var();
+            if (!usable)
+                continue;
+
+            unsigned const key = mem.m_str->id();
+            euf::snode* prev = nullptr;
+            if (!m_var_regex.find(key, prev) || !prev) {
+                // first constraint for this variable
+                m_var_regex.insert(key, mem.m_regex);
+                continue;
+            }
+
+            // combine with the constraint already stored
+            expr* prev_re = prev->get_expr();
+            expr* next_re = mem.m_regex->get_expr();
+            if (!prev_re || !next_re)
+                continue;
+            expr_ref both(m_seq.re.mk_inter(prev_re, next_re), m);
+            euf::snode* merged = m_sg.mk(both);
+            if (merged)
+                m_var_regex.insert(key, merged);
+        }
+    }
+
+    // Check the regex memberships of `state` against the model `mdl`.
+    // A positive membership fails when the model reports it false; a negative
+    // membership fails when it evaluates to true. Entries lacking a string or
+    // regex node/expression are skipped. Each violation is logged at
+    // verbosity 0. Returns true iff no violation was found.
+    bool nseq_model::validate_regex(nseq_state const& state, ::proto_model& mdl) {
+        bool all_hold = true;
+
+        // positive memberships: str ∈ regex
+        for (auto const& mem : state.str_mems()) {
+            if (!mem.m_str || !mem.m_regex)
+                continue;
+            expr* str = mem.m_str->get_expr();
+            expr* re = mem.m_regex->get_expr();
+            if (!str || !re)
+                continue;
+
+            expr_ref membership(m_seq.re.mk_in_re(str, re), m);
+            if (!mdl.is_false(membership))
+                continue;
+            IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
+                       << mk_bounded_pp(str, m, 3)
+                       << " in " << mk_bounded_pp(re, m, 3) << "\n";);
+            all_hold = false;
+        }
+
+        // negative memberships: str ∉ regex
+        for (auto const& entry : state.neg_mems()) {
+            if (!entry.m_str || !entry.m_regex)
+                continue;
+            expr* str = entry.m_str->get_expr();
+            expr* re = entry.m_regex->get_expr();
+            if (!str || !re)
+                continue;
+
+            expr_ref membership(m_seq.re.mk_in_re(str, re), m);
+            expr_ref value(m);
+            mdl.eval(membership, value, true);
+            if (!value || !m.is_true(value))
+                continue;
+            IF_VERBOSE(0, verbose_stream() << "nseq model: negative membership violated: "
+                       << mk_bounded_pp(str, m, 3)
+                       << " not in " << mk_bounded_pp(re, m, 3) << "\n";);
+            all_hold = false;
+        }
+
+        return all_hold;
+    }
+
+}
--- a/src/smt/nseq_model.h
+++ b/src/smt/nseq_model.h
@ -7,7 +7,18 @@ Module Name:

 Abstract:

-    Model generation from solved Nielsen graph.
+    Model construction for the Nielsen-based string solver (theory_nseq).
+
+    After the Nielsen graph search returns sat, this module extracts
+    variable-to-value assignments from the satisfying leaf node and
+    builds model_value_proc callbacks for the SMT model generator.
+
+    The workflow is:
+      1. init() — allocate seq_factory, register existing string literals,
+         and extract variable assignments from the satisfying Nielsen node.
+      2. mk_value(enode*) — return a model_value_proc that lazily builds
+         the concrete value for a given enode.
+      3. finalize() — clean up temporary state.

 Author:

@ -16,57 +27,99 @@ Author:
 --*/
 #pragma once

-#include "ast/ast.h"
 #include "ast/seq_decl_plugin.h"
-#include "util/zstring.h"
+#include "ast/rewriter/seq_rewriter.h"
 #include "ast/euf/euf_sgraph.h"
+#include "smt/smt_types.h"
 #include "smt/seq/seq_nielsen.h"
-#include <vector>
-#include <utility>
+#include "model/seq_factory.h"
+
+class proto_model;

 namespace smt {

+    class theory_nseq;
+    class nseq_regex;
+    class nseq_state;
+    class model_value_proc;
+
    class nseq_model {
-        ast_manager& m;
-        seq_util     m_seq;
-        euf::sgraph& m_sg;
-        unsigned     m_fresh_counter = 0;
+        theory_nseq&    m_th;
+        ast_manager&    m;
+        seq_util&       m_seq;
+        seq_rewriter&   m_rewriter;
+        euf::sgraph&    m_sg;
+        nseq_regex&     m_regex;
+
+        // factory for generating fresh string/regex values
+        seq_factory*    m_factory = nullptr;
+
+        // variable assignments extracted from the satisfying Nielsen node.
+        // maps snode id -> expr* (concrete value)
+        u_map<expr*> m_var_values;
+
+        // trail for GC protection of generated expressions
+        expr_ref_vector m_trail;
+
+        // per-variable regex constraints: maps snode id -> intersected regex snode.
+        // collected during init() from the state's str_mem list.
+        u_map<euf::snode*> m_var_regex;

    public:
-        nseq_model(ast_manager& m, euf::sgraph& sg) : m(m), m_seq(m), m_sg(sg) {}
+        nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
+                   seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex);

-        // generate a fresh string value (used when a variable is unconstrained)
-        expr_ref mk_fresh_value() {
-            std::string name = "s!" + std::to_string(m_fresh_counter++);
-            zstring zs(name.c_str());
-            return expr_ref(m_seq.str.mk_string(zs), m);
-        }
+        // Phase 1: initialize model construction.
+        // Allocates seq_factory, registers it with mg, collects
+        // existing string literals, and extracts variable assignments
+        // from the satisfying Nielsen leaf node.
+        void init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state);

-        // extract variable assignments from a satisfied leaf node
-        // Returns true if all variables got a valid assignment
-        bool extract_assignments(seq::nielsen_node* node,
-                                 std::vector<std::pair<euf::snode*, expr*>>& assignment) {
-            if (!node)
-                return false;
-            for (auto const& eq : node->str_eqs()) {
-                if (!eq.m_lhs || !eq.m_rhs)
-                    continue;
-                if (eq.m_lhs->is_var() && eq.m_rhs->get_expr()) {
-                    assignment.emplace_back(eq.m_lhs, eq.m_rhs->get_expr());
-                }
-                else if (eq.m_rhs->is_var() && eq.m_lhs->get_expr()) {
-                    assignment.emplace_back(eq.m_rhs, eq.m_lhs->get_expr());
-                }
-            }
-            return true;
-        }
+        // Phase 2: build a model_value_proc for the given enode.
+        // Returns nullptr if the enode is not a sequence/string sort.
+        model_value_proc* mk_value(enode* n, model_generator& mg);

-        // validate that a regex membership constraint is satisfied by the assignment
-        bool validate_regex(seq::str_mem const& mem,
-                            obj_map<euf::snode, expr*> const& assignment) {
-            // stub: assume valid for now
-            return true;
-        }
+        // Phase 3: clean up temporary model construction state.
+        void finalize(model_generator& mg);
+
+        // Validate that model assignments satisfy all regex membership
+        // constraints from the state.  Checks positive and negative
+        // memberships.  Returns true if all constraints pass.
+        bool validate_regex(nseq_state const& state, ::proto_model& mdl);
+
+    private:
+        // extract variable assignments from a satisfying Nielsen node.
+        // Walks str_eqs looking for x = value patterns and records them.
+        void extract_assignments(seq::nielsen_node const* node);
+
+        // recursively substitute known variable assignments into an snode tree.
+        // Returns a concrete Z3 expression.
+        expr_ref snode_to_value(euf::snode* n);
+
+        // generate a concrete witness string for a regex.
+        // Uses nullable check and first-char collection to build
+        // a minimal satisfying string. depth bounds recursion.
+        expr_ref generate_regex_witness(euf::snode* regex, unsigned depth = 0);
+
+        // register all string literals appearing in the constraint store
+        // with the factory to avoid collisions with fresh values.
+        void register_existing_values(seq::nielsen_graph& nielsen);
+
+        // look up or compute the value for an snode variable.
+        // If no assignment exists, delegates to mk_fresh_value.
+        expr* get_var_value(euf::snode* var);
+
+        // generate a fresh value for a variable, respecting regex
+        // membership constraints. If the variable has associated
+        // regex constraints (collected during init), generates a
+        // witness satisfying the intersection; otherwise falls back
+        // to a plain fresh value from the factory.
+        expr* mk_fresh_value(euf::snode* var);
+
+        // collect per-variable regex constraints from the state.
+        // For each positive str_mem, records the regex (or intersects
+        // with existing) into m_var_regex keyed by the string snode id.
+        void collect_var_regex_constraints(nseq_state const& state);
    };

 }
--- a/src/smt/nseq_regex.cpp
+++ b/src/smt/nseq_regex.cpp
@ -7,7 +7,7 @@ Module Name:

 Abstract:

-    Implementation of nseq_regex.
+    Lazy regex membership processing for the Nielsen-based string solver.

 Author:

@ -15,3 +15,394 @@ Author:

 --*/
 #include "smt/nseq_regex.h"
+
+namespace smt {
+
+    // -----------------------------------------------------------------------
+    // Regex emptiness checking (structural analysis)
+    // -----------------------------------------------------------------------
+
+    // Structural (incomplete) emptiness test: returns true only when the
+    // shape of `re` shows that it denotes the empty language; false means
+    // "not shown empty", including for a null node or a node without an
+    // expression.
+    bool nseq_regex::is_empty_regex(euf::snode* re) const {
+        if (!re)
+            return false;
+        // the empty-language constant itself
+        if (re->is_fail())
+            return true;
+        // shapes this test treats as never empty
+        bool const never_empty = re->is_star() || re->is_to_re() ||
+                                 re->is_full_char() || re->is_full_seq();
+        if (never_empty)
+            return false;
+        // a nullable loop accepts ε
+        if (re->is_loop() && re->is_nullable())
+            return false;
+
+        expr* e = re->get_expr();
+        if (!e)
+            return false;
+        seq_util& seq = m_sg.get_seq_util();
+
+        expr* a = nullptr, * b = nullptr;
+        // union: empty iff both operands are
+        if (seq.re.is_union(e, a, b)) {
+            SASSERT(re->num_args() == 2);
+            return is_empty_regex(re->arg(0)) && is_empty_regex(re->arg(1));
+        }
+        // concatenation: empty as soon as one operand is
+        if (seq.re.is_concat(e, a, b)) {
+            SASSERT(re->num_args() == 2);
+            return is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1));
+        }
+        // intersection: an empty operand suffices; otherwise keep looking
+        if (seq.re.is_intersection(e, a, b)) {
+            SASSERT(re->num_args() == 2);
+            if (is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1)))
+                return true;
+        }
+        // complement of the full language
+        if (re->is_complement() && re->num_args() == 1 && re->arg(0)->is_full_seq())
+            return true;
+        // loop over an empty body: empty unless the loop is nullable
+        if (re->is_loop() && re->num_args() >= 1 && is_empty_regex(re->arg(0)))
+            return !re->is_nullable();
+
+        return false;
+    }
+
+    // -----------------------------------------------------------------------
+    // Cycle detection
+    // -----------------------------------------------------------------------
+
+    // True iff extract_cycle finds a cycle node for this membership.
+    bool nseq_regex::detect_cycle(seq::str_mem const& mem) const {
+        euf::snode* repeated = extract_cycle(mem);
+        return repeated != nullptr;
+    }
+
+    // -----------------------------------------------------------------------
+    // Ground prefix consumption
+    // -----------------------------------------------------------------------
+
+    // Consume leading concrete characters of mem.m_str by repeatedly taking
+    // the Brzozowski derivative of mem.m_regex; `mem` is updated in place.
+    //
+    // Returns conflict when a derivative is the fail regex, or when the
+    // string is fully consumed and the remaining regex is not nullable;
+    // satisfied when the string is fully consumed and the regex is nullable;
+    // ok otherwise (including when either component is missing).
+    nseq_regex::simplify_status nseq_regex::simplify_ground_prefix(seq::str_mem& mem) {
+        if (!mem.m_str || !mem.m_regex)
+            return simplify_status::ok;
+
+        for (;;) {
+            euf::snode* str = mem.m_str;
+            if (!str || str->is_empty())
+                break;
+            euf::snode* head = str->first();
+            if (!head || !head->is_char())
+                break;
+            euf::snode* next_re = m_sg.brzozowski_deriv(mem.m_regex, head);
+            if (!next_re)
+                break;          // derivative unavailable: stop consuming
+            if (next_re->is_fail())
+                return simplify_status::conflict;
+            mem.m_str = m_sg.drop_first(str);
+            mem.m_regex = next_re;
+        }
+
+        // something is left of the string (or it is gone entirely): undecided
+        if (!mem.m_str || !mem.m_str->is_empty())
+            return simplify_status::ok;
+        // whole string consumed: decided by nullability of the residual regex
+        return mem.m_regex->is_nullable() ? simplify_status::satisfied
+                                          : simplify_status::conflict;
+    }
+
+    // -----------------------------------------------------------------------
+    // Ground suffix consumption (best-effort)
+    // -----------------------------------------------------------------------
+
+    // Best-effort suffix simplification. No reverse derivatives are used:
+    // only a fully ground string is handled, by delegating to
+    // simplify_ground_prefix, which then consumes it from the front.
+    // Any other input is left untouched and reported as ok.
+    nseq_regex::simplify_status nseq_regex::simplify_ground_suffix(seq::str_mem& mem) {
+        bool const fully_ground = mem.m_str && mem.m_regex && mem.m_str->is_ground();
+        if (fully_ground)
+            return simplify_ground_prefix(mem);
+        return simplify_status::ok;
+    }
+
+    // -----------------------------------------------------------------------
+    // Trivial checks
+    // -----------------------------------------------------------------------
+
+    // Cheap classification of a membership constraint:
+    //   -1  conflict  (regex shown empty, or empty string with a non-nullable regex)
+    //    1  satisfied (regex is full, or empty string with a nullable regex)
+    //    0  undecided (also when either component is missing)
+    int nseq_regex::check_trivial(seq::str_mem const& mem) const {
+        if (!mem.m_str || !mem.m_regex)
+            return 0;
+        if (is_empty_regex(mem.m_regex))
+            return -1;
+        if (is_full_regex(mem.m_regex))
+            return 1;
+        if (!mem.m_str->is_empty())
+            return 0;
+        // empty string: nullability decides
+        return mem.m_regex->is_nullable() ? 1 : -1;
+    }
+
+    // -----------------------------------------------------------------------
+    // Minterm computation with filtering
+    // -----------------------------------------------------------------------
+
+    // Append to `minterms` the minterms the sgraph computes for `regex`,
+    // leaving out null entries and fail nodes. The results are regex
+    // character-class expressions, not concrete characters, so callers must
+    // use concrete or fresh characters when taking derivatives.
+    void nseq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) {
+        if (!regex)
+            return;
+
+        euf::snode_vector candidates;
+        m_sg.compute_minterms(regex, candidates);
+
+        for (euf::snode* mt : candidates) {
+            if (mt && !mt->is_fail())
+                minterms.push_back(mt);
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Collect first characters
+    // -----------------------------------------------------------------------
+
+    // Append to `chars` (without duplicates) candidate first characters for
+    // strings matched by `re`.
+    //
+    //  - to_re(s): the first character of s, also when s starts with a
+    //    string literal.
+    //  - full character class: 'a' as a representative.
+    //  - re.range(lo, hi): lo as a representative, whether lo is given as a
+    //    character constant, a one-character string literal, or a unit
+    //    wrapping a character constant.
+    //  - fail / full sequence: nothing.
+    //  - anything else: the union of the candidates of all children, which
+    //    may over-approximate the true set of first characters.
+    void nseq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
+        if (!re)
+            return;
+
+        // push ch unless the very same node is already present
+        auto add_unique = [&chars](euf::snode* ch) {
+            for (euf::snode* c : chars)
+                if (c == ch)
+                    return;
+            chars.push_back(ch);
+        };
+
+        // to_re(s): extract first character of the string body
+        if (re->is_to_re()) {
+            euf::snode* body = re->arg(0);
+            if (body && !body->is_empty()) {
+                euf::snode* first = body->first();
+                if (first && first->is_char())
+                    add_unique(first);
+                // Handle string literals (classified as s_other in sgraph)
+                else if (first && first->get_expr()) {
+                    seq_util& seq = m_sg.get_seq_util();
+                    zstring s;
+                    if (seq.str.is_string(first->get_expr(), s) && s.length() > 0)
+                        add_unique(m_sg.mk_char(s[0]));
+                }
+            }
+            return;
+        }
+
+        // full character set (.): use 'a' as representative
+        if (re->is_full_char()) {
+            add_unique(m_sg.mk_char('a'));
+            return;
+        }
+
+        // re.range(lo, hi): use lo as representative
+        if (re->get_expr()) {
+            seq_util& seq = m_sg.get_seq_util();
+            expr* lo = nullptr, *hi = nullptr;
+            if (seq.re.is_range(re->get_expr(), lo, hi) && lo) {
+                zstring s;
+                expr* unit_arg = nullptr;
+                unsigned ch_val = 'a';
+                if (seq.is_const_char(lo, ch_val))
+                    add_unique(m_sg.mk_char(ch_val));
+                // bound written as a one-character string literal
+                else if (seq.str.is_string(lo, s) && s.length() == 1)
+                    add_unique(m_sg.mk_char(s[0]));
+                // bound written as a unit sequence around a character constant
+                else if (seq.str.is_unit(lo, unit_arg) && seq.is_const_char(unit_arg, ch_val))
+                    add_unique(m_sg.mk_char(ch_val));
+                return;
+            }
+        }
+
+        if (re->is_fail() || re->is_full_seq())
+            return;
+
+        // recurse into children (handles union, concat, star, loop, etc.)
+        for (unsigned i = 0; i < re->num_args(); ++i)
+            collect_first_chars(re->arg(i), chars);
+    }
+
+    // -----------------------------------------------------------------------
+    // Membership processing
+    // -----------------------------------------------------------------------
+
+    // Process one membership constraint str ∈ regex.
+    //
+    // Returns false when the constraint is found to be unsatisfiable and
+    // true otherwise. When the constraint is neither refuted nor fully
+    // discharged, the simplified residual constraint is appended to
+    // out_mems for the Nielsen graph to expand further. A membership with a
+    // missing string or regex is accepted without producing output.
+    bool nseq_regex::process_str_mem(seq::str_mem const& mem,
+                                     vector<seq::str_mem>& out_mems) {
+        if (!mem.m_str || !mem.m_regex)
+            return true;
+        // empty string: check nullable
+        if (mem.m_str->is_empty())
+            return mem.m_regex->is_nullable();
+
+        // consume ground prefix: derive regex by each leading concrete char
+        seq::str_mem working = mem;
+        simplify_status st = simplify_ground_prefix(working);
+        if (st == simplify_status::conflict)
+            return false;
+        if (st == simplify_status::satisfied)
+            return true;
+
+        // simplify_ground_prefix tolerates the string becoming null while
+        // characters are dropped; treat that like the missing-string case at
+        // entry instead of dereferencing it below.
+        if (!working.m_str)
+            return true;
+
+        // after ground prefix consumption, if the front is still a concrete
+        // character we can take one more step (shouldn't happen after
+        // simplify_ground_prefix, but guard defensively)
+        euf::snode* first = working.m_str->first();
+        if (first && first->is_char()) {
+            seq::str_mem derived = derive(working, first);
+            if (is_empty_regex(derived.m_regex))
+                return false;
+            out_mems.push_back(derived);
+            return true;
+        }
+
+        // string starts with a non-ground element (variable or unit):
+        // return the simplified constraint for the Nielsen graph to expand
+        // via character-split modifiers.
+        out_mems.push_back(working);
+        return true;
+    }
+
+    // -----------------------------------------------------------------------
+    // History recording
+    // -----------------------------------------------------------------------
+
+    // Return a copy of `mem` whose history is extended by `history_re`.
+    // The history is a cons list encoded with regex concatenation:
+    //   re.concat(history_re, old_history)
+    // so arg(0) is the newest entry and arg(1) the older tail. When there is
+    // no previous history, `history_re` itself becomes the (leaf) history.
+    // NOTE(review): when history_re is nullptr, or when either node has no
+    // expression, the previous history is not carried over - confirm intended.
+    seq::str_mem nseq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) {
+        euf::snode* old_history = mem.m_history;
+        euf::snode* chained = history_re;
+        if (old_history && history_re) {
+            expr* head = history_re->get_expr();
+            expr* tail = old_history->get_expr();
+            if (head && tail) {
+                expr_ref cell(m_sg.get_seq_util().re.mk_concat(head, tail), m_sg.get_manager());
+                chained = m_sg.mk(cell);
+            }
+        }
+        return seq::str_mem(mem.m_str, mem.m_regex, chained, mem.m_id, mem.m_dep);
+    }
+
+    // -----------------------------------------------------------------------
+    // Cycle detection
+    // -----------------------------------------------------------------------
+
+    // Look for the current regex of `mem` among the entries of its history.
+    // Returns the matching history entry, or nullptr when either the regex
+    // or the history is missing, no entry matches, or the walk exceeds the
+    // depth bound.
+    //
+    // The history is read as a cons list built from regex concatenation:
+    // for a concat node arg(0) is an entry and arg(1) the remaining chain;
+    // any other node is treated as the last entry.
+    // NOTE(review): a last entry that is itself a re.concat is decomposed
+    // like a cons cell, so its arguments are compared instead of the entry
+    // itself - confirm that this cannot produce spurious matches.
+    euf::snode* nseq_regex::extract_cycle(seq::str_mem const& mem) const {
+        if (!mem.m_regex || !mem.m_history)
+            return nullptr;
+
+        euf::snode* const current = mem.m_regex;
+        // loop-invariant lookups, hoisted out of the walk
+        expr* const current_expr = current->get_expr();
+        seq_util& seq = m_sg.get_seq_util();
+
+        // bounded walk so a malformed chain cannot loop forever
+        unsigned bound = 1000;
+        for (euf::snode* hist = mem.m_history; hist && bound-- > 0; ) {
+            euf::snode* entry = hist;
+            euf::snode* tail = nullptr;
+
+            if (hist->is_concat() && hist->get_expr() &&
+                seq.re.is_concat(hist->get_expr())) {
+                entry = hist->arg(0);
+                tail = hist->arg(1);
+            }
+
+            // guard against a missing argument before dereferencing it
+            if (entry) {
+                // pointer equality first, expression identity as fallback
+                if (entry == current)
+                    return entry;
+                expr* entry_expr = entry->get_expr();
+                if (entry_expr && entry_expr == current_expr)
+                    return entry;
+            }
+
+            hist = tail;
+        }
+        return nullptr;
+    }
+
+    // -----------------------------------------------------------------------
+    // Stabilizer from cycle
+    // -----------------------------------------------------------------------
+
+    // Build the stabilizer for a detected cycle: the Kleene star of
+    // cycle_regex, created through the sgraph. current_regex is only
+    // checked for presence. Returns nullptr when either argument is null
+    // or cycle_regex has no underlying expression.
+    euf::snode* nseq_regex::stabilizer_from_cycle(euf::snode* cycle_regex,
+                                                   euf::snode* current_regex) {
+        if (cycle_regex == nullptr || current_regex == nullptr)
+            return nullptr;
+
+        expr* body = cycle_regex->get_expr();
+        if (body == nullptr)
+            return nullptr;
+
+        // wrap the cycle body in a star and register it with the sgraph
+        expr_ref starred(m_sg.get_seq_util().re.mk_star(body), m_sg.get_manager());
+        return m_sg.mk(starred);
+    }
+
+    // -----------------------------------------------------------------------
+    // Stabilizer-based subsumption
+    // -----------------------------------------------------------------------
+
+    // Decide whether `mem` can be dropped because its derivation history
+    // cycles back to its current regex. Requires that extract_cycle finds
+    // an entry and that a stabilizer can be built for it; the constraint is
+    // then reported as subsumed only when the entry is pointer-equal to the
+    // current regex. General regex containment is not attempted.
+    bool nseq_regex::try_subsume(seq::str_mem const& mem) {
+        euf::snode* repeated = extract_cycle(mem);
+        if (repeated == nullptr)
+            return false;
+
+        // the stabilizer must exist, even though only its presence is used
+        if (stabilizer_from_cycle(repeated, mem.m_regex) == nullptr)
+            return false;
+
+        // R ⊆ R*: only the identical-regex case is handled
+        return repeated == mem.m_regex;
+    }
+
+}
--- a/src/smt/nseq_regex.h
+++ b/src/smt/nseq_regex.h
@ -7,8 +7,20 @@ Module Name:

 Abstract:

-    Regex membership handling using Brzozowski derivatives.
-    Processes str_mem constraints after character consumption.
+    Lazy regex membership processing for the Nielsen-based string solver.
+
+    Provides Brzozowski derivative computation, ground prefix/suffix
+    consumption, cycle detection in derivation histories, and
+    stabilizer-based subsumption for regex membership constraints.
+
+    Ports the following ZIPT StrMem operations:
+      - SimplifyCharRegex / SimplifyDir (ground prefix/suffix consumption)
+      - ExtractCycle / StabilizerFromCycle (cycle detection and widening)
+      - TrySubsume (stabilizer-based subsumption)
+
+    The class wraps sgraph operations (brzozowski_deriv, compute_minterms,
+    drop_first, etc.) and provides a higher-level interface for
+    nielsen_graph and theory_nseq.

 Author:

@ -28,39 +40,146 @@ namespace smt {
    public:
        nseq_regex(euf::sgraph& sg) : m_sg(sg) {}

-        // check if a regex snode represents the empty language
-        bool is_empty_regex(euf::snode* re) const {
-            return re && re->is_fail();
+        euf::sgraph& sg() { return m_sg; }
+
+        // -----------------------------------------------------------------
+        // Basic regex predicates
+        // -----------------------------------------------------------------
+
+        // check if regex is the empty language (∅ / re.empty).
+        // performs structural analysis beyond is_fail() to detect
+        // derived emptiness (e.g., union of empties, concat with empty).
+        bool is_empty_regex(euf::snode* re) const;
+
+        // true iff re is non-null and is_full_seq() holds (Σ* / re.all)
+        bool is_full_regex(euf::snode* re) const {
+            return re != nullptr && re->is_full_seq();
         }

-        // compute derivative of regex re with respect to char elem and
-        // return a new str_mem for the resulting constraint
+        // true iff re is non-null and nullable (accepts the empty string)
+        bool is_nullable(euf::snode* re) const {
+            return re != nullptr && re->is_nullable();
+        }
+
+        // true iff re is non-null and ground (no string variables)
+        bool is_ground(euf::snode* re) const {
+            return re != nullptr && re->is_ground();
+        }
+
+        // -----------------------------------------------------------------
+        // Derivative computation
+        // -----------------------------------------------------------------
+
+        // Brzozowski derivative of re with respect to the character element
+        // elem, as computed by the sgraph. returns nullptr on failure.
+        euf::snode* derivative(euf::snode* re, euf::snode* elem) {
+            euf::snode* d = m_sg.brzozowski_deriv(re, elem);
+            return d;
+        }
+
+        // advance a str_mem past one character: derive the regex w.r.t. elem and drop_first the string.
+        // history, id and dependency are copied from mem unchanged; mem itself is not modified.
         seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
             euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem);
             euf::snode* new_str = m_sg.drop_first(mem.m_str);
             return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep);
         }

-        // process a regex membership constraint after one character has been consumed
-        // returns false if the resulting regex is empty (conflict)
-        bool process_str_mem(seq::str_mem const& mem,
-                              vector<seq::str_mem>& out_mems) {
-            if (!mem.m_str || !mem.m_regex)
-                return true;
-            // if regex does not accept the empty string and the string side is empty, conflict
-            if (mem.m_str->is_empty()) {
-                return mem.m_regex->is_nullable();
-            }
-            // compute minterms for the regex
-            euf::snode_vector minterms;
-            m_sg.compute_minterms(mem.m_regex, minterms);
-            for (euf::snode* ch : minterms) {
-                seq::str_mem new_mem = derive(mem, ch);
-                if (!is_empty_regex(new_mem.m_regex))
-                    out_mems.push_back(new_mem);
-            }
-            return true;
+        // -----------------------------------------------------------------
+        // Ground prefix/suffix consumption
+        // -----------------------------------------------------------------
+
+        enum class simplify_status { ok, conflict, satisfied };  // result of simplify_ground_prefix / simplify_ground_suffix
+
+        // consume ground characters from the front of mem.m_str by computing
+        // Brzozowski derivatives against mem.m_regex.
+        // stops when:
+        //   - the string front is not a concrete character (ok)
+        //   - a derivative produces ∅ (conflict)
+        //   - the string becomes empty and regex is nullable (satisfied)
+        //   - the string becomes empty and regex is not nullable (conflict)
+        // modifies mem in-place.
+        simplify_status simplify_ground_prefix(seq::str_mem& mem);
+
+        // consume ground characters from the back of mem.m_str by computing
+        // reverse derivatives. modifies mem in-place.
+        // (reverse derivatives require regex reversal; this is a best-effort
+        //  simplification that handles the common case of trailing constants.)
+        simplify_status simplify_ground_suffix(seq::str_mem& mem);
+
+        // -----------------------------------------------------------------
+        // Trivial checks
+        // -----------------------------------------------------------------
+
+        // quick check for trivially sat/unsat membership.
+        //   returns  1 if satisfied (empty string in nullable regex, or full regex)
+        //   returns -1 if conflicting (empty string in non-nullable, or ∅ regex)
+        //   returns  0 if undetermined
+        int check_trivial(seq::str_mem const& mem) const;
+
+        // -----------------------------------------------------------------
+        // Minterm and character computation
+        // -----------------------------------------------------------------
+
+        // compute minterms (character class partition) of regex re into 'minterms'; forwards to sgraph::compute_minterms
+        void compute_minterms(euf::snode* re, euf::snode_vector& minterms) {
+            m_sg.compute_minterms(re, minterms);
         }
+
+        // compute minterms for character splitting, filtering out empty
+        // (fail) minterms.  Minterms are regex character-class expressions
+        // forming a partition of the alphabet; callers use them to drive
+        // fresh-variable creation in character-split modifiers.
+        void get_minterms(euf::snode* regex, euf::snode_vector& minterms);
+
+        // collect concrete first-position characters from a regex.
+        // extracts characters reachable from to_re leaves and simple ranges.
+        void collect_first_chars(euf::snode* re, euf::snode_vector& chars);
+
+        // -----------------------------------------------------------------
+        // Membership processing
+        // -----------------------------------------------------------------
+
+        // process a str_mem constraint by consuming ground characters from
+        // the string front via Brzozowski derivatives.  If the entire ground
+        // prefix is consumed and the constraint is neither satisfied nor
+        // conflicting, the (simplified) constraint is pushed to out_mems
+        // for the Nielsen graph to expand via character-split modifiers.
+        // returns false if the constraint is immediately conflicting
+        // (empty string in non-nullable regex, or derivative yields ∅).
+        bool process_str_mem(seq::str_mem const& mem,
+                             vector<seq::str_mem>& out_mems);
+
+        // -----------------------------------------------------------------
+        // Cycle detection and stabilizers
+        // -----------------------------------------------------------------
+
+        // record current regex in the derivation history of a str_mem.
+        // the history is a chain of regex snapshots (newest first) used for cycle detection.
+        // returns the updated str_mem.
+        seq::str_mem record_history(seq::str_mem const& mem, euf::snode* history_re);
+
+        // check if the derivation history of mem contains a cycle, i.e.,
+        // the current regex of mem also occurs as an entry of its history chain.
+        // if found, returns the cycle entry point regex; nullptr otherwise.
+        euf::snode* extract_cycle(seq::str_mem const& mem) const;
+
+        // check if the derivation history exhibits a cycle.
+        // returns true when the current regex matches a previously seen regex
+        // in the history chain. used to trigger stabilizer introduction.
+        bool detect_cycle(seq::str_mem const& mem) const;
+
+        // compute a Kleene star stabilizer from a cycle.
+        // given the regex at the cycle point and the current regex,
+        // builds r* that over-approximates any number of cycle iterations.
+        // returns nullptr if no stabilizer can be computed.
+        euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
+                                          euf::snode* current_regex);
+
+        // try to subsume a str_mem constraint using stabilizer-based
+        // reasoning: if extract_cycle finds a cycle, check whether
+        // the current regex is already covered by the stabilizer.
+        // returns true if the constraint can be dropped.
+        bool try_subsume(seq::str_mem const& mem);
    };

 }
--- a/src/smt/nseq_state.h
+++ b/src/smt/nseq_state.h
@ -21,15 +21,48 @@ Author:
 #include "util/vector.h"
 #include "ast/euf/euf_sgraph.h"
 #include "smt/seq/seq_nielsen.h"
+#include "smt/smt_literal.h"

 namespace smt {

+    class enode;
+
+    // source info for a string equality (the two enodes whose merge caused it)
+    struct eq_source {
+        enode* m_n1;  // first enode of the merged pair
+        enode* m_n2;  // second enode of the merged pair
+    };
+
+    // source info for a regex membership (the literal that asserted it)
+    struct mem_source {
+        literal m_lit;  // asserting literal of the membership
+    };
+
+    // source info for a string disequality (the two enodes stated to differ)
+    struct diseq_source {
+        enode* m_n1;  // first side of the disequality
+        enode* m_n2;  // second side of the disequality
+    };
+
+    // negative regex membership: ¬(str in regex)
+    struct neg_mem_entry {
+        euf::snode* m_str;    // string side of the membership
+        euf::snode* m_regex;  // regex side of the membership
+        literal     m_lit;    // literal stored with the entry by add_neg_mem
+    };
+
    class nseq_state {
        euf::sgraph&            m_sg;
        vector<seq::str_eq>     m_str_eqs;
        vector<seq::str_mem>    m_str_mems;
+        vector<eq_source>       m_eq_sources;
+        vector<mem_source>      m_mem_sources;
+        vector<diseq_source>    m_diseqs;
+        vector<neg_mem_entry>   m_neg_mems;
        unsigned_vector         m_str_eq_lim;
        unsigned_vector         m_str_mem_lim;
+        unsigned_vector         m_diseq_lim;
+        unsigned_vector         m_neg_mem_lim;
        unsigned                m_next_mem_id = 0;

    public:
@ -38,37 +71,68 @@ namespace smt {
         void push() {
             m_str_eq_lim.push_back(m_str_eqs.size());
             m_str_mem_lim.push_back(m_str_mems.size());
+            m_diseq_lim.push_back(m_diseqs.size());      // remember disequality count for pop()
+            m_neg_mem_lim.push_back(m_neg_mems.size());  // remember negative-membership count for pop()
         }

         void pop(unsigned n) {
             for (unsigned i = 0; i < n; ++i) {
                 m_str_eqs.shrink(m_str_eq_lim.back());
+                m_eq_sources.shrink(m_str_eq_lim.back());   // sources are shrunk to the same size as m_str_eqs
                 m_str_eq_lim.pop_back();
                 m_str_mems.shrink(m_str_mem_lim.back());
+                m_mem_sources.shrink(m_str_mem_lim.back()); // sources are shrunk to the same size as m_str_mems
                 m_str_mem_lim.pop_back();
+                m_diseqs.shrink(m_diseq_lim.back());        // drop disequalities added since the matching push()
+                m_diseq_lim.pop_back();
+                m_neg_mems.shrink(m_neg_mem_lim.back());    // drop negative memberships added since the matching push()
+                m_neg_mem_lim.pop_back();
             }
         }

-        void add_str_eq(euf::snode* lhs, euf::snode* rhs) {
+        void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) {  // n1, n2: enodes recorded as the equality's source
             seq::dep_tracker dep;
             m_str_eqs.push_back(seq::str_eq(lhs, rhs, dep));
+            m_eq_sources.push_back({n1, n2});  // pushed together with the equality above
         }

-        void add_str_mem(euf::snode* str, euf::snode* regex) {
+        void add_str_mem(euf::snode* str, euf::snode* regex, literal lit) {  // lit: literal recorded as the membership's source
             seq::dep_tracker dep;
             m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep));
+            m_mem_sources.push_back({lit});  // pushed together with the membership above
+        }
+
+        // record a disequality between the enodes n1 and n2
+        void add_diseq(enode* n1, enode* n2) {
+            m_diseqs.push_back(diseq_source{n1, n2});
+        }
+
+        // record a negative membership ¬(str in regex) together with its literal
+        void add_neg_mem(euf::snode* str, euf::snode* regex, literal lit) {
+            m_neg_mems.push_back(neg_mem_entry{str, regex, lit});
         }

        vector<seq::str_eq> const&  str_eqs()  const { return m_str_eqs; }
        vector<seq::str_mem> const& str_mems() const { return m_str_mems; }
+        vector<diseq_source> const& diseqs()   const { return m_diseqs; }    // all recorded disequalities
+        vector<neg_mem_entry> const& neg_mems() const { return m_neg_mems; } // all recorded negative memberships

-        bool empty() const { return m_str_eqs.empty() && m_str_mems.empty(); }
+        eq_source const& get_eq_source(unsigned i) const { return m_eq_sources[i]; }    // i-th entry of m_eq_sources
+        mem_source const& get_mem_source(unsigned i) const { return m_mem_sources[i]; } // i-th entry of m_mem_sources
+        diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; }         // i-th recorded disequality
+        neg_mem_entry const& get_neg_mem(unsigned i) const { return m_neg_mems[i]; }    // i-th recorded negative membership
+
+        // true when no equality, membership, negative membership or disequality is stored
+        bool empty() const {
+            return m_str_eqs.empty() && m_str_mems.empty()
+                && m_neg_mems.empty() && m_diseqs.empty();
+        }

         void reset() {
             m_str_eqs.reset();
             m_str_mems.reset();
+            m_eq_sources.reset();   // cleared together with m_str_eqs
+            m_mem_sources.reset();  // cleared together with m_str_mems
+            m_diseqs.reset();
+            m_neg_mems.reset();
             m_str_eq_lim.reset();
             m_str_mem_lim.reset();
+            m_diseq_lim.reset();
+            m_neg_mem_lim.reset();  // NOTE(review): m_next_mem_id is not reset here - confirm ids are meant to keep growing
         }
    };

--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -183,22 +183,27 @@ Abstract:
      detection during character substitution are not ported.

    Modifier hierarchy (Constraints/Modifier/):
-    - All ~15 Modifier subclasses driving graph expansion are not ported:
-      VarNielsenModifier, ConstNielsenModifier, DirectedNielsenModifier,
-      EqSplitModifier, RegexVarSplitModifier, RegexCharSplitModifier,
-      StarIntrModifier, PowerSplitModifier, GPowerIntrModifier,
-      NumCmpModifier, NumUnwindingModifier, PowerEpsilonModifier,
-      DecomposeModifier, CombinedModifier, DetModifier.
-    - The modifier pattern (each Modifier produces one or more child nodes by
-      applying substitutions + side conditions to the parent node) is not ported.
+    - 13 Modifier subclasses driving graph expansion are ported as
+      apply_* methods in generate_extensions, matching ZIPT's TypeOrder
+      priority: DetModifier(1), PowerEpsilonModifier(2), NumCmpModifier(3),
+      ConstNumUnwindingModifier(4), EqSplitModifier(5), StarIntrModifier(6),
+      GPowerIntrModifier(7), ConstNielsenModifier(8), RegexCharSplitModifier(9),
+      RegexVarSplitModifier(10), PowerSplitModifier(11), VarNielsenModifier(12),
+      VarNumUnwindingModifier(13).
+    - NOT PORTED: DirectedNielsenModifier, DecomposeModifier, CombinedModifier.
+    - NumCmp, ConstNumUnwinding, VarNumUnwinding are approximated (no PDD
+      integer polynomial infrastructure; power tokens are replaced with ε
+      or peeled with fresh variables instead of exact exponent arithmetic).

    Search procedure:
-    - NielsenNode.GraphExpansion(): the recursive search with iterative deepening
-      (depth-bounded DFS with SAT/UNSAT/CYCLIC return codes) is not ported.
-    - NielsenNode.SimplifyAndInit(): the simplification-and-initialization pass
-      run at node creation is not ported.
-    - NielsenGraph.Check(): the top-level entry point with iterative deepening,
-      inner solver setup and subsumption-node lookup is not ported.
+    - NielsenGraph.Check() / NielsenNode.GraphExpansion(): ported as
+      nielsen_graph::solve() (iterative deepening, 6 rounds starting at
+      depth 10, doubling) and search_dfs() (depth-bounded DFS with
+      eval_idx cycle detection and node status tracking). The inner solver
+      setup and subsumption-node lookup within Check() are not ported.
+    - NielsenNode.SimplifyAndInit(): ported as
+      nielsen_node::simplify_and_init() with prefix matching, symbol clash,
+      empty propagation, and Brzozowski derivative consumption.
    - NielsenGraph.FindExisting(): the subsumption cache lookup over
      subsumptionCandidates is not ported.

@ -231,6 +236,7 @@ Author:
 #include "util/vector.h"
 #include "util/uint_set.h"
 #include "ast/ast.h"
+#include "ast/arith_decl_plugin.h"
 #include "ast/seq_decl_plugin.h"
 #include "ast/euf/euf_sgraph.h"

@ -281,6 +287,9 @@ namespace seq {
        bool is_superset(dep_tracker const& other) const;
        bool empty() const;

+        // collect indices of all set bits into 'indices'
+        void get_set_bits(unsigned_vector& indices) const;
+
        bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; }
        bool operator!=(dep_tracker const& other) const { return !(*this == other); }
    };
@ -353,6 +362,24 @@ namespace seq {
        }
    };

+    // classifies a length_constraint; the kind determines the propagation strategy
+    enum class length_kind {
+        nonneg,   // len(x) >= 0: unconditional axiom
+        eq,       // len(lhs) = len(rhs): conditional on string equality
+        bound     // Parikh bound: conditional on regex membership
+    };
+
+    // arithmetic length constraint derived from string equations
+    struct length_constraint {
+        expr_ref    m_expr;  // arithmetic expression (e.g., len(x) + len(y) = len(a) + 1)
+        dep_tracker m_dep;   // tracks which input constraints contributed
+        length_kind m_kind;  // determines propagation strategy
+
+        length_constraint(ast_manager& m): m_expr(m), m_kind(length_kind::nonneg) {}  // no expression given: default dep, kind nonneg
+        length_constraint(expr* e, dep_tracker const& dep, length_kind kind, ast_manager& m):  // fully specified constraint
+            m_expr(e, m), m_dep(dep), m_kind(kind) {}
+    };
+
    // edge in the Nielsen graph connecting two nodes
    // mirrors ZIPT's NielsenEdge
    class nielsen_edge {
@ -469,6 +496,39 @@ namespace seq {

        // true if other's constraint set is a subset of this node's
        bool is_subsumed_by(nielsen_node const& other) const;
+
+        // true if any constraint has opaque (s_other) terms that
+        // the Nielsen graph cannot decompose
+        bool has_opaque_terms() const;
+    };
+
+    // search statistics collected during Nielsen graph solving.
+    // every counter is default-initialized to zero.
+    struct nielsen_stats {
+        unsigned m_num_solve_calls     = 0;
+        unsigned m_num_dfs_nodes       = 0;
+        unsigned m_num_sat             = 0;
+        unsigned m_num_unsat           = 0;
+        unsigned m_num_unknown         = 0;
+        unsigned m_num_simplify_conflict = 0;
+        unsigned m_num_subsumptions    = 0;
+        unsigned m_num_extensions      = 0;
+        unsigned m_num_fresh_vars      = 0;
+        unsigned m_max_depth           = 0;
+        // modifier application counts
+        unsigned m_mod_det             = 0;
+        unsigned m_mod_power_epsilon   = 0;
+        unsigned m_mod_num_cmp         = 0;
+        unsigned m_mod_const_num_unwinding = 0;
+        unsigned m_mod_eq_split        = 0;
+        unsigned m_mod_star_intr       = 0;
+        unsigned m_mod_gpower_intr     = 0;
+        unsigned m_mod_const_nielsen   = 0;
+        unsigned m_mod_regex_char_split = 0;
+        unsigned m_mod_regex_var_split = 0;
+        unsigned m_mod_power_split     = 0;
+        unsigned m_mod_var_nielsen     = 0;
+        unsigned m_mod_var_num_unwinding = 0;
+        // restore every counter to its default by value assignment: unlike a raw
+        // memset over `this` it needs no <cstring> and stays correct if a
+        // member with a non-trivial type or a non-zero default is added.
+        void reset() { *this = nielsen_stats(); }
+    };

    // the overall Nielsen transformation graph
@ -482,6 +542,10 @@ namespace seq {
        unsigned                      m_run_idx = 0;
        unsigned                      m_depth_bound = 0;
        unsigned                      m_next_mem_id = 0;
+        unsigned                      m_fresh_cnt = 0;
+        unsigned                      m_num_input_eqs = 0;
+        unsigned                      m_num_input_mems = 0;
+        nielsen_stats                 m_stats;

    public:
        nielsen_graph(euf::sgraph& sg);
@ -519,6 +583,10 @@ namespace seq {
        // generate next unique regex membership id
        unsigned next_mem_id() { return m_next_mem_id++; }

+        // number of input equalities / memberships (m_num_input_eqs / m_num_input_mems), for dep_tracker bit mapping
+        unsigned num_input_eqs() const { return m_num_input_eqs; }
+        unsigned num_input_mems() const { return m_num_input_mems; }
+
        // display for debugging
        std::ostream& display(std::ostream& out) const;

@ -541,8 +609,111 @@ namespace seq {
        // collect dependency information from conflicting constraints
        void collect_conflict_deps(dep_tracker& deps) const;

+        // explain a conflict: partition the set bits into str_eq indices
+        // (bits 0..num_eqs-1) and str_mem indices (bits num_eqs..num_eqs+num_mems-1).
+        // Must be called after solve() returns unsat.
+        void explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const;
+
+        // accumulated search statistics (read-only view of m_stats)
+        nielsen_stats const& stats() const { return m_stats; }
+        void reset_stats() { m_stats.reset(); }  // delegates to nielsen_stats::reset()
+
+        // generate arithmetic length constraints from the root node's string
+        // equalities and regex memberships. For each non-trivial equation lhs = rhs,
+        // produces len(lhs) = len(rhs) by expanding concatenations into sums.
+        // For each regex membership str in regex, produces Parikh interval
+        // constraints: len(str) >= min_len and len(str) <= max_len.
+        // Also generates len(x) >= 0 for each variable appearing in the equations.
+        void generate_length_constraints(vector<length_constraint>& constraints);
+
    private:
        search_result search_dfs(nielsen_node* node, unsigned depth);
+
+        // create a fresh variable with a unique name
+        euf::snode* mk_fresh_var();
+
+        // deterministic modifier: var = ε, same-head cancel
+        bool apply_det_modifier(nielsen_node* node);
+
+        // const nielsen modifier: char vs var (2 branches per case)
+        bool apply_const_nielsen(nielsen_node* node);
+
+        // variable Nielsen modifier: var vs var, all progress (3 branches)
+        bool apply_var_nielsen(nielsen_node* node);
+
+        // eq split modifier: var vs var (3 branches)
+        bool apply_eq_split(nielsen_node* node);
+
+        // apply regex character split modifier to a node.
+        // for a str_mem constraint x·s ∈ R where x is a variable:
+        //   (1) x → c·z for each char c accepted by R at first position
+        //   (2) x → ε (x is empty)
+        // returns true if children were generated.
+        bool apply_regex_char_split(nielsen_node* node);
+
+        // power epsilon modifier: for a power token u^n in an equation,
+        // branch: (1) base u = ε, (2) power is empty (n = 0 semantics).
+        // mirrors ZIPT's PowerEpsilonModifier
+        bool apply_power_epsilon(nielsen_node* node);
+
+        // numeric comparison modifier: for equations involving power tokens
+        // u^m and u^n with the same base, branch on m < n vs n <= m.
+        // mirrors ZIPT's NumCmpModifier
+        bool apply_num_cmp(nielsen_node* node);
+
+        // constant numeric unwinding: for a power token u^n vs a constant
+        // (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
+        // mirrors ZIPT's ConstNumUnwindingModifier
+        bool apply_const_num_unwinding(nielsen_node* node);
+
+        // star introduction: for a str_mem x·s ∈ R where a cycle is detected
+        // (backedge exists), introduce stabilizer: x ∈ base* with x split.
+        // mirrors ZIPT's StarIntrModifier
+        bool apply_star_intr(nielsen_node* node);
+
+        // generalized power introduction: for a variable x matched against
+        // a ground repeated pattern, introduce x = base^n · prefix(base)
+        // with fresh power variable n and side constraint n >= 0.
+        // mirrors ZIPT's GPowerIntrModifier
+        bool apply_gpower_intr(nielsen_node* node);
+
+        // regex variable split: for str_mem x·s ∈ R where x is a variable,
+        // split using minterms: x → ε, or x → c·x' for each minterm c.
+        // More general than regex_char_split, uses minterm partitioning.
+        // mirrors ZIPT's RegexVarSplitModifier
+        bool apply_regex_var_split(nielsen_node* node);
+
+        // power split: for a variable x facing a power token u^n,
+        // branch: x = u^m · prefix(u) with m < n, or x = u^n · x.
+        // mirrors ZIPT's PowerSplitModifier
+        bool apply_power_split(nielsen_node* node);
+
+        // variable numeric unwinding: for a power token u^n vs a variable,
+        // branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
+        // mirrors ZIPT's VarNumUnwindingModifier
+        bool apply_var_num_unwinding(nielsen_node* node);
+
+        // collect concrete first-position characters from a regex snode
+        void collect_first_chars(euf::snode* re, euf::snode_vector& chars);
+
+        // find the first power token in any str_eq at this node
+        euf::snode* find_power_token(nielsen_node* node) const;
+
+        // find a power token facing a constant (char) head
+        bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
+
+        // find a power token facing a variable head
+        bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
+
+        // build an arithmetic expression representing the length of an snode tree.
+        // concatenations are expanded to sums, chars to 1, empty to 0,
+        // variables to (str.len var_expr).
+        expr_ref compute_length_expr(euf::snode* n);
+
+        // compute Parikh length interval [min_len, max_len] for a regex snode.
+        // uses seq_util::rex min_length/max_length on the underlying expression.
+        // max_len == UINT_MAX means unbounded.
+        void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
    };

 }
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -17,6 +17,10 @@ Author:
 --*/
 #include "smt/theory_nseq.h"
 #include "smt/smt_context.h"
+#include "smt/smt_justification.h"
+#include "smt/proto_model/proto_model.h"
+#include "ast/array_decl_plugin.h"
+#include "ast/ast_pp.h"
 #include "util/statistics.h"

 namespace smt {
@ -26,43 +30,102 @@ namespace smt {
        m_seq(ctx.get_manager()),
        m_autil(ctx.get_manager()),
        m_rewriter(ctx.get_manager()),
+        m_arith_value(ctx.get_manager()),
        m_egraph(ctx.get_manager()),
        m_sgraph(ctx.get_manager(), m_egraph),
        m_nielsen(m_sgraph),
-        m_state(m_sgraph)
+        m_state(m_sgraph),
+        m_regex(m_sgraph),
+        m_model(*this, ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex)
    {}

+    // -----------------------------------------------------------------------
+    // Initialization
+    // -----------------------------------------------------------------------
+
+    void theory_nseq::init() {
+        m_arith_value.init(&get_context());  // bind the arith_value helper to this theory's context; get_num_value, lower_bound, upper_bound and get_length query it
+    }
+
    // -----------------------------------------------------------------------
    // Internalization
    // -----------------------------------------------------------------------

    bool theory_nseq::internalize_atom(app* atom, bool /*gate_ctx*/) {
+        // Internalizes a Boolean atom of this theory; every path returns true.
+        context& ctx = get_context();
+
+        // str.in_re atoms are boolean predicates: register as bool_var
+        // so that assign_eh fires when the SAT solver assigns them.
+        // Following theory_seq: create a bool_var directly without an enode
+        // for the str.in_re predicate (avoids needing to internalize the regex arg).
+        if (m_seq.str.is_in_re(atom)) {
+            expr* str_arg = atom->get_arg(0);
+            mk_var(ensure_enode(str_arg));
+            if (!ctx.b_internalized(atom)) {
+                bool_var bv = ctx.mk_bool_var(atom);
+                ctx.set_var_theory(bv, get_id());
+                ctx.mark_as_relevant(bv);
+            }
+            get_snode(str_arg);  // registers the string argument in the private sgraph; the returned node is not needed here
+            return true;
+        }
        return internalize_term(atom);
    }

+    theory_var theory_nseq::mk_var(enode* n) {  // returns n's theory variable for this theory, creating and attaching one on first use
+        expr* o = n->get_expr();
+        if (!m_seq.is_seq(o) && !m_seq.is_re(o) && !m_seq.str.is_nth_u(o))  // only sequence, regex and nth_u terms get a variable
+            return null_theory_var;
+        if (is_attached_to_var(n))  // already attached: hand back the existing variable
+            return n->get_th_var(get_id());
+        theory_var v = theory::mk_var(n);
+        get_context().attach_th_var(n, this, v);
+        get_context().mark_as_relevant(n);
+        return v;
+    }
+
    bool theory_nseq::internalize_term(app* term) {
        context& ctx = get_context();
        ast_manager& m = get_manager();

-        // ensure children are internalized first
-        for (expr* arg : *term) {
-            if (is_app(arg) && m_seq.is_seq(arg)) {
-                ctx.internalize(arg, false);
-            }
+        // ensure ALL children are internalized (following theory_seq pattern)
+        for (auto arg : *term)
+            mk_var(ensure_enode(arg));  // mk_var returns null_theory_var without attaching anything when arg is not a seq/regex/nth_u term
+
+        if (ctx.e_internalized(term)) {  // term already has an enode: only make sure it carries a theory var, then stop
+            mk_var(ctx.get_enode(term));
+            return true;
        }

-        if (!ctx.e_internalized(term)) {
-            ctx.mk_enode(term, false, m.is_bool(term), true);
+        if (m.is_bool(term)) {  // Boolean-sorted terms also get a bool_var assigned to this theory
+            bool_var bv = ctx.mk_bool_var(term);
+            ctx.set_var_theory(bv, get_id());
+            ctx.mark_as_relevant(bv);
        }

-        enode* en = ctx.get_enode(term);
-        if (!is_attached_to_var(en)) {
-            theory_var v = mk_var(en);
-            (void)v;
+        enode* en;
+        if (ctx.e_internalized(term)) {  // NOTE(review): e_internalized(term) was false a few lines up; confirm whether mk_bool_var can make this branch reachable
+            en = ctx.get_enode(term);
        }
+        else {
+            en = ctx.mk_enode(term, false, m.is_bool(term), true);
+        }
+        mk_var(en);

        // register in our private sgraph
        get_snode(term);
+
+        // track higher-order terms for lazy unfolding
+        expr* ho_f = nullptr, *ho_s = nullptr, *ho_b = nullptr, *ho_i = nullptr;
+        if (m_seq.str.is_map(term, ho_f, ho_s) ||
+            m_seq.str.is_mapi(term, ho_f, ho_i, ho_s) ||
+            m_seq.str.is_foldl(term, ho_f, ho_b, ho_s) ||
+            m_seq.str.is_foldli(term, ho_f, ho_i, ho_b, ho_s)) {
+            m_ho_terms.push_back(term);  // visited later by unfold_ho_terms; truncated on pop via m_ho_lim
+            ensure_length_var(ho_s);  // make sure the length term of the sequence argument is internalized
+        }
+
        return true;
    }

@ -73,16 +136,73 @@ namespace smt {
    void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) {
        expr* e1 = get_enode(v1)->get_expr();
        expr* e2 = get_enode(v2)->get_expr();
+        if (m_seq.is_re(e1)) {  // regex equality is not processed here; a non-zero count makes final_check_eh give up (has_unhandled_preds)
+            ++m_num_unhandled_bool;
+            return;
+        }
        if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2))
            return;
        euf::snode* s1 = get_snode(e1);
        euf::snode* s2 = get_snode(e2);
-        if (s1 && s2)
-            m_state.add_str_eq(s1, s2);
+        if (s1 && s2) {
+            unsigned idx = m_state.str_eqs().size();  // size before the add; propagate_eq uses it as the entry's index (assumes add_str_eq appends — TODO confirm)
+            m_state.add_str_eq(s1, s2, get_enode(v1), get_enode(v2));
+            m_prop_queue.push_back({prop_item::eq_prop, idx});  // follow-up work happens in propagate()
+        }
    }

-    void theory_nseq::new_diseq_eh(theory_var /*v1*/, theory_var /*v2*/) {
-        // not handled in this initial skeleton
+    void theory_nseq::new_diseq_eh(theory_var v1, theory_var v2) {  // records a sequence disequality in m_state and queues it for propagate()
+        expr* e1 = get_enode(v1)->get_expr();
+        expr* e2 = get_enode(v2)->get_expr();
+        if (m_seq.is_re(e1)) {
+            // regex disequality: nseq cannot verify language non-equivalence
+            ++m_num_unhandled_bool;
+            return;
+        }
+        if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2))  // disequalities between non-sequence terms are ignored
+            return;
+        unsigned idx = m_state.diseqs().size();  // size before the add; propagate_diseq reads the entry back with this index (assumes add_diseq appends — TODO confirm)
+        m_state.add_diseq(get_enode(v1), get_enode(v2));
+        m_prop_queue.push_back({prop_item::diseq_prop, idx});
+    }
+
+    // -----------------------------------------------------------------------
+    // Boolean assignment notification
+    // -----------------------------------------------------------------------
+
+    void theory_nseq::assign_eh(bool_var v, bool is_true) {  // records an assigned str.in_re atom as a positive or negative membership and queues it for propagate()
+        context& ctx = get_context();
+        expr* e = ctx.bool_var2expr(v);
+        expr* s = nullptr;
+        expr* re = nullptr;
+        if (!m_seq.str.is_in_re(e, s, re)) {
+            // Track unhandled boolean string predicates (prefixof, contains, etc.)
+            if (is_app(e) && to_app(e)->get_family_id() == m_seq.get_family_id())
+                ++m_num_unhandled_bool;
+            return;
+        }
+        euf::snode* sn_str = get_snode(s);
+        euf::snode* sn_re  = get_snode(re);
+        if (!sn_str || !sn_re)  // without an sgraph node for both sides the membership is dropped without being recorded
+            return;
+
+        if (is_true) {
+            unsigned idx = m_state.str_mems().size();  // size before the add, used as the new entry's index
+            literal lit(v, false);  // literal for v itself, kept as the justification of this membership
+            m_state.add_str_mem(sn_str, sn_re, lit);
+            m_prop_queue.push_back({prop_item::pos_mem_prop, idx});
+        }
+        else {
+            unsigned idx = m_state.neg_mems().size();  // size before the add, used as the new entry's index
+            literal lit(v, true);  // literal for the negation of v
+            m_state.add_neg_mem(sn_str, sn_re, lit);
+            m_prop_queue.push_back({prop_item::neg_mem_prop, idx});
+        }
+
+        TRACE(seq, tout << "nseq assign_eh: " << (is_true ? "" : "¬")
+                        << "str.in_re "
+                        << mk_bounded_pp(s, get_manager(), 3) << " in "
+                        << mk_bounded_pp(re, get_manager(), 3) << "\n";);
    }

    // -----------------------------------------------------------------------
@ -93,12 +213,139 @@ namespace smt {
        theory::push_scope_eh();
        m_state.push();
        m_sgraph.push();
+        m_prop_lim.push_back(m_prop_queue.size());
+        m_ho_lim.push_back(m_ho_terms.size());
+        m_unhandled_bool_lim.push_back(m_num_unhandled_bool);
    }

    void theory_nseq::pop_scope_eh(unsigned num_scopes) {
        theory::pop_scope_eh(num_scopes);
        m_state.pop(num_scopes);
        m_sgraph.pop(num_scopes);
+        unsigned new_sz = m_prop_lim[m_prop_lim.size() - num_scopes];  // queue size saved by the oldest of the num_scopes scopes being popped
+        m_prop_queue.shrink(new_sz);
+        m_prop_lim.shrink(m_prop_lim.size() - num_scopes);
+        if (m_prop_qhead > m_prop_queue.size())  // the head may now point past the truncated queue; clamp it
+            m_prop_qhead = m_prop_queue.size();
+        unsigned ho_sz = m_ho_lim[m_ho_lim.size() - num_scopes];  // same scheme for the higher-order term list
+        m_ho_terms.shrink(ho_sz);
+        m_ho_lim.shrink(m_ho_lim.size() - num_scopes);
+        m_num_unhandled_bool = m_unhandled_bool_lim[m_unhandled_bool_lim.size() - num_scopes];  // restore the counter value saved by push_scope_eh
+        m_unhandled_bool_lim.shrink(m_unhandled_bool_lim.size() - num_scopes);
+    }
+
+    // -----------------------------------------------------------------------
+    // Propagation: eager eq/diseq/literal dispatch
+    // -----------------------------------------------------------------------
+
+    bool theory_nseq::can_propagate() {
+        return m_prop_qhead < m_prop_queue.size();  // true while the queue holds entries that propagate() has not consumed yet
+    }
+
+    void theory_nseq::propagate() {
+        context& ctx = get_context();
+        while (m_prop_qhead < m_prop_queue.size() && !ctx.inconsistent()) {  // drain the queue in order, stopping as soon as the context is inconsistent
+            prop_item const& item = m_prop_queue[m_prop_qhead++];
+            switch (item.m_kind) {  // m_idx is an index into the m_state collection that matches m_kind
+            case prop_item::eq_prop:
+                propagate_eq(item.m_idx);
+                break;
+            case prop_item::diseq_prop:
+                propagate_diseq(item.m_idx);
+                break;
+            case prop_item::pos_mem_prop:
+                propagate_pos_mem(item.m_idx);
+                break;
+            case prop_item::neg_mem_prop:
+                propagate_neg_mem(item.m_idx);
+                break;
+            }
+        }
+    }
+
+    void theory_nseq::propagate_eq(unsigned idx) {
+        // When s1 = s2 is learned, ensure len(s1) and len(s2) are
+        // internalized so congruence closure propagates len(s1) = len(s2).
+        eq_source const& src = m_state.get_eq_source(idx);  // the eq_source entry exposes the two enodes of equality idx as m_n1 / m_n2
+        ensure_length_var(src.m_n1->get_expr());
+        ensure_length_var(src.m_n2->get_expr());
+    }
+
+    void theory_nseq::propagate_diseq(unsigned idx) {
+        // Disequalities are recorded (by new_diseq_eh) for use during final_check.
+        // Nothing is propagated eagerly here: the body only emits a trace line.
+        TRACE(seq,
+            auto const& d = m_state.get_diseq(idx);
+            tout << "nseq diseq: "
+                 << mk_bounded_pp(d.m_n1->get_expr(), get_manager(), 3)
+                 << " != "
+                 << mk_bounded_pp(d.m_n2->get_expr(), get_manager(), 3) << "\n";);
+    }
+
+    void theory_nseq::propagate_pos_mem(unsigned idx) {  // cheap conflict checks for the positive membership at index idx, then length-term setup
+        auto const& mem = m_state.str_mems()[idx];
+        auto const& src = m_state.get_mem_source(idx);  // supplies the literal (m_lit) used to justify a conflict
+
+        if (!mem.m_str || !mem.m_regex)
+            return;
+
+        // regex is ∅ → conflict
+        if (m_regex.is_empty_regex(mem.m_regex)) {
+            enode_pair_vector eqs;  // stays empty: the membership literal alone justifies the conflict
+            literal_vector lits;
+            lits.push_back(src.m_lit);
+            set_conflict(eqs, lits);
+            return;
+        }
+
+        // empty string in non-nullable regex → conflict
+        if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) {
+            enode_pair_vector eqs;  // stays empty, as above
+            literal_vector lits;
+            lits.push_back(src.m_lit);
+            set_conflict(eqs, lits);
+            return;
+        }
+
+        // ensure length term exists for the string argument
+        expr* s_expr = mem.m_str->get_expr();
+        if (s_expr)
+            ensure_length_var(s_expr);
+    }
+
+    void theory_nseq::propagate_neg_mem(unsigned idx) {  // cheap conflict checks for the negative membership at index idx; nothing else is propagated
+        auto const& entry = m_state.get_neg_mem(idx);
+
+        if (!entry.m_str || !entry.m_regex)
+            return;
+
+        // ¬(s in Σ*) is always false → conflict
+        if (m_regex.is_full_regex(entry.m_regex)) {
+            enode_pair_vector eqs;  // stays empty: the negated membership literal alone justifies the conflict
+            literal_vector lits;
+            lits.push_back(entry.m_lit);
+            set_conflict(eqs, lits);
+            return;
+        }
+
+        // ¬(ε in R) where R is nullable → conflict
+        if (entry.m_str->is_empty() && entry.m_regex->is_nullable()) {
+            enode_pair_vector eqs;  // stays empty, as above
+            literal_vector lits;
+            lits.push_back(entry.m_lit);
+            set_conflict(eqs, lits);
+            return;
+        }
+    }
+
+    void theory_nseq::ensure_length_var(expr* e) {  // internalizes the length term of e in the context when e is a sequence and the term is not there yet
+        if (!e || !m_seq.is_seq(e))  // null and non-sequence arguments are ignored
+            return;
+        context& ctx = get_context();
+        ast_manager& m = get_manager();
+        expr_ref len(m_seq.str.mk_length(e), m);
+        if (!ctx.e_internalized(len))
+            ctx.internalize(len, false);
    }

    // -----------------------------------------------------------------------
@ -107,30 +354,162 @@ namespace smt {

    void theory_nseq::populate_nielsen_graph() {
        m_nielsen.reset();
-        seq::nielsen_node* root = m_nielsen.mk_node();
-        m_nielsen.set_root(root);
-        for (auto const& eq : m_state.str_eqs())
-            root->add_str_eq(eq);
-        for (auto const& mem : m_state.str_mems())
-            root->add_str_mem(mem);
+        m_nielsen_to_state_mem.reset();  // rebuilt below: entry k is the m_state index of the k-th membership handed to m_nielsen
+
+        // transfer string equalities from state to nielsen graph root
+        for (auto const& eq : m_state.str_eqs()) {
+            m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs);
+        }
+
+        // transfer regex memberships, pre-processing through nseq_regex
+        // to consume ground prefixes via Brzozowski derivatives
+        for (unsigned state_idx = 0; state_idx < m_state.str_mems().size(); ++state_idx) {
+            auto const& mem = m_state.str_mems()[state_idx];
+            int triv = m_regex.check_trivial(mem);  // as used below: > 0 trivially satisfied, < 0 trivially unsat, 0 needs processing
+            if (triv > 0)
+                continue;  // trivially satisfied, skip
+            if (triv < 0) {
+                // trivially unsat: add anyway so solve() detects conflict
+                m_nielsen.add_str_mem(mem.m_str, mem.m_regex);
+                m_nielsen_to_state_mem.push_back(state_idx);
+                continue;
+            }
+            // pre-process: consume ground prefix characters
+            vector<seq::str_mem> processed;
+            if (!m_regex.process_str_mem(mem, processed)) {
+                // conflict during ground prefix consumption
+                m_nielsen.add_str_mem(mem.m_str, mem.m_regex);  // the unprocessed membership is added, leaving the conflict to the Nielsen search
+                m_nielsen_to_state_mem.push_back(state_idx);
+                continue;
+            }
+            for (auto const& pm : processed) {
+                m_nielsen.add_str_mem(pm.m_str, pm.m_regex);
+                m_nielsen_to_state_mem.push_back(state_idx);  // every derived membership maps back to the same state entry
+            }
+        }
+
+        TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, "
+                        << m_state.str_mems().size() << " mems -> nielsen root with "
+                        << m_nielsen.num_input_eqs() << " eqs, "
+                        << m_nielsen.num_input_mems() << " mems\n";);
    }

    final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) {
+        // Always assert non-negativity for all string theory vars,
+        // even when there are no string equations/memberships.
+        if (assert_nonneg_for_all_vars())  // new axioms were added: let the context process them before continuing
+            return FC_CONTINUE;
+
+        // If there are unhandled boolean string predicates (prefixof, contains, etc.)
+        // we cannot declare sat — return unknown.
+        if (has_unhandled_preds())
+            return FC_GIVEUP;
+
+        if (m_state.empty() && m_ho_terms.empty())  // nothing recorded for this theory
+            return FC_DONE;
+
+        // unfold higher-order terms when sequence structure is known
+        if (unfold_ho_terms())
+            return FC_CONTINUE;
+
        if (m_state.empty())
            return FC_DONE;
-        // For now, give up if there are string constraints.
-        // The full search will be wired in once the Nielsen algorithms are complete.
+
        populate_nielsen_graph();
-        ++m_num_nodes_explored;
+
+        // assert length constraints derived from string equalities
+        if (assert_length_constraints())
+            return FC_CONTINUE;
+
+        ++m_num_final_checks;  // counts only the rounds that reach the Nielsen solve() call
+
+        auto result = m_nielsen.solve();
+
+        if (result == seq::nielsen_graph::search_result::sat) {
+            // Nielsen found a consistent assignment for positive constraints.
+            // If there are negative memberships or disequalities we haven't verified,
+            // we cannot soundly declare sat.
+            if (!m_state.neg_mems().empty() || !m_state.diseqs().empty())
+                return FC_GIVEUP;
+            return FC_DONE;
+        }
+
+        if (result == seq::nielsen_graph::search_result::unsat) {
+            explain_nielsen_conflict();  // turns the Nielsen dependencies into a context conflict
+            return FC_CONTINUE;
+        }
+
        return FC_GIVEUP;
    }

+    // -----------------------------------------------------------------------
+    // Conflict explanation
+    // -----------------------------------------------------------------------
+
+    void theory_nseq::deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits) {  // appends to eqs/lits the enode pairs and literals named by the set bits of deps
+        context& ctx = get_context();
+        unsigned_vector bits;
+        deps.get_set_bits(bits);
+        unsigned num_input_eqs = m_nielsen.num_input_eqs();  // bits below this count denote input equalities, the remaining ones memberships
+        for (unsigned b : bits) {
+            if (b < num_input_eqs) {
+                eq_source const& src = m_state.get_eq_source(b);
+                if (src.m_n1->get_root() == src.m_n2->get_root())  // keep the pair only while both enodes share a root
+                    eqs.push_back({src.m_n1, src.m_n2});
+            }
+            else {
+                unsigned mem_idx = b - num_input_eqs;  // position among the memberships handed to m_nielsen
+                if (mem_idx < m_nielsen_to_state_mem.size()) {  // indices without a recorded mapping are skipped
+                    unsigned state_mem_idx = m_nielsen_to_state_mem[mem_idx];
+                    mem_source const& src = m_state.get_mem_source(state_mem_idx);
+                    if (ctx.get_assignment(src.m_lit) == l_true)  // only literals currently assigned true are used
+                        lits.push_back(src.m_lit);
+                }
+            }
+        }
+    }
+
+    void theory_nseq::add_conflict_clause(seq::dep_tracker const& deps) {  // raises a conflict justified by the equalities and literals that deps maps to
+        enode_pair_vector eqs;
+        literal_vector lits;
+        deps_to_lits(deps, eqs, lits);
+        ++m_num_conflicts;  // reported as "nseq conflicts" by collect_statistics
+        set_conflict(eqs, lits);
+    }
+
+    void theory_nseq::explain_nielsen_conflict() {  // called by final_check_eh after solve() reported unsat
+        seq::dep_tracker deps;
+        m_nielsen.collect_conflict_deps(deps);  // deps is filled in by the Nielsen graph
+        add_conflict_clause(deps);
+    }
+
+    void theory_nseq::set_conflict(enode_pair_vector const& eqs, literal_vector const& lits) {  // reports a theory conflict to the context, justified by lits and eqs
+        context& ctx = get_context();
+        TRACE(seq, tout << "nseq conflict: " << eqs.size() << " eqs, " << lits.size() << " lits\n";);
+        ctx.set_conflict(
+            ctx.mk_justification(
+                ext_theory_conflict_justification(
+                    get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data(), 0, nullptr)));
+    }
+
    // -----------------------------------------------------------------------
    // Model generation
    // -----------------------------------------------------------------------

-    void theory_nseq::init_model(model_generator& /*mg*/) {
-        // stub – no model assignment for now
+    void theory_nseq::init_model(model_generator& mg) {
+        m_model.init(mg, m_nielsen, m_state);  // model construction is delegated to the nseq_model helper, which gets the Nielsen graph and constraint state
+    }
+
+    model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) {
+        return m_model.mk_value(n, mg);  // forwarded unchanged to the nseq_model helper
+    }
+
+    void theory_nseq::finalize_model(model_generator& mg) {
+        m_model.finalize(mg);  // forwarded unchanged to the nseq_model helper
+    }
+
+    void theory_nseq::validate_model(proto_model& mdl) {
+        m_model.validate_regex(m_state, mdl);  // the nseq_model helper validates against the constraints held in m_state
    }

    // -----------------------------------------------------------------------
@ -139,14 +518,47 @@ namespace smt {

    void theory_nseq::collect_statistics(::statistics& st) const {
        st.update("nseq conflicts",       m_num_conflicts);
-        st.update("nseq nodes explored",  m_num_nodes_explored);
-        st.update("nseq depth increases", m_num_depth_increases);
+        st.update("nseq final checks",    m_num_final_checks);  // incremented in final_check_eh just before solve()
+        st.update("nseq length axioms",   m_num_length_axioms);  // incremented by add_length_axiom, propagate_length_lemma and unfold_ho_terms
+
+        // Nielsen graph search metrics
+        auto const& ns = m_nielsen.stats();
+        st.update("nseq solve calls",     ns.m_num_solve_calls);
+        st.update("nseq dfs nodes",       ns.m_num_dfs_nodes);
+        st.update("nseq sat",             ns.m_num_sat);
+        st.update("nseq unsat",           ns.m_num_unsat);
+        st.update("nseq unknown",         ns.m_num_unknown);
+        st.update("nseq simplify clash",  ns.m_num_simplify_conflict);
+        st.update("nseq subsumptions",    ns.m_num_subsumptions);
+        st.update("nseq extensions",      ns.m_num_extensions);
+        st.update("nseq fresh vars",      ns.m_num_fresh_vars);
+        st.update("nseq max depth",       ns.m_max_depth);
+
+        // modifier breakdown
+        st.update("nseq mod det",              ns.m_mod_det);
+        st.update("nseq mod power epsilon",    ns.m_mod_power_epsilon);
+        st.update("nseq mod num cmp",          ns.m_mod_num_cmp);
+        st.update("nseq mod const num unwind", ns.m_mod_const_num_unwinding);
+        st.update("nseq mod eq split",         ns.m_mod_eq_split);
+        st.update("nseq mod star intr",        ns.m_mod_star_intr);
+        st.update("nseq mod gpower intr",      ns.m_mod_gpower_intr);
+        st.update("nseq mod const nielsen",    ns.m_mod_const_nielsen);
+        st.update("nseq mod regex char",       ns.m_mod_regex_char_split);
+        st.update("nseq mod regex var",        ns.m_mod_regex_var_split);
+        st.update("nseq mod power split",      ns.m_mod_power_split);
+        st.update("nseq mod var nielsen",      ns.m_mod_var_nielsen);
+        st.update("nseq mod var num unwind",   ns.m_mod_var_num_unwinding);
+        st.update("nseq ho unfolds",          m_num_ho_unfolds);  // theory-level counter (not part of ns), incremented in unfold_ho_terms
    }

    void theory_nseq::display(std::ostream& out) const {
        out << "theory_nseq\n";
-        out << "  str_eqs: " << m_state.str_eqs().size() << "\n";
-        out << "  str_mems: " << m_state.str_mems().size() << "\n";
+        out << "  str_eqs:    " << m_state.str_eqs().size() << "\n";
+        out << "  str_mems:   " << m_state.str_mems().size() << "\n";
+        out << "  diseqs:     " << m_state.diseqs().size() << "\n";
+        out << "  neg_mems:   " << m_state.neg_mems().size() << "\n";
+        out << "  prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n";  // printed as consumed/total queue entries
+        out << "  ho_terms:   " << m_ho_terms.size() << "\n";
    }

    // -----------------------------------------------------------------------
@ -157,6 +569,129 @@ namespace smt {
        return alloc(theory_nseq, *ctx);
    }

+    // -----------------------------------------------------------------------
+    // Higher-order term unfolding (seq.map, seq.foldl, etc.)
+    // -----------------------------------------------------------------------
+
+    bool theory_nseq::unfold_ho_terms() {  // returns true iff at least one new axiom (unfolding or length equality) was asserted
+        if (m_ho_terms.empty())
+            return false;
+
+        context& ctx = get_context();
+        ast_manager& m = get_manager();
+        bool progress = false;
+
+        unsigned sz = m_ho_terms.size();  // snapshot: both loops below visit only the terms present on entry
+        for (unsigned i = 0; i < sz; ++i) {
+            app* term = m_ho_terms[i];
+            expr* f = nullptr, *s = nullptr, *b = nullptr, *idx = nullptr;
+
+            if (!m_seq.str.is_map(term, f, s) &&
+                !m_seq.str.is_mapi(term, f, idx, s) &&
+                !m_seq.str.is_foldl(term, f, b, s) &&
+                !m_seq.str.is_foldli(term, f, idx, b, s))
+                continue;
+
+            if (!ctx.e_internalized(s))  // no enode for the sequence argument, so there is no equivalence class to inspect
+                continue;
+
+            // Find a structural representative in s's equivalence class
+            enode* s_root = ctx.get_enode(s)->get_root();
+            expr* repr = nullptr;
+            enode* curr = s_root;
+            do {
+                expr* e = curr->get_expr();
+                expr *a1, *a2;  // out-parameters of the matchers below; only the match result is used
+                if (m_seq.str.is_empty(e) ||
+                    m_seq.str.is_unit(e, a1) ||
+                    m_seq.str.is_concat(e, a1, a2)) {
+                    repr = e;  // first empty/unit/concat member of the class wins
+                    break;
+                }
+                curr = curr->get_next();
+            } while (curr != s_root);
+
+            if (!repr)  // no structural member in the class yet: leave this term alone for now
+                continue;
+
+            // Build ho_term with structural seq arg, then rewrite
+            expr_ref ho_repr(m);
+            if (m_seq.str.is_map(term))
+                ho_repr = m_seq.str.mk_map(f, repr);
+            else if (m_seq.str.is_mapi(term))
+                ho_repr = m_seq.str.mk_mapi(f, idx, repr);
+            else if (m_seq.str.is_foldl(term))
+                ho_repr = m_seq.str.mk_foldl(f, b, repr);
+            else
+                ho_repr = m_seq.str.mk_foldli(f, idx, b, repr);
+
+            expr_ref rewritten(m);
+            br_status st = m_rewriter.mk_app_core(
+                to_app(ho_repr)->get_decl(),
+                to_app(ho_repr)->get_num_args(),
+                to_app(ho_repr)->get_args(),
+                rewritten);
+
+            if (st == BR_FAILED)  // the rewriter produced nothing for this shape
+                continue;
+
+            // Internalize both the structural ho_term and its rewrite
+            if (!ctx.e_internalized(ho_repr))
+                ctx.internalize(ho_repr, false);
+            if (!ctx.e_internalized(rewritten))
+                ctx.internalize(rewritten, false);
+
+            enode* ho_en = ctx.get_enode(ho_repr);
+            enode* res_en = ctx.get_enode(rewritten);
+
+            if (ho_en->get_root() == res_en->get_root())  // already equal in the context: nothing to assert
+                continue;
+
+            // Assert tautological axiom: ho_repr = rewritten
+            // Congruence closure merges map(f,s) with map(f,repr)
+            // since s = repr in the E-graph.
+            expr_ref eq_expr(m.mk_eq(ho_repr, rewritten), m);
+            if (!ctx.b_internalized(eq_expr))
+                ctx.internalize(eq_expr, true);
+            literal eq_lit = ctx.get_literal(eq_expr);
+            if (ctx.get_assignment(eq_lit) != l_true) {  // skip literals that are already true
+                ctx.mk_th_axiom(get_id(), 1, &eq_lit);
+                TRACE(seq, tout << "nseq ho unfold: "
+                                << mk_bounded_pp(ho_repr, m, 3) << " = "
+                                << mk_bounded_pp(rewritten, m, 3) << "\n";);
+                ++m_num_ho_unfolds;
+                progress = true;
+            }
+        }
+
+        // For map/mapi: propagate length preservation
+        for (unsigned i = 0; i < sz; ++i) {
+            app* term = m_ho_terms[i];
+            expr* f = nullptr, *s = nullptr, *idx = nullptr;
+            bool is_map = m_seq.str.is_map(term, f, s);
+            bool is_mapi = !is_map && m_seq.str.is_mapi(term, f, idx, s);
+            if (!is_map && !is_mapi)  // the fold variants get no length axiom
+                continue;
+            if (!m_seq.is_seq(term))
+                continue;
+
+            // len(map(f, s)) = len(s)
+            expr_ref len_map(m_seq.str.mk_length(term), m);
+            expr_ref len_s(m_seq.str.mk_length(s), m);
+            expr_ref len_eq(m.mk_eq(len_map, len_s), m);
+            if (!ctx.b_internalized(len_eq))
+                ctx.internalize(len_eq, true);
+            literal len_lit = ctx.get_literal(len_eq);
+            if (ctx.get_assignment(len_lit) != l_true) {  // skip literals that are already true
+                ctx.mk_th_axiom(get_id(), 1, &len_lit);
+                ++m_num_length_axioms;
+                progress = true;
+            }
+        }
+
+        return progress;
+    }
+
    // -----------------------------------------------------------------------
    // Helpers
    // -----------------------------------------------------------------------
@ -168,4 +703,136 @@ namespace smt {
        return s;
    }

+    // -----------------------------------------------------------------------
+    // Arithmetic value queries
+    // -----------------------------------------------------------------------
+
+    bool theory_nseq::get_num_value(expr* e, rational& val) const {
+        return m_arith_value.get_value_equiv(e, val) && val.is_int();  // succeeds only when the helper yields a value and that value is an integer
+    }
+
+    bool theory_nseq::lower_bound(expr* e, rational& lo) const {  // true only for a non-strict, integer lower bound; lo is written by the helper
+        bool is_strict = true;
+        return m_arith_value.get_lo(e, lo, is_strict) && !is_strict && lo.is_int();
+    }
+
+    bool theory_nseq::upper_bound(expr* e, rational& hi) const {  // true only for a non-strict, integer upper bound; hi is written by the helper
+        bool is_strict = true;
+        return m_arith_value.get_up(e, hi, is_strict) && !is_strict && hi.is_int();
+    }
+
+    bool theory_nseq::get_length(expr* e, rational& val) {  // sums the lengths of the concat leaves of e into val; false when a leaf length is unavailable or negative
+        ast_manager& m = get_manager();
+        rational val1;
+        expr* e1 = nullptr;
+        expr* e2 = nullptr;
+        ptr_vector<expr> todo;  // explicit work list instead of recursion over nested concats
+        todo.push_back(e);
+        val.reset();
+        zstring s;
+        while (!todo.empty()) {
+            expr* c = todo.back();
+            todo.pop_back();
+            if (m_seq.str.is_concat(c, e1, e2)) {
+                todo.push_back(e1);
+                todo.push_back(e2);
+            }
+            else if (m_seq.str.is_unit(c))
+                val += rational(1);
+            else if (m_seq.str.is_empty(c))
+                continue;
+            else if (m_seq.str.is_string(c, s))
+                val += rational(s.length());
+            else {
+                expr_ref len(m_seq.str.mk_length(c), m);  // any other leaf: ask the arithmetic helper for the value of its length term
+                if (m_arith_value.get_value(len, val1) && !val1.is_neg())
+                    val += val1;
+                else
+                    return false;
+            }
+        }
+        return val.is_int();
+    }
+
+    void theory_nseq::add_length_axiom(literal lit) {  // asserts lit as a one-literal theory axiom and counts it
+        context& ctx = get_context();
+        ctx.mark_as_relevant(lit);
+        ctx.mk_th_axiom(get_id(), 1, &lit);
+        ++m_num_length_axioms;
+    }
+
+    bool theory_nseq::propagate_length_lemma(literal lit, seq::length_constraint const& lc) {  // asserts or propagates lit for the constraint lc; every path returns true
+        context& ctx = get_context();
+        ast_manager& m = get_manager();
+
+        // unconditional constraints: assert as theory axiom
+        if (lc.m_kind == seq::length_kind::nonneg) {
+            add_length_axiom(lit);
+            return true;
+        }
+
+        // conditional constraints: propagate with justification from dep_tracker
+        enode_pair_vector eqs;
+        literal_vector lits;
+        deps_to_lits(lc.m_dep, eqs, lits);  // antecedents: the equalities and literals named by lc.m_dep
+
+        ctx.mark_as_relevant(lit);
+        justification* js = ctx.mk_justification(
+            ext_theory_propagation_justification(
+                get_id(), ctx,
+                lits.size(), lits.data(),
+                eqs.size(), eqs.data(),
+                lit));
+        ctx.assign(lit, js);
+
+        TRACE(seq, tout << "nseq length propagation: " << mk_pp(lc.m_expr, m)
+                        << " (" << eqs.size() << " eqs, " << lits.size() << " lits)\n";);
+        ++m_num_length_axioms;  // propagations share the counter used by add_length_axiom
+        return true;
+    }
+
+    bool theory_nseq::assert_nonneg_for_all_vars() {
+        ast_manager& m = get_manager();
+        context& ctx = get_context();
+        // Asserts len(e) >= 0 for every sequence-sorted theory variable; true iff a new axiom was added.
+        bool new_axiom = false;
+        unsigned nv = get_num_vars();
+        for (unsigned v = 0; v < nv; ++v) {
+            expr* e = get_enode(v)->get_expr();
+            if (!m_seq.is_seq(e))
+                continue;
+            expr_ref len_var(m_seq.str.mk_length(e), m);
+            expr_ref ge_zero(m_autil.mk_ge(len_var, m_autil.mk_int(0)), m);  // reuse the theory's m_autil rather than building an arith_util per call
+            if (!ctx.b_internalized(ge_zero))
+                ctx.internalize(ge_zero, true);
+            literal lit = ctx.get_literal(ge_zero);
+            if (ctx.get_assignment(lit) != l_true) {  // literals that are already true need no axiom
+                add_length_axiom(lit);
+                new_axiom = true;
+            }
+        }
+        return new_axiom;
+    }
+
+    bool theory_nseq::assert_length_constraints() {  // pushes the Nielsen graph's length constraints into the context; true iff at least one was not already true
+        ast_manager& m = get_manager();
+        context& ctx = get_context();
+        vector<seq::length_constraint> constraints;
+        m_nielsen.generate_length_constraints(constraints);  // constraints is filled in by the Nielsen graph
+
+        bool new_axiom = false;
+        for (auto const& lc : constraints) {
+            expr* e = lc.m_expr;
+            if (!ctx.b_internalized(e))
+                ctx.internalize(e, true);
+            literal lit = ctx.get_literal(e);
+            if (ctx.get_assignment(lit) != l_true) {  // constraints that already hold are skipped
+                TRACE(seq, tout << "nseq length lemma: " << mk_pp(e, m) << "\n";);
+                propagate_length_lemma(lit, lc);
+                new_axiom = true;
+            }
+        }
+        return new_axiom;
+    }
+
 }
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@ -35,42 +35,99 @@ namespace smt {
        seq_util       m_seq;
        arith_util     m_autil;
        seq_rewriter   m_rewriter;
+        arith_value    m_arith_value;
        euf::egraph    m_egraph;  // private egraph (not shared with smt context)
        euf::sgraph    m_sgraph;  // private sgraph
        seq::nielsen_graph m_nielsen;
        nseq_state     m_state;
+        nseq_regex     m_regex;   // regex membership pre-processing
+        nseq_model     m_model;   // model construction helper
+
+        // propagation queue: each item pairs a constraint kind with an index
+        struct prop_item {
+            enum kind_t { eq_prop, diseq_prop, pos_mem_prop, neg_mem_prop } m_kind;  // one kind per propagate_eq/diseq/pos_mem/neg_mem helper
+            unsigned m_idx;  // NOTE(review): presumably the idx handed to the matching propagate_* helper — confirm in the .cpp
+        };
+        svector<prop_item>  m_prop_queue;  // the queued items
+        unsigned            m_prop_qhead = 0;  // NOTE(review): looks like the position of the next unprocessed item — confirm in propagate()
+        unsigned_vector     m_prop_lim;   // saved queue sizes for push/pop

        // statistics
        unsigned m_num_conflicts        = 0;
-        unsigned m_num_nodes_explored   = 0;
-        unsigned m_num_depth_increases  = 0;
+        unsigned m_num_final_checks     = 0;
+        unsigned m_num_length_axioms    = 0;

        // map from context enode to private sgraph snode
        obj_map<expr, euf::snode*> m_expr2snode;

+        // mapping from nielsen mem index to state mem index
+        // (populated during populate_nielsen_graph, used in deps_to_lits)
+        unsigned_vector m_nielsen_to_state_mem;
+
+        // higher-order terms (seq.map, seq.mapi, seq.foldl, seq.foldli)
+        ptr_vector<app>  m_ho_terms;
+        unsigned_vector  m_ho_lim;        // push/pop limits for m_ho_terms
+        unsigned         m_num_ho_unfolds = 0;
+
+        // unhandled boolean string predicates (prefixof, suffixof, contains, etc.)
+        unsigned         m_num_unhandled_bool = 0;
+        unsigned_vector  m_unhandled_bool_lim;
+
+        bool has_unhandled_preds() const { return m_num_unhandled_bool != 0; }  // true iff the unhandled-predicate count is nonzero
+
        // required virtual methods
        bool internalize_atom(app* a, bool gate_ctx) override;
        bool internalize_term(app* term) override;
+        theory_var mk_var(enode* n) override;
        void new_eq_eh(theory_var v1, theory_var v2) override;
        void new_diseq_eh(theory_var v1, theory_var v2) override;
        theory* mk_fresh(context* ctx) override;
        void display(std::ostream& out) const override;

        // optional overrides
-        bool can_propagate() override { return false; }
-        void propagate() override {}
+        bool can_propagate() override;
+        void propagate() override;
+        void init() override;
+        void assign_eh(bool_var v, bool is_true) override;
        final_check_status final_check_eh(unsigned) override;
        void push_scope_eh() override;
        void pop_scope_eh(unsigned num_scopes) override;
        void init_model(model_generator& mg) override;
+        model_value_proc* mk_value(enode* n, model_generator& mg) override;
+        void finalize_model(model_generator& mg) override;
+        void validate_model(proto_model& mdl) override;
        void collect_statistics(::statistics& st) const override;

        char const* get_name() const override { return "nseq"; }

        // private helpers
        void populate_nielsen_graph();
+        void explain_nielsen_conflict();
+        void deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits);
+        void add_conflict_clause(seq::dep_tracker const& deps);
+        void set_conflict(enode_pair_vector const& eqs, literal_vector const& lits);
        euf::snode* get_snode(expr* e);

+        // propagation dispatch helpers
+        void propagate_eq(unsigned idx);
+        void propagate_diseq(unsigned idx);
+        void propagate_pos_mem(unsigned idx);
+        void propagate_neg_mem(unsigned idx);
+        void ensure_length_var(expr* e);
+
+        // higher-order term unfolding
+        bool unfold_ho_terms();
+
+        // arithmetic value queries for length reasoning, followed by the length axiom/lemma helpers
+        bool get_num_value(expr* e, rational& val) const;
+        bool lower_bound(expr* e, rational& lo) const;
+        bool upper_bound(expr* e, rational& hi) const;
+        bool get_length(expr* e, rational& val);
+        void add_length_axiom(literal lit);  // called by assert_nonneg_for_all_vars for each len(e) >= 0 literal not yet true
+        bool propagate_length_lemma(literal lit, seq::length_constraint const& lc);  // assigns lit with a justification; called by assert_length_constraints
+        bool assert_nonneg_for_all_vars();  // true iff add_length_axiom was called for at least one sequence-sorted variable
+        bool assert_length_constraints();  // true iff a length lemma was issued for at least one Nielsen length constraint
+
    public:
        theory_nseq(context& ctx);
    };
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@ -133,6 +133,7 @@ add_executable(test-z3
  sls_seq_plugin.cpp
  seq_nielsen.cpp
  nseq_basic.cpp
+  nseq_regex.cpp
  small_object_allocator.cpp
  smt2print_parse.cpp
  smt_context.cpp
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@ -288,6 +288,7 @@ int main(int argc, char ** argv) {
    TST(sls_seq_plugin);
    TST(seq_nielsen);
    TST(nseq_basic);
+    TST(nseq_regex);
    TST(ho_matcher);
    TST(finite_set);
    TST(finite_set_rewriter);
--- a/src/test/nseq_basic.cpp
+++ b/src/test/nseq_basic.cpp
@ -100,10 +100,119 @@ static void test_nseq_node_satisfied() {
    std::cout << "  ok\n";
 }

+// Test 5: symbol clash conflict ("a" = "b" is unsat); also checks the conflict explanation
+static void test_nseq_symbol_clash() {
+    std::cout << "test_nseq_symbol_clash\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* a = sg.mk_char('a');
+    euf::snode* b = sg.mk_char('b');
+    ng.add_str_eq(a, b);
+
+    auto r = ng.solve();
+    ENSURE(r == seq::nielsen_graph::search_result::unsat);  // ENSURE, not SASSERT: the check must also run in release builds
+
+    // verify conflict explanation returns the equality index
+    unsigned_vector eq_idx, mem_idx;
+    ng.explain_conflict(eq_idx, mem_idx);
+    ENSURE(eq_idx.size() == 1);
+    ENSURE(eq_idx[0] == 0);
+    ENSURE(mem_idx.empty());
+    std::cout << "  ok: symbol clash detected as unsat\n";
+}
+
+// Test 6: variable equality x = x is sat (a single variable equated with itself)
+static void test_nseq_var_eq_self() {
+    std::cout << "test_nseq_var_eq_self\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    ng.add_str_eq(x, x);
+
+    auto r = ng.solve();
+    ENSURE(r == seq::nielsen_graph::search_result::sat);  // ENSURE, not SASSERT: the check must also run in release builds
+    std::cout << "  ok: x = x solved as sat\n";
+}
+
+// Test 7: x·a = x·b is unsat (prefix match then clash)
+static void test_nseq_prefix_clash() {
+    std::cout << "test_nseq_prefix_clash\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* a = sg.mk_char('a');
+    euf::snode* b = sg.mk_char('b');
+    euf::snode* xa = sg.mk_concat(x, a);  // x followed by 'a'
+    euf::snode* xb = sg.mk_concat(x, b);  // x followed by 'b'
+
+    ng.add_str_eq(xa, xb);
+    auto r = ng.solve();
+    ENSURE(r == seq::nielsen_graph::search_result::unsat);  // ENSURE, not SASSERT: the check must also run in release builds
+    std::cout << "  ok: x·a = x·b detected as unsat\n";
+}
+
+// Test 8: a·x = a·y has solutions (not unsat)
+static void test_nseq_const_nielsen_solvable() {
+    std::cout << "test_nseq_const_nielsen_solvable\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('a');
+    euf::snode* ax = sg.mk_concat(a, x);  // 'a' followed by x
+    euf::snode* ay = sg.mk_concat(a, y);  // 'a' followed by y
+
+    ng.add_str_eq(ax, ay);
+    auto r = ng.solve();
+    // a·x = a·y simplifies to x = y which is satisfiable (x = y = ε)
+    ENSURE(r == seq::nielsen_graph::search_result::sat);  // ENSURE, not SASSERT: the check must also run in release builds
+    std::cout << "  ok: a·x = a·y solved as sat\n";
+}
+
+// Test 9: length mismatch - "ab" = "a" is unsat
+static void test_nseq_length_mismatch() {
+    std::cout << "test_nseq_length_mismatch\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* a = sg.mk_char('a');
+    euf::snode* b = sg.mk_char('b');
+    euf::snode* ab = sg.mk_concat(a, b);  // two-character left-hand side
+
+    ng.add_str_eq(ab, a);
+    auto r = ng.solve();
+    ENSURE(r == seq::nielsen_graph::search_result::unsat);  // ENSURE, not SASSERT: the check must also run in release builds
+    std::cout << "  ok: ab = a detected as unsat\n";
+}
+
 void tst_nseq_basic() {
    test_nseq_instantiation();
    test_nseq_param_validation();
    test_nseq_simplification();
    test_nseq_node_satisfied();
+    test_nseq_symbol_clash();  // tests 5-9 each build a Nielsen graph and check the result of solve()
+    test_nseq_var_eq_self();
+    test_nseq_prefix_clash();
+    test_nseq_const_nielsen_solvable();
+    test_nseq_length_mismatch();
    std::cout << "nseq_basic: all tests passed\n";
 }
--- a/src/test/seq_nielsen.cpp
+++ b/src/test/seq_nielsen.cpp