Merge pull request #8807 from Z3Prover/copilot/update-clemens-work

nseq: Fix axiom scope leak in incremental solving + propagate_ground_split optimization
2026-06-15 05:15:41 +00:00 · 2026-03-01 16:02:53 -08:00 · 2026-03-01 16:02:53 -08:00 · 6339623877
commit 6339623877
parent 03cf706eb5 5c56e5fb0a
7 changed files with 757 additions and 66 deletions
--- a/src/smt/nseq_state.cpp
+++ b/src/smt/nseq_state.cpp
@ -38,6 +38,9 @@ namespace smt {
        m_trail.push_scope();
        m_trail.push(value_trail<unsigned>(m_axioms_head));
        m_trail.push(value_trail<unsigned>(m_preds_head));
+        // Save axiom vector size so we can truncate it on pop
+        m_trail.push(value_trail<unsigned>(m_axioms_size_at_push));
+        m_axioms_size_at_push = m_axioms.size();
        m_eqs.push_scope();
        m_neqs.push_scope();
        m_mems.push_scope();
@ -45,7 +48,14 @@ namespace smt {
    }

    void nseq_state::pop_scope(unsigned num_scopes) {
+        // m_trail.pop_scope will restore m_axioms_size_at_push to the value saved
+        // at the outermost of the popped scopes (i.e., the correct target size).
        m_trail.pop_scope(num_scopes);
+        // Now m_axioms_size_at_push is the size we should truncate back to.
+        unsigned target = m_axioms_size_at_push;
+        for (unsigned i = target; i < m_axioms.size(); ++i)
+            m_axiom_set.remove(m_axioms.get(i));
+        m_axioms.shrink(target);
        m_dm.pop_scope(num_scopes);
        m_eqs.pop_scope(num_scopes);
        m_neqs.pop_scope(num_scopes);
--- a/src/smt/nseq_state.h
+++ b/src/smt/nseq_state.h
@ -68,6 +68,7 @@ namespace smt {
        expr_ref_vector          m_axioms;
        obj_hashtable<expr>      m_axiom_set;
        unsigned                 m_axioms_head;
+        unsigned                 m_axioms_size_at_push { 0 }; // saved on push, restored on pop

        // Length tracking
        obj_hashtable<expr>      m_has_length;
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -66,23 +66,30 @@ namespace smt {
            for (unsigned i = head; i < axioms.size(); ++i)
                deque_axiom(axioms[i]);
            m_state.set_axioms_head(axioms.size());
-            return FC_CONTINUE;
+            if (ctx.inconsistent())
+                return FC_CONTINUE;
        }

        // Reduce predicates (prefix, suffix, contains) to word equations
        if (reduce_predicates())
            return FC_CONTINUE;

-        // Check zero-length variables before solving (uses arithmetic bounds)
-        if (check_zero_length())
+        // Propagate length equalities and zero-length variables.
+        // Don't short-circuit: continue to solve_eqs so force_phase is set on
+        // branching variables before the SAT solver makes its next decision.
+        propagate_length_eqs();
+        if (ctx.inconsistent())
+            return FC_CONTINUE;
+        check_zero_length();
+        if (ctx.inconsistent())
            return FC_CONTINUE;

        // Solve word equations using Nielsen transformations
        if (solve_eqs())
            return FC_CONTINUE;

-        // Propagate length equalities from equations
-        if (propagate_length_eqs())
+        // After a successful solve pass, pick up any freshly-added length axioms
+        if (m_state.has_pending_axioms())
            return FC_CONTINUE;

        // Check regex membership constraints
@ -93,8 +100,11 @@ namespace smt {
        if (check_disequalities())
            return FC_CONTINUE;

-        if (all_eqs_solved() && m_state.mems().empty())
-            return FC_DONE;
+        if (all_eqs_solved()) {
+            // All word equations solved; check if remaining memberships are handled
+            if (all_mems_checked())
+                return FC_DONE;
+        }

        return FC_GIVEUP;
    }
@ -194,6 +204,12 @@ namespace smt {
            if (m_find.find(v1) != m_find.find(v2))
                m_find.merge(v1, v2);
            m_state.add_eq(o1, o2, dep);
+            // Propagate length equality: len(o1) = len(o2)
+            ensure_length_axiom(o1);
+            ensure_length_axiom(o2);
+            expr_ref len1 = mk_len(o1);
+            expr_ref len2 = mk_len(o2);
+            add_axiom(mk_eq(len1, len2, false));
            IF_VERBOSE(3, verbose_stream() << "new_eq_eh: " << mk_bounded_pp(o1, m) << " = " << mk_bounded_pp(o2, m)
                  << " (total eqs: " << m_state.eqs().size() << ")\n");
            TRACE(seq, tout << "new eq: " << mk_bounded_pp(o1, m) << " = " << mk_bounded_pp(o2, m) << "\n";);
@ -700,6 +716,11 @@ namespace smt {
            break;
        }

+        // Optimization: if one side is a pure ground string and the other side
+        // has variables with fully-known lengths, directly propagate substr values.
+        if (propagate_ground_split(lhs, rhs, dep))
+            return true;
+
        // Try branching: find a variable and decide x = "" or x ≠ ""
        if (branch_eq(lhs, rhs, dep))
            return true;
@ -741,7 +762,10 @@ namespace smt {
                literal eq_empty = mk_eq(e, emp, false);
                switch (ctx.get_assignment(eq_empty)) {
                case l_undef:
-                    // Force the empty branch first
+                    // If lower length bound is already > 0, skip the empty branch
+                    if (ctx.e_internalized(len_e) && lower_bound(len_e, lo) && lo > rational(0))
+                        break;
+                    // Force the empty branch first.
                    TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
                    IF_VERBOSE(3, verbose_stream() << "  branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
                    ctx.force_phase(eq_empty);
@ -853,7 +877,10 @@ namespace smt {
    }

    // -------------------------------------------------------
-    // Nielsen splitting: x · α = c · β → x = c · x_tail
+    // Nielsen splitting
+    // -------------------------------------------------------
+    // var-vs-unit: x · α = c · β → x = c · x_tail (peel one char)
+    // var-vs-var:  x · α = y · β → determine |x| vs |y| and split
    // -------------------------------------------------------

    bool theory_nseq::split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs,
@ -861,7 +888,7 @@ namespace smt {
        if (lhs.empty() || rhs.empty())
            return false;

-        // Find: variable on one side facing a unit/constant on the other
+        // Find: variable on one side facing a non-variable on the other
        expr* var = nullptr;
        expr* head = nullptr;
        bool var_on_left = false;
@ -876,11 +903,13 @@ namespace smt {
            head = lhs[0];
            var_on_left = false;
        }
-        // Also handle var vs var: pick either and peel one character
+        // var vs var case
+        bool both_var = false;
        if (!var && !head && m_nielsen.is_var(lhs[0]) && m_nielsen.is_var(rhs[0]) && lhs[0] != rhs[0]) {
            var = lhs[0];
            head = rhs[0];
            var_on_left = true;
+            both_var = true;
        }
        if (!var || !head)
            return false;
@ -897,31 +926,126 @@ namespace smt {
        if (ctx.get_assignment(eq_empty) != l_false)
            return false;

+        // ---- var-vs-var: length-guided split ----
+        if (both_var) {
+            expr_ref len_var = mk_len(var);
+            expr_ref len_head = mk_len(head);
+            rational var_len, head_len;
+            bool has_var_len  = get_length(var,  var_len);
+            bool has_head_len = get_length(head, head_len);
+
+            if (has_var_len && has_head_len) {
+                if (var_len == head_len) {
+                    // Same length → must be equal
+                    IF_VERBOSE(3, verbose_stream() << "  var-var-split: |" << mk_bounded_pp(var, m, 1)
+                               << "|=|" << mk_bounded_pp(head, m, 1) << "| → unify\n";);
+                    bool changed = propagate_eq(dep, var, head);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+                else if (var_len < head_len) {
+                    // head = var · z_rest, |z_rest| = head_len - var_len
+                    IF_VERBOSE(3, verbose_stream() << "  var-var-split: |" << mk_bounded_pp(var, m, 1)
+                               << "| < |" << mk_bounded_pp(head, m, 1) << "| → split head\n";);
+                    expr_ref z(m_sk.mk_post(head, len_var), m);
+                    ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                    expr_ref rhs_eq(m_util.str.mk_concat(var, z), m);
+                    ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                    bool changed = propagate_eq(dep, head, rhs_eq);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+                else {
+                    // var = head · z_rest, |z_rest| = var_len - head_len
+                    IF_VERBOSE(3, verbose_stream() << "  var-var-split: |" << mk_bounded_pp(var, m, 1)
+                               << "| > |" << mk_bounded_pp(head, m, 1) << "| → split var\n";);
+                    expr_ref z(m_sk.mk_post(var, len_head), m);
+                    ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                    expr_ref rhs_eq(m_util.str.mk_concat(head, z), m);
+                    ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                    bool changed = propagate_eq(dep, var, rhs_eq);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+            }
+
+            // Lengths not fully determined: branch on |var| vs |head| via SAT
+            // First try: |var| ≤ |head| (use as a SAT decision)
+            literal len_le = mk_literal(
+                m_autil.mk_ge(m_autil.mk_sub(len_head, len_var), m_autil.mk_int(0)));
+            switch (ctx.get_assignment(len_le)) {
+            case l_undef:
+                IF_VERBOSE(3, verbose_stream() << "  var-var-split: branching |"
+                           << mk_bounded_pp(var, m, 1) << "| ≤ |" << mk_bounded_pp(head, m, 1) << "|\n";);
+                ctx.force_phase(len_le);
+                ctx.mark_as_relevant(len_le);
+                m_new_propagation = true;
+                m_state.stats().m_num_splits++;
+                return true;
+            case l_true: {
+                // |var| ≤ |head|: check if equal
+                literal len_eq_lit = mk_eq(len_var, len_head, false);
+                switch (ctx.get_assignment(len_eq_lit)) {
+                case l_undef:
+                    ctx.force_phase(len_eq_lit);
+                    ctx.mark_as_relevant(len_eq_lit);
+                    m_new_propagation = true;
+                    return true;
+                case l_true:
+                    return propagate_eq(dep, var, head);
+                case l_false: {
+                    // |var| < |head|: head = var · z_rest, |z_rest| = |head| - |var|
+                    expr_ref z(m_sk.mk_post(head, len_var), m);
+                    ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                    // Add len(z) = len(head) - len(var) as an explicit axiom so arithmetic
+                    // knows the exact length and terminates the split chain.
+                    expr_ref len_z = mk_len(z);
+                    expr_ref len_diff(m_autil.mk_sub(len_head, len_var), m);
+                    ensure_enode(len_z); ensure_enode(len_diff);
+                    add_axiom(mk_eq(len_z, len_diff, false));
+                    expr_ref rhs_eq(m_util.str.mk_concat(var, z), m);
+                    ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                    bool changed = propagate_eq(dep, head, rhs_eq);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+                }
+                break;
+            }
+            case l_false: {
+                // |var| > |head|: var = head · z_rest, |z_rest| = |var| - |head|
+                expr_ref z(m_sk.mk_post(var, len_head), m);
+                ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                // Add len(z) = len(var) - len(head) as an explicit axiom.
+                expr_ref len_z = mk_len(z);
+                expr_ref len_diff(m_autil.mk_sub(len_var, len_head), m);
+                ensure_enode(len_z); ensure_enode(len_diff);
+                add_axiom(mk_eq(len_z, len_diff, false));
+                expr_ref rhs_eq(m_util.str.mk_concat(head, z), m);
+                ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                bool changed = propagate_eq(dep, var, rhs_eq);
+                if (changed) m_state.stats().m_num_splits++;
+                return changed;
+            }
+            }
+            return false;
+        }
+
+        // ---- var-vs-unit/constant: peel one character ----
        // For multi-character string constants, extract only the first character
-        // so we peel one character at a time (Nielsen style)
        zstring head_str;
        if (m_util.str.is_string(head, head_str) && head_str.length() > 1) {
            head = m_util.str.mk_unit(m_util.mk_char(head_str[0]));
        }

-        // var is non-empty and the equation forces var to start with head.
-        // Nielsen split: var = head · var_tail
+        // var is non-empty and the equation forces var to start with head (a single char).
+        // Nielsen split: var = head · var_tail  where var_tail = seq.tail(var, 1)
        expr_ref one(m_autil.mk_int(1), m);
        expr_ref var_tail(m_sk.mk_tail(var, one), m);
        ensure_enode(var_tail);
        mk_var(ctx.get_enode(var_tail));
        ensure_length_axiom(var_tail);

-        // Before committing to the split, ensure the tail's emptiness is decided.
-        expr_ref tail_emp(m_util.str.mk_empty(var_tail->get_sort()), m);
-        literal tail_eq_empty = mk_eq(var_tail, tail_emp, false);
-        if (ctx.get_assignment(tail_eq_empty) == l_undef) {
-            ctx.force_phase(tail_eq_empty);
-            ctx.mark_as_relevant(tail_eq_empty);
-            m_new_propagation = true;
-            return true;
-        }
-
        expr_ref split_rhs(m_util.str.mk_concat(head, var_tail), m);
        ensure_enode(split_rhs);
        ensure_length_axiom(split_rhs);
@ -1084,6 +1208,62 @@ namespace smt {
        return progress;
    }

+    // If one side is a pure ground string and the other side has variables with
+    // fully known lengths, directly propagate each variable's value as a substr.
+    // E.g. x·y = "abcdef" with len(x)=2, len(y)=4 → x="ab", y="cdef"
+    bool theory_nseq::propagate_ground_split(expr_ref_vector const& lhs,
+                                              expr_ref_vector const& rhs,
+                                              nseq_dependency* dep) {
+        // Determine which side is pure-variable and which is pure-ground
+        auto has_var = [&](expr_ref_vector const& v) {
+            for (expr* e : v) if (m_nielsen.is_var(e)) return true;
+            return false;
+        };
+        auto all_var = [&](expr_ref_vector const& v) {
+            for (expr* e : v) if (!m_nielsen.is_var(e)) return false;
+            return true;
+        };
+        expr_ref_vector const* ground_side = nullptr;
+        expr_ref_vector const* var_side = nullptr;
+        if (!has_var(lhs) && all_var(rhs) && !rhs.empty()) {
+            ground_side = &lhs; var_side = &rhs;
+        }
+        else if (!has_var(rhs) && all_var(lhs) && !lhs.empty()) {
+            ground_side = &rhs; var_side = &lhs;
+        }
+        if (!ground_side)
+            return false;
+
+        // Build the ground string
+        zstring ground;
+        {
+            sort* s = var_side->get(0)->get_sort();
+            expr_ref gc = mk_concat(*ground_side, s);
+            if (!is_ground_string(gc, ground))
+                return false;
+        }
+
+        // Collect vars and their known lengths
+        unsigned offset = 0;
+        bool changed = false;
+        for (expr* e : *var_side) {
+            rational len_v;
+            if (!get_length(e, len_v) || !len_v.is_nonneg() || !len_v.is_unsigned())
+                return false; // length unknown, can't propagate
+            unsigned len = len_v.get_unsigned();
+            if (offset + len > ground.length())
+                return false; // length conflict, let solve_eq handle it
+            zstring substr = ground.extract(offset, len);
+            expr_ref val(m_util.str.mk_string(substr), m);
+            ensure_enode(val);
+            changed |= propagate_eq(dep, e, val);
+            offset += len;
+        }
+        if (offset != ground.length())
+            return false; // leftover ground chars, conflict
+        return changed;
+    }
+
    bool theory_nseq::check_diseq(nseq_ne const& ne) {
        expr* l = ne.l();
        expr* r = ne.r();
@ -1393,6 +1573,84 @@ namespace smt {
        return r;
    }

+    // DFS: find a string of exactly target_len in regex r.
+    // On success, appends characters to 'result' and returns true.
+    // cur_len is how many characters have already been appended.
+    bool theory_nseq::dfs_regex(expr* r, unsigned target_len, unsigned cur_len, zstring& result) {
+        if (cur_len == target_len) {
+            expr_ref nullable = m_seq_rewrite.is_nullable(expr_ref(r, m));
+            m_rewrite(nullable);
+            return m.is_true(nullable);
+        }
+        // Try printable characters: lowercase, uppercase, digits, then common punctuation
+        static const unsigned char priority[] = {
+            'a','b','c','d','e','f','g','h','i','j','k','l','m',
+            'n','o','p','q','r','s','t','u','v','w','x','y','z',
+            '0','1','2','3','4','5','6','7','8','9',
+            'A','B','C','D','E','F','G','H','I','J','K','L','M',
+            'N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
+            ' ','.', ',', '!', '?', '-', '_', '@', '#', '$', '%', '&', '*'
+        };
+        for (unsigned c : priority) {
+            expr_ref ch(m_util.mk_char(c), m);
+            expr_ref dr = m_seq_rewrite.mk_derivative(ch, expr_ref(r, m));
+            m_rewrite(dr);
+            if (m_util.re.is_empty(dr))
+                continue;
+            result = result + zstring(c);
+            if (dfs_regex(dr, target_len, cur_len + 1, result))
+                return true;
+            // Backtrack: remove last character
+            result = result.extract(0, result.length() - 1);
+        }
+        return false;
+    }
+
+    // Synthesize a concrete string of exactly `len` accepted by `regex`.
+    // Returns true and sets `result` on success.
+    bool theory_nseq::synthesize_regex_string(expr* regex, unsigned len, zstring& result) {
+        result = zstring();
+        return dfs_regex(regex, len, 0, result);
+    }
+
+    // Add length-bound axioms from regex structure: len(s) >= min_length(r).
+    // Also adds: len(s) = 0 → nullable(r) (otherwise conflict).
+    bool theory_nseq::add_regex_length_axioms(nseq_mem const& mem) {
+        expr* s = mem.str();
+        expr* r = mem.regex();
+        bool sign = mem.sign();
+        nseq_dependency* dep = mem.dep();
+        bool progress = false;
+
+        if (sign) {
+            // s ∈ r → len(s) >= min_length(r)
+            unsigned minlen = m_util.re.min_length(r);
+            if (minlen > 0) {
+                expr_ref len_s = mk_len(s);
+                ensure_length_axiom(s);
+                literal lb = mk_literal(m_autil.mk_ge(len_s, m_autil.mk_int(minlen)));
+                if (ctx.get_assignment(lb) != l_true) {
+                    add_axiom(lb);
+                    progress = true;
+                }
+            }
+            // If r is not nullable and len(s) = 0, conflict
+            expr_ref nullable = m_seq_rewrite.is_nullable(expr_ref(r, m));
+            m_rewrite(nullable);
+            if (m.is_false(nullable)) {
+                // s ∈ r → s ≠ ""
+                expr_ref emp(m_util.str.mk_empty(s->get_sort()), m);
+                ensure_enode(emp);
+                literal s_empty = mk_eq(s, emp, false);
+                if (ctx.get_assignment(~s_empty) != l_true) {
+                    add_axiom(~s_empty);
+                    progress = true;
+                }
+            }
+        }
+        return progress;
+    }
+
    bool theory_nseq::check_regex_memberships() {
        bool progress = false;
        for (auto const& mem : m_state.mems()) {
@ -1410,43 +1668,99 @@ namespace smt {
        bool sign = mem.sign();
        nseq_dependency* dep = mem.dep();

+        // Add structural length axioms from the regex
+        if (add_regex_length_axioms(mem))
+            return true;
+
        // Try to determine the string value from the e-graph
        zstring sval;
-        if (!is_ground_string(s, sval))
+        if (is_ground_string(s, sval)) {
+            // Compute derivatives for the known string
+            expr_ref derived = derive_regex(r, sval);
+
+            // Check nullable: does the derived regex accept the empty word?
+            expr_ref nullable = m_seq_rewrite.is_nullable(derived);
+            m_rewrite(nullable);
+
+            if (sign) {
+                // Positive membership: s must be in r
+                if (m.is_false(nullable) || m_util.re.is_empty(derived)) {
+                    set_conflict(dep);
+                    return true;
+                }
+            }
+            else {
+                // Negative membership: s must NOT be in r
+                if (m.is_true(nullable)) {
+                    set_conflict(dep);
+                    return true;
+                }
+            }
            return false;
+        }

-        // Compute derivatives for the known string
-        expr_ref derived = derive_regex(r, sval);
-
-        // Check nullable: does the derived regex accept the empty word?
-        expr_ref nullable = m_seq_rewrite.is_nullable(derived);
-        m_rewrite(nullable);
-
+        // String is not yet ground. For positive membership:
+        // if we know the exact length, synthesize a concrete string.
        if (sign) {
-            // Positive membership: s must be in r
-            if (m.is_false(nullable)) {
-                // String is fully determined but regex rejects → conflict
-                set_conflict(dep);
-                return true;
-            }
-        }
-        else {
-            // Negative membership: s must NOT be in r
-            if (m.is_true(nullable)) {
-                // String is fully determined and regex accepts → conflict
-                set_conflict(dep);
-                return true;
+            rational len_val;
+            if (get_length(s, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) {
+                unsigned len = len_val.get_unsigned();
+                // Feasibility check only: detect conflicts but don't force a specific value.
+                // Model generation (mk_value) will synthesize the actual string.
+                zstring synth;
+                if (!synthesize_regex_string(r, len, synth)) {
+                    // No string of that length accepted by regex - conflict
+                    IF_VERBOSE(3, verbose_stream() << "  regex conflict: no string of len=" << len
+                               << " in " << mk_bounded_pp(r, m, 2) << "\n";);
+                    set_conflict(dep);
+                    return true;
+                }
+                IF_VERBOSE(3, verbose_stream() << "  regex feasible: len=" << len
+                           << " has witness \"" << synth << "\"\n";);
            }
        }

-        // Check if the derived regex is the empty set
-        if (sign && m_util.re.is_empty(derived)) {
-            set_conflict(dep);
-            return true;
-        }
        return false;
    }

+    // Returns true if every membership constraint is already verified
+    // (ground strings checked) or provably feasible (non-ground with known length).
+    bool theory_nseq::all_mems_checked() {
+        for (auto const& mem : m_state.mems()) {
+            expr* s = mem.str();
+            expr* r = mem.regex();
+            bool sign = mem.sign();
+            nseq_dependency* dep = mem.dep();
+            zstring sval;
+            if (is_ground_string(s, sval)) {
+                // Verify the ground string
+                expr_ref derived = derive_regex(r, sval);
+                expr_ref nullable = m_seq_rewrite.is_nullable(derived);
+                m_rewrite(nullable);
+                if (sign && !m.is_true(nullable))
+                    return false;
+                if (!sign && !m.is_false(nullable))
+                    return false;
+            }
+            else if (sign) {
+                // Non-ground positive membership: check feasibility by length
+                rational len_val;
+                if (get_length(s, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) {
+                    zstring synth;
+                    if (!synthesize_regex_string(r, len_val.get_unsigned(), synth))
+                        return false;  // infeasible; conflict should have been set by check_regex_mem
+                    // Feasible - model generator will assign the right value
+                }
+                else {
+                    // Length unknown and regex is empty: unconditionally infeasible
+                    if (m_util.re.is_empty(r))
+                        return false;
+                }
+            }
+        }
+        return true;
+    }
+
    // -------------------------------------------------------
    // Length reasoning
    // -------------------------------------------------------
@ -1491,7 +1805,34 @@ namespace smt {

    bool theory_nseq::get_length(expr* e, rational& val) {
        expr_ref len = mk_len(e);
-        return m_arith_value.get_value(len, val);
+        if (m_arith_value.get_value(len, val))
+            return true;
+        // Structural fallback: for str.substr(s, off, n) with concrete off and n,
+        // use the explicit length argument if it's a numeral and s has sufficient length.
+        expr* s = nullptr, *offset = nullptr, *len_arg = nullptr;
+        if (m_util.str.is_extract(e, s, offset, len_arg)) {
+            rational len_n;
+            if (m_autil.is_numeral(len_arg, len_n) && len_n.is_nonneg()) {
+                rational off_n;
+                if (m_autil.is_numeral(offset, off_n) && off_n.is_nonneg()) {
+                    rational parent_len;
+                    if (get_length(s, parent_len) &&
+                        off_n + len_n <= parent_len) {
+                        val = len_n;
+                        return true;
+                    }
+                }
+            }
+        }
+        // Fallback via lower/upper bound: if lo == hi, that's the value
+        rational lo, hi;
+        bool s_lo, s_hi;
+        if (m_arith_value.get_lo(len, lo, s_lo) && !s_lo &&
+            m_arith_value.get_up(len, hi, s_hi) && !s_hi && lo == hi) {
+            val = lo;
+            return true;
+        }
+        return false;
    }

    bool theory_nseq::lower_bound(expr* e, rational& lo) {
@ -1526,25 +1867,53 @@ namespace smt {
        return progress;
    }

+    // Build an arithmetic expression for the sum of lengths of the given string terms.
+    // Collapses concrete lengths (units, string constants) to integer numerals,
+    // and uses mk_len(e) for variables/complex terms.
+    expr_ref theory_nseq::build_length_sum(expr_ref_vector const& es) {
+        rational total(0);
+        expr_ref_vector var_lens(m);
+        for (expr* e : es) {
+            zstring s;
+            if (m_util.str.is_string(e, s)) {
+                total += rational(s.length());
+            } else if (m_util.str.is_unit(e)) {
+                total += rational(1);
+            } else if (m_util.str.is_empty(e)) {
+                // contributes 0
+            } else {
+                ensure_length_axiom(e);
+                var_lens.push_back(mk_len(e));
+            }
+        }
+        if (var_lens.empty())
+            return expr_ref(m_autil.mk_numeral(total, true), m);
+        expr_ref var_sum(m);
+        if (var_lens.size() == 1)
+            var_sum = expr_ref(var_lens.get(0), m);
+        else
+            var_sum = expr_ref(m_autil.mk_add(var_lens.size(), var_lens.data()), m);
+        if (total.is_zero())
+            return var_sum;
+        return expr_ref(m_autil.mk_add(var_sum, m_autil.mk_numeral(total, true)), m);
+    }
+
    bool theory_nseq::propagate_length_eqs() {
        bool progress = false;
        for (auto const& eq : m_state.eqs()) {
            expr_ref_vector const& lhs = eq.lhs();
            expr_ref_vector const& rhs = eq.rhs();
-            if (lhs.empty() || rhs.empty())
+            // Build sum of lengths directly, using concrete integer values for
+            // units/strings to avoid creating fresh unregistered concat terms.
+            expr_ref lhs_sum = build_length_sum(lhs);
+            expr_ref rhs_sum = build_length_sum(rhs);
+            // Skip if both sums are syntactically identical (trivially equal)
+            if (lhs_sum.get() == rhs_sum.get())
                continue;
-            sort* s = lhs[0]->get_sort();
-            expr_ref l = mk_concat(lhs, s);
-            expr_ref r = mk_concat(rhs, s);
-            expr_ref len_l = mk_len(l);
-            expr_ref len_r = mk_len(r);
-            if (ctx.e_internalized(len_l) && ctx.e_internalized(len_r)) {
-                enode* nl = ctx.get_enode(len_l);
-                enode* nr = ctx.get_enode(len_r);
-                if (nl->get_root() != nr->get_root()) {
-                    propagate_eq(eq.dep(), len_l, len_r, false);
-                    progress = true;
-                }
+            literal len_eq = mk_eq(lhs_sum, rhs_sum, false);
+            if (ctx.get_assignment(len_eq) != l_true) {
+                add_axiom(len_eq);
+                progress = true;
            }
        }
        return progress;
@ -1571,8 +1940,60 @@ namespace smt {
        st.update("nseq axioms", s.m_num_axioms);
    }

+    // Find the most constrained regex for a given string expression
+    // by scanning memberships that apply to e or its equivalence class root.
+    expr* theory_nseq::find_regex_for(expr* e) {
+        if (!ctx.e_internalized(e))
+            return nullptr;
+        enode* root = ctx.get_enode(e)->get_root();
+        for (auto const& mem : m_state.mems()) {
+            if (!mem.sign()) continue;
+            expr* ms = mem.str();
+            if (!ctx.e_internalized(ms)) continue;
+            if (ctx.get_enode(ms)->get_root() == root)
+                return mem.regex();
+        }
+        return nullptr;
+    }
+
+    // Find any positive membership for a string expression whose base
+    // is `base` and which starts at position `pos`. Returns the regex
+    // and sets `cover_len` to how many characters it covers.
+    // Used to synthesize sub-range values during model generation.
+    expr* theory_nseq::find_substr_regex(expr* base, unsigned pos,
+                                          unsigned total_len, unsigned& cover_len) {
+        if (!ctx.e_internalized(base))
+            return nullptr;
+        enode* base_root = ctx.get_enode(base)->get_root();
+        for (auto const& mem : m_state.mems()) {
+            if (!mem.sign()) continue;
+            expr* ms = mem.str();
+            if (!ctx.e_internalized(ms)) continue;
+            // Check if ms is str.substr(base, pos, k) for some k
+            expr* s = nullptr, *off = nullptr, *len_arg = nullptr;
+            if (!m_util.str.is_extract(ms, s, off, len_arg)) continue;
+            if (!ctx.e_internalized(s)) continue;
+            if (ctx.get_enode(s)->get_root() != base_root) continue;
+            // Check offset matches
+            rational off_v;
+            if (!m_autil.is_numeral(off, off_v) || !off_v.is_nonneg() ||
+                !off_v.is_unsigned() || off_v.get_unsigned() != pos) continue;
+            // Get the length (may be symbolic, use get_length)
+            rational len_v;
+            if (m_autil.is_numeral(len_arg, len_v) && len_v.is_nonneg() && len_v.is_unsigned()) {
+                cover_len = len_v.get_unsigned();
+                return mem.regex();
+            }
+            if (get_length(ms, len_v) && len_v.is_nonneg() && len_v.is_unsigned()) {
+                cover_len = len_v.get_unsigned();
+                return mem.regex();
+            }
+        }
+        return nullptr;
+    }
+
    // -------------------------------------------------------
-    // Model generation (stub)
+    // Model generation
    // -------------------------------------------------------

    model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) {
@ -1604,11 +2025,57 @@ namespace smt {
                curr = curr->get_next();
            } while (curr != root);

-            // Try to construct a string of the correct length
+            // Try to construct a string satisfying any regex membership constraint
            rational len_val;
            if (get_length(e, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) {
                unsigned len = len_val.get_unsigned();
+                // Check if there's a direct regex constraint for this string
+                expr* regex = find_regex_for(e);
+                if (regex) {
+                    zstring synth;
+                    if (synthesize_regex_string(regex, len, synth)) {
+                        expr_ref val(m_util.str.mk_string(synth), m);
+                        m_fresh_values.push_back(val);
+                        return alloc(expr_wrapper_proc, to_app(val));
+                    }
+                }
+                // Try to synthesize character by character from str.at(e, i) memberships.
+                // Preprocessors often split x ∈ r into (str.at x 0) ∈ r0, (str.substr x 1 k) ∈ r1, etc.
                zstring s;
+                bool from_chars = true;
+                unsigned pos = 0;
+                while (pos < len && from_chars) {
+                    // Option A: there's a membership for str.at(e, pos) (length-1 char)
+                    expr_ref at_i(m_util.str.mk_at(e, m_autil.mk_int(pos)), m);
+                    expr* r = find_regex_for(at_i);
+                    if (r) {
+                        zstring ch;
+                        if (synthesize_regex_string(r, 1, ch)) {
+                            s = s + ch;
+                            pos++;
+                            continue;
+                        }
+                    }
+                    // Option B: find a membership for str.substr(e, pos, k) and synthesize k chars
+                    unsigned cover_len = 0;
+                    expr* rs = find_substr_regex(e, pos, len, cover_len);
+                    if (rs && cover_len > 0) {
+                        zstring piece;
+                        if (synthesize_regex_string(rs, cover_len, piece)) {
+                            s = s + piece;
+                            pos += cover_len;
+                            continue;
+                        }
+                    }
+                    from_chars = false;
+                }
+                if (from_chars && s.length() == len) {
+                    expr_ref val(m_util.str.mk_string(s), m);
+                    m_fresh_values.push_back(val);
+                    return alloc(expr_wrapper_proc, to_app(val));
+                }
+                // No regex constraint (or synthesis failed): use default fill
+                s = zstring();
                for (unsigned i = 0; i < len; i++)
                    s = s + zstring("A");
                expr_ref val(m_util.str.mk_string(s), m);
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@ -97,6 +97,7 @@ namespace smt {
        bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
        bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep);
        bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
+        bool propagate_ground_split(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
        bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
        bool all_eqs_solved();
        bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);
@ -115,8 +116,14 @@ namespace smt {
        // Regex membership
        bool check_regex_memberships();
        bool check_regex_mem(nseq_mem const& mem);
+        bool add_regex_length_axioms(nseq_mem const& mem);
+        bool synthesize_regex_string(expr* regex, unsigned len, zstring& result);
+        bool dfs_regex(expr* r, unsigned target_len, unsigned cur_len, zstring& result);
        bool is_ground_string(expr* e, zstring& s);
        expr_ref derive_regex(expr* regex, zstring const& prefix);
+        bool all_mems_checked();
+        expr* find_regex_for(expr* e);
+        expr* find_substr_regex(expr* base, unsigned pos, unsigned total_len, unsigned& cover_len);

        // String operation reductions
        void reduce_op(expr* e);
@ -129,6 +136,7 @@ namespace smt {

        // Length reasoning
        void add_length_axiom(expr* n);
+        expr_ref build_length_sum(expr_ref_vector const& es);
        bool check_zero_length();
        bool propagate_length_eqs();
        bool get_length(expr* e, rational& val);
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@ -129,6 +129,7 @@ add_executable(test-z3
  simplifier.cpp
  sls_test.cpp
  sls_seq_plugin.cpp
+  nseq_basic.cpp
  small_object_allocator.cpp
  smt2print_parse.cpp
  smt_context.cpp
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@ -284,6 +284,7 @@ int main(int argc, char ** argv) {
    TST(sls_test);
    TST(scoped_vector);
    TST(sls_seq_plugin);
+    TST(nseq_basic);
    TST(ho_matcher);
    TST(finite_set);
    TST(finite_set_rewriter);
--- a/src/test/nseq_basic.cpp
+++ b/src/test/nseq_basic.cpp
@ -0,0 +1,203 @@
+/*++
+Copyright (c) 2025 Microsoft Corporation
+
+Module Name:
+
+    nseq_basic.cpp
+
+Abstract:
+
+    Basic unit tests for the Nielsen-based string theory solver (theory_nseq).
+    Tests that theory_nseq can be selected via smt.string_solver=nseq and
+    correctly handles basic string constraints.
+
+Author:
+
+    Clemens Eisenhofer
+    Nikolaj Bjorner (nbjorner) 2025-2-28
+
+--*/
+
+#include "api/z3.h"
+#include <iostream>
+#include "util/util.h"
+
+// Helper: create a Z3 context with string_solver set to nseq
+static Z3_context mk_nseq_ctx() {
+    Z3_global_param_set("smt.string_solver", "nseq");
+    Z3_config cfg = Z3_mk_config();
+    Z3_context ctx = Z3_mk_context(cfg);
+    Z3_del_config(cfg);
+    Z3_global_param_reset_all();
+    return ctx;
+}
+
+// Test 1: Simple string constant equality (sat)
+static void test_nseq_simple_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast hello = Z3_mk_string(ctx, "hello");
+    Z3_ast eq = Z3_mk_eq(ctx, x, hello);
+    Z3_solver_assert(ctx, s, eq);
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 2: Simple string constant disequality (unsat)
+static void test_nseq_simple_unsat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast hello = Z3_mk_string(ctx, "hello");
+    Z3_ast world = Z3_mk_string(ctx, "world");
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, hello));
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, world));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_FALSE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 3: String concatenation (sat)
+static void test_nseq_concat_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast y = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "y"), str_sort);
+    Z3_ast hello = Z3_mk_string(ctx, "hello");
+    Z3_ast args[2] = { x, y };
+    Z3_ast xy = Z3_mk_seq_concat(ctx, 2, args);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, xy, hello));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 4: Prefix constraint (sat)
+static void test_nseq_prefix_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast pre = Z3_mk_string(ctx, "hel");
+    // prefix("hel", x)
+    Z3_ast prefix_cond = Z3_mk_seq_prefix(ctx, pre, x);
+    Z3_sort int_sort = Z3_mk_int_sort(ctx);
+    Z3_ast five = Z3_mk_int(ctx, 5, int_sort);
+    Z3_ast len_x = Z3_mk_seq_length(ctx, x);
+    Z3_solver_assert(ctx, s, prefix_cond);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, len_x, five));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 5: Empty string equality (sat)
+static void test_nseq_empty_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast empty = Z3_mk_string(ctx, "");
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, empty));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 6: Basic regex membership (sat)
+static void test_nseq_regex_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    // x = "abab" ∧ x ∈ (ab)*
+    Z3_ast abab = Z3_mk_string(ctx, "abab");
+    Z3_ast ab_re = Z3_mk_seq_to_re(ctx, Z3_mk_string(ctx, "ab"));
+    Z3_ast star_re = Z3_mk_re_star(ctx, ab_re);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, abab));
+    Z3_solver_assert(ctx, s, Z3_mk_seq_in_re(ctx, x, star_re));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 7: Basic regex membership (unsat)
+static void test_nseq_regex_unsat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    // x = "abc" ∧ x ∈ (ab)*  → unsat ("abc" is not in (ab)*)
+    Z3_ast abc = Z3_mk_string(ctx, "abc");
+    Z3_ast ab_re = Z3_mk_seq_to_re(ctx, Z3_mk_string(ctx, "ab"));
+    Z3_ast star_re = Z3_mk_re_star(ctx, ab_re);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, abc));
+    Z3_solver_assert(ctx, s, Z3_mk_seq_in_re(ctx, x, star_re));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_FALSE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 8: Length constraint (sat)
+static void test_nseq_length_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_sort int_sort = Z3_mk_int_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast len_x = Z3_mk_seq_length(ctx, x);
+    Z3_ast three = Z3_mk_int(ctx, 3, int_sort);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, len_x, three));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+void tst_nseq_basic() {
+    test_nseq_simple_sat();
+    test_nseq_simple_unsat();
+    test_nseq_concat_sat();
+    test_nseq_prefix_sat();
+    test_nseq_empty_sat();
+    test_nseq_regex_sat();
+    test_nseq_regex_unsat();
+    test_nseq_length_sat();
+    std::cout << "nseq_basic: all tests passed\n";
+}