From 9a9083384ffa58d537a7f1e17cf46c275a12941a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Feb 2026 22:03:54 +0000
Subject: [PATCH 1/8] Initial plan


From 02db14c8c4db14b3153627c1d35e88acc16f7948 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Feb 2026 22:33:47 +0000
Subject: [PATCH 2/8] Add nseq_basic unit tests for theory_nseq solver

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/test/CMakeLists.txt |   1 +
 src/test/main.cpp       |   1 +
 src/test/nseq_basic.cpp | 203 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 205 insertions(+)
 create mode 100644 src/test/nseq_basic.cpp
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index 1d5b5ce18..5a6f8b463 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -129,6 +129,7 @@ add_executable(test-z3
   simplifier.cpp
   sls_test.cpp
   sls_seq_plugin.cpp
+  nseq_basic.cpp
   small_object_allocator.cpp
   smt2print_parse.cpp
   smt_context.cpp
diff --git a/src/test/main.cpp b/src/test/main.cpp
index c5d55ebe1..7f11157e2 100644
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@@ -284,6 +284,7 @@ int main(int argc, char ** argv) {
     TST(sls_test);
     TST(scoped_vector);
     TST(sls_seq_plugin);
+    TST(nseq_basic);
     TST(ho_matcher);
     TST(finite_set);
     TST(finite_set_rewriter);
diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp
new file mode 100644
index 000000000..bdde14289
--- /dev/null
+++ b/src/test/nseq_basic.cpp
@@ -0,0 +1,203 @@
+/*++
+Copyright (c) 2025 Microsoft Corporation
+
+Module Name:
+
+    nseq_basic.cpp
+
+Abstract:
+
+    Basic unit tests for the Nielsen-based string theory solver (theory_nseq).
+    Tests that theory_nseq can be selected via smt.string_solver=nseq and
+    correctly handles basic string constraints.
+
+Author:
+
+    Clemens Eisenhofer
+    Nikolaj Bjorner (nbjorner) 2025-2-28
+
+--*/
+
+#include "api/z3.h"
+#include <iostream>
+#include "util/util.h"
+
+// Helper: create a Z3 context with string_solver set to nseq
+static Z3_context mk_nseq_ctx() {
+    Z3_global_param_set("smt.string_solver", "nseq");
+    Z3_config cfg = Z3_mk_config();
+    Z3_context ctx = Z3_mk_context(cfg);
+    Z3_del_config(cfg);
+    Z3_global_param_reset_all();
+    return ctx;
+}
+
+// Test 1: Simple string constant equality (sat)
+static void test_nseq_simple_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast hello = Z3_mk_string(ctx, "hello");
+    Z3_ast eq = Z3_mk_eq(ctx, x, hello);
+    Z3_solver_assert(ctx, s, eq);
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 2: Simple string constant disequality (unsat)
+static void test_nseq_simple_unsat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast hello = Z3_mk_string(ctx, "hello");
+    Z3_ast world = Z3_mk_string(ctx, "world");
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, hello));
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, world));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_FALSE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 3: String concatenation (sat)
+static void test_nseq_concat_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast y = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "y"), str_sort);
+    Z3_ast hello = Z3_mk_string(ctx, "hello");
+    Z3_ast args[2] = { x, y };
+    Z3_ast xy = Z3_mk_seq_concat(ctx, 2, args);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, xy, hello));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 4: Prefix constraint (sat)
+static void test_nseq_prefix_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast pre = Z3_mk_string(ctx, "hel");
+    // prefix("hel", x)
+    Z3_ast prefix_cond = Z3_mk_seq_prefix(ctx, pre, x);
+    Z3_sort int_sort = Z3_mk_int_sort(ctx);
+    Z3_ast five = Z3_mk_int(ctx, 5, int_sort);
+    Z3_ast len_x = Z3_mk_seq_length(ctx, x);
+    Z3_solver_assert(ctx, s, prefix_cond);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, len_x, five));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 5: Empty string equality (sat)
+static void test_nseq_empty_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast empty = Z3_mk_string(ctx, "");
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, empty));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 6: Basic regex membership (sat)
+static void test_nseq_regex_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    // x = "abab" ∧ x ∈ (ab)*
+    Z3_ast abab = Z3_mk_string(ctx, "abab");
+    Z3_ast ab_re = Z3_mk_seq_to_re(ctx, Z3_mk_string(ctx, "ab"));
+    Z3_ast star_re = Z3_mk_re_star(ctx, ab_re);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, abab));
+    Z3_solver_assert(ctx, s, Z3_mk_seq_in_re(ctx, x, star_re));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 7: Basic regex membership (unsat)
+static void test_nseq_regex_unsat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    // x = "abc" ∧ x ∈ (ab)*  → unsat ("abc" is not in (ab)*)
+    Z3_ast abc = Z3_mk_string(ctx, "abc");
+    Z3_ast ab_re = Z3_mk_seq_to_re(ctx, Z3_mk_string(ctx, "ab"));
+    Z3_ast star_re = Z3_mk_re_star(ctx, ab_re);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, abc));
+    Z3_solver_assert(ctx, s, Z3_mk_seq_in_re(ctx, x, star_re));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_FALSE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+// Test 8: Length constraint (sat)
+static void test_nseq_length_sat() {
+    Z3_context ctx = mk_nseq_ctx();
+    Z3_solver s = Z3_mk_solver(ctx);
+    Z3_solver_inc_ref(ctx, s);
+
+    Z3_sort str_sort = Z3_mk_string_sort(ctx);
+    Z3_sort int_sort = Z3_mk_int_sort(ctx);
+    Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort);
+    Z3_ast len_x = Z3_mk_seq_length(ctx, x);
+    Z3_ast three = Z3_mk_int(ctx, 3, int_sort);
+    Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, len_x, three));
+    Z3_lbool r = Z3_solver_check(ctx, s);
+    ENSURE(r == Z3_L_TRUE);
+
+    Z3_solver_dec_ref(ctx, s);
+    Z3_del_context(ctx);
+}
+
+void tst_nseq_basic() {
+    test_nseq_simple_sat();
+    test_nseq_simple_unsat();
+    test_nseq_concat_sat();
+    test_nseq_prefix_sat();
+    test_nseq_empty_sat();
+    test_nseq_regex_sat();
+    test_nseq_regex_unsat();
+    test_nseq_length_sat();
+    std::cout << "nseq_basic: all tests passed\n";
+}

From a65b64ae6340b8aff9737f1b05709f4471cce637 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Feb 2026 23:47:19 +0000
Subject: [PATCH 3/8] Fix soundness bug in theory_nseq: propagate_length_eqs
 and branch_eq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix two bugs that caused false unsat results in the nseq string solver
when word equations were combined with length constraints:

1. propagate_length_eqs was creating fresh mk_concat terms not in the
   SMT context, so ctx.e_internalized() always failed and the arithmetic
   solver never learned len(x)+len(y)+len(z)=6 from x·y·z="abcdef".
   Fix: new build_length_sum helper collapses concrete lengths to integer
   numerals; propagate_length_eqs adds the sum equality as a theory axiom
   that the arithmetic solver can immediately use.

2. branch_eq was branching variables as empty even when lower_bound > 0
   was already known from arithmetic constraints (e.g. len(x)=2).
   Fix: skip the empty branch when lower_bound(len_e, lo) && lo > 0.

3. Moved propagate_length_eqs before solve_eqs in final_check_eh so
   arithmetic bounds are available to guide branching decisions.

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/theory_nseq.cpp | 66 ++++++++++++++++++++++++++++++-----------
 src/smt/theory_nseq.h   |  1 +
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index de33de38d..b1128317e 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -73,6 +73,11 @@ namespace smt {
         if (reduce_predicates())
             return FC_CONTINUE;
 
+        // Propagate length equalities from equations before solving,
+        // so arithmetic bounds are available to guide branching
+        if (propagate_length_eqs())
+            return FC_CONTINUE;
+
         // Check zero-length variables before solving (uses arithmetic bounds)
         if (check_zero_length())
             return FC_CONTINUE;
@@ -81,10 +86,6 @@ namespace smt {
         if (solve_eqs())
             return FC_CONTINUE;
 
-        // Propagate length equalities from equations
-        if (propagate_length_eqs())
-            return FC_CONTINUE;
-
         // Check regex membership constraints
         if (check_regex_memberships())
             return FC_CONTINUE;
@@ -741,6 +742,9 @@ namespace smt {
                 literal eq_empty = mk_eq(e, emp, false);
                 switch (ctx.get_assignment(eq_empty)) {
                 case l_undef:
+                    // If lower length bound is already > 0, skip the empty branch
+                    if (ctx.e_internalized(len_e) && lower_bound(len_e, lo) && lo > rational(0))
+                        break;
                     // Force the empty branch first
                     TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
                     IF_VERBOSE(3, verbose_stream() << "  branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
@@ -1526,25 +1530,53 @@ namespace smt {
         return progress;
     }
 
+    // Build an arithmetic expression for the sum of lengths of the given string terms.
+    // Collapses concrete lengths (units, string constants) to integer numerals,
+    // and uses mk_len(e) for variables/complex terms.
+    expr_ref theory_nseq::build_length_sum(expr_ref_vector const& es) {
+        rational total(0);
+        expr_ref_vector var_lens(m);
+        for (expr* e : es) {
+            zstring s;
+            if (m_util.str.is_string(e, s)) {
+                total += rational(s.length());
+            } else if (m_util.str.is_unit(e)) {
+                total += rational(1);
+            } else if (m_util.str.is_empty(e)) {
+                // contributes 0
+            } else {
+                ensure_length_axiom(e);
+                var_lens.push_back(mk_len(e));
+            }
+        }
+        if (var_lens.empty())
+            return expr_ref(m_autil.mk_numeral(total, true), m);
+        expr_ref var_sum(m);
+        if (var_lens.size() == 1)
+            var_sum = expr_ref(var_lens.get(0), m);
+        else
+            var_sum = expr_ref(m_autil.mk_add(var_lens.size(), var_lens.data()), m);
+        if (total.is_zero())
+            return var_sum;
+        return expr_ref(m_autil.mk_add(var_sum, m_autil.mk_numeral(total, true)), m);
+    }
+
     bool theory_nseq::propagate_length_eqs() {
         bool progress = false;
         for (auto const& eq : m_state.eqs()) {
             expr_ref_vector const& lhs = eq.lhs();
             expr_ref_vector const& rhs = eq.rhs();
-            if (lhs.empty() || rhs.empty())
+            // Build sum of lengths directly, using concrete integer values for
+            // units/strings to avoid creating fresh unregistered concat terms.
+            expr_ref lhs_sum = build_length_sum(lhs);
+            expr_ref rhs_sum = build_length_sum(rhs);
+            // Skip if both sums are syntactically identical (trivially equal)
+            if (lhs_sum.get() == rhs_sum.get())
                 continue;
-            sort* s = lhs[0]->get_sort();
-            expr_ref l = mk_concat(lhs, s);
-            expr_ref r = mk_concat(rhs, s);
-            expr_ref len_l = mk_len(l);
-            expr_ref len_r = mk_len(r);
-            if (ctx.e_internalized(len_l) && ctx.e_internalized(len_r)) {
-                enode* nl = ctx.get_enode(len_l);
-                enode* nr = ctx.get_enode(len_r);
-                if (nl->get_root() != nr->get_root()) {
-                    propagate_eq(eq.dep(), len_l, len_r, false);
-                    progress = true;
-                }
+            literal len_eq = mk_eq(lhs_sum, rhs_sum, false);
+            if (ctx.get_assignment(len_eq) != l_true) {
+                add_axiom(len_eq);
+                progress = true;
             }
         }
         return progress;
diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h
index 60f0502a0..7830c08b9 100644
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@@ -129,6 +129,7 @@ namespace smt {
 
         // Length reasoning
         void add_length_axiom(expr* n);
+        expr_ref build_length_sum(expr_ref_vector const& es);
         bool check_zero_length();
         bool propagate_length_eqs();
         bool get_length(expr* e, rational& val);

From faeb936a8358c68177f796c5656133c71bb032f8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 1 Mar 2026 02:25:29 +0000
Subject: [PATCH 4/8] Continue nseq: regex synthesis, model generation, length
 propagation

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/theory_nseq.cpp | 330 ++++++++++++++++++++++++++++++++++++----
 src/smt/theory_nseq.h   |   6 +
 2 files changed, 305 insertions(+), 31 deletions(-)

diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index b1128317e..368f5f310 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -94,8 +94,11 @@ namespace smt {
         if (check_disequalities())
             return FC_CONTINUE;
 
-        if (all_eqs_solved() && m_state.mems().empty())
-            return FC_DONE;
+        if (all_eqs_solved()) {
+            // All word equations solved; check if remaining memberships are handled
+            if (all_mems_checked())
+                return FC_DONE;
+        }
 
         return FC_GIVEUP;
     }
@@ -195,6 +198,12 @@ namespace smt {
             if (m_find.find(v1) != m_find.find(v2))
                 m_find.merge(v1, v2);
             m_state.add_eq(o1, o2, dep);
+            // Propagate length equality: len(o1) = len(o2)
+            ensure_length_axiom(o1);
+            ensure_length_axiom(o2);
+            expr_ref len1 = mk_len(o1);
+            expr_ref len2 = mk_len(o2);
+            add_axiom(mk_eq(len1, len2, false));
             IF_VERBOSE(3, verbose_stream() << "new_eq_eh: " << mk_bounded_pp(o1, m) << " = " << mk_bounded_pp(o2, m)
                   << " (total eqs: " << m_state.eqs().size() << ")\n");
             TRACE(seq, tout << "new eq: " << mk_bounded_pp(o1, m) << " = " << mk_bounded_pp(o2, m) << "\n";);
@@ -1397,6 +1406,84 @@ namespace smt {
         return r;
     }
 
+    // DFS: find a string of exactly target_len in regex r.
+    // On success, appends characters to 'result' and returns true.
+    // cur_len is how many characters have already been appended.
+    bool theory_nseq::dfs_regex(expr* r, unsigned target_len, unsigned cur_len, zstring& result) {
+        if (cur_len == target_len) {
+            expr_ref nullable = m_seq_rewrite.is_nullable(expr_ref(r, m));
+            m_rewrite(nullable);
+            return m.is_true(nullable);
+        }
+        // Try printable characters: lowercase, uppercase, digits, then common punctuation
+        static const unsigned char priority[] = {
+            'a','b','c','d','e','f','g','h','i','j','k','l','m',
+            'n','o','p','q','r','s','t','u','v','w','x','y','z',
+            '0','1','2','3','4','5','6','7','8','9',
+            'A','B','C','D','E','F','G','H','I','J','K','L','M',
+            'N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
+            ' ','.', ',', '!', '?', '-', '_', '@', '#', '$', '%', '&', '*'
+        };
+        for (unsigned c : priority) {
+            expr_ref ch(m_util.mk_char(c), m);
+            expr_ref dr = m_seq_rewrite.mk_derivative(ch, expr_ref(r, m));
+            m_rewrite(dr);
+            if (m_util.re.is_empty(dr))
+                continue;
+            result = result + zstring(c);
+            if (dfs_regex(dr, target_len, cur_len + 1, result))
+                return true;
+            // Backtrack: remove last character
+            result = result.extract(0, result.length() - 1);
+        }
+        return false;
+    }
+
+    // Synthesize a concrete string of exactly `len` accepted by `regex`.
+    // Returns true and sets `result` on success.
+    bool theory_nseq::synthesize_regex_string(expr* regex, unsigned len, zstring& result) {
+        result = zstring();
+        return dfs_regex(regex, len, 0, result);
+    }
+
+    // Add length-bound axioms from regex structure: len(s) >= min_length(r).
+    // Also adds: len(s) = 0 → nullable(r) (otherwise conflict).
+    bool theory_nseq::add_regex_length_axioms(nseq_mem const& mem) {
+        expr* s = mem.str();
+        expr* r = mem.regex();
+        bool sign = mem.sign();
+        nseq_dependency* dep = mem.dep();
+        bool progress = false;
+
+        if (sign) {
+            // s ∈ r → len(s) >= min_length(r)
+            unsigned minlen = m_util.re.min_length(r);
+            if (minlen > 0) {
+                expr_ref len_s = mk_len(s);
+                ensure_length_axiom(s);
+                literal lb = mk_literal(m_autil.mk_ge(len_s, m_autil.mk_int(minlen)));
+                if (ctx.get_assignment(lb) != l_true) {
+                    add_axiom(lb);
+                    progress = true;
+                }
+            }
+            // If r is not nullable and len(s) = 0, conflict
+            expr_ref nullable = m_seq_rewrite.is_nullable(expr_ref(r, m));
+            m_rewrite(nullable);
+            if (m.is_false(nullable)) {
+                // s ∈ r → s ≠ ""
+                expr_ref emp(m_util.str.mk_empty(s->get_sort()), m);
+                ensure_enode(emp);
+                literal s_empty = mk_eq(s, emp, false);
+                if (ctx.get_assignment(~s_empty) != l_true) {
+                    add_axiom(~s_empty);
+                    progress = true;
+                }
+            }
+        }
+        return progress;
+    }
+
     bool theory_nseq::check_regex_memberships() {
         bool progress = false;
         for (auto const& mem : m_state.mems()) {
@@ -1414,43 +1501,99 @@ namespace smt {
         bool sign = mem.sign();
         nseq_dependency* dep = mem.dep();
 
+        // Add structural length axioms from the regex
+        if (add_regex_length_axioms(mem))
+            return true;
+
         // Try to determine the string value from the e-graph
         zstring sval;
-        if (!is_ground_string(s, sval))
+        if (is_ground_string(s, sval)) {
+            // Compute derivatives for the known string
+            expr_ref derived = derive_regex(r, sval);
+
+            // Check nullable: does the derived regex accept the empty word?
+            expr_ref nullable = m_seq_rewrite.is_nullable(derived);
+            m_rewrite(nullable);
+
+            if (sign) {
+                // Positive membership: s must be in r
+                if (m.is_false(nullable) || m_util.re.is_empty(derived)) {
+                    set_conflict(dep);
+                    return true;
+                }
+            }
+            else {
+                // Negative membership: s must NOT be in r
+                if (m.is_true(nullable)) {
+                    set_conflict(dep);
+                    return true;
+                }
+            }
             return false;
+        }
 
-        // Compute derivatives for the known string
-        expr_ref derived = derive_regex(r, sval);
-
-        // Check nullable: does the derived regex accept the empty word?
-        expr_ref nullable = m_seq_rewrite.is_nullable(derived);
-        m_rewrite(nullable);
-
+        // String is not yet ground. For positive membership:
+        // if we know the exact length, synthesize a concrete string.
         if (sign) {
-            // Positive membership: s must be in r
-            if (m.is_false(nullable)) {
-                // String is fully determined but regex rejects → conflict
-                set_conflict(dep);
-                return true;
-            }
-        }
-        else {
-            // Negative membership: s must NOT be in r
-            if (m.is_true(nullable)) {
-                // String is fully determined and regex accepts → conflict
-                set_conflict(dep);
-                return true;
+            rational len_val;
+            if (get_length(s, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) {
+                unsigned len = len_val.get_unsigned();
+                // Feasibility check only: detect conflicts but don't force a specific value.
+                // Model generation (mk_value) will synthesize the actual string.
+                zstring synth;
+                if (!synthesize_regex_string(r, len, synth)) {
+                    // No string of that length accepted by regex - conflict
+                    IF_VERBOSE(3, verbose_stream() << "  regex conflict: no string of len=" << len
+                               << " in " << mk_bounded_pp(r, m, 2) << "\n";);
+                    set_conflict(dep);
+                    return true;
+                }
+                IF_VERBOSE(3, verbose_stream() << "  regex feasible: len=" << len
+                           << " has witness \"" << synth << "\"\n";);
             }
         }
 
-        // Check if the derived regex is the empty set
-        if (sign && m_util.re.is_empty(derived)) {
-            set_conflict(dep);
-            return true;
-        }
         return false;
     }
 
+    // Returns true if every membership constraint is already verified
+    // (ground strings checked) or provably feasible (non-ground with known length).
+    bool theory_nseq::all_mems_checked() {
+        for (auto const& mem : m_state.mems()) {
+            expr* s = mem.str();
+            expr* r = mem.regex();
+            bool sign = mem.sign();
+            nseq_dependency* dep = mem.dep();
+            zstring sval;
+            if (is_ground_string(s, sval)) {
+                // Verify the ground string
+                expr_ref derived = derive_regex(r, sval);
+                expr_ref nullable = m_seq_rewrite.is_nullable(derived);
+                m_rewrite(nullable);
+                if (sign && !m.is_true(nullable))
+                    return false;
+                if (!sign && !m.is_false(nullable))
+                    return false;
+            }
+            else if (sign) {
+                // Non-ground positive membership: check feasibility by length
+                rational len_val;
+                if (get_length(s, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) {
+                    zstring synth;
+                    if (!synthesize_regex_string(r, len_val.get_unsigned(), synth))
+                        return false;  // infeasible; conflict should have been set by check_regex_mem
+                    // Feasible - model generator will assign the right value
+                }
+                else {
+                    // Length unknown and regex is empty: unconditionally infeasible
+                    if (m_util.re.is_empty(r))
+                        return false;
+                }
+            }
+        }
+        return true;
+    }
+
     // -------------------------------------------------------
     // Length reasoning
     // -------------------------------------------------------
@@ -1495,7 +1638,34 @@ namespace smt {
 
     bool theory_nseq::get_length(expr* e, rational& val) {
         expr_ref len = mk_len(e);
-        return m_arith_value.get_value(len, val);
+        if (m_arith_value.get_value(len, val))
+            return true;
+        // Structural fallback: for str.substr(s, off, n) with concrete off and n,
+        // use the explicit length argument if it's a numeral and s has sufficient length.
+        expr* s = nullptr, *offset = nullptr, *len_arg = nullptr;
+        if (m_util.str.is_extract(e, s, offset, len_arg)) {
+            rational len_n;
+            if (m_autil.is_numeral(len_arg, len_n) && len_n.is_nonneg()) {
+                rational off_n;
+                if (m_autil.is_numeral(offset, off_n) && off_n.is_nonneg()) {
+                    rational parent_len;
+                    if (get_length(s, parent_len) &&
+                        off_n + len_n <= parent_len) {
+                        val = len_n;
+                        return true;
+                    }
+                }
+            }
+        }
+        // Fallback via lower/upper bound: if lo == hi, that's the value
+        rational lo, hi;
+        bool s_lo, s_hi;
+        if (m_arith_value.get_lo(len, lo, s_lo) && !s_lo &&
+            m_arith_value.get_up(len, hi, s_hi) && !s_hi && lo == hi) {
+            val = lo;
+            return true;
+        }
+        return false;
     }
 
     bool theory_nseq::lower_bound(expr* e, rational& lo) {
@@ -1603,8 +1773,60 @@ namespace smt {
         st.update("nseq axioms", s.m_num_axioms);
     }
 
+    // Find the most constrained regex for a given string expression
+    // by scanning memberships that apply to e or its equivalence class root.
+    expr* theory_nseq::find_regex_for(expr* e) {
+        if (!ctx.e_internalized(e))
+            return nullptr;
+        enode* root = ctx.get_enode(e)->get_root();
+        for (auto const& mem : m_state.mems()) {
+            if (!mem.sign()) continue;
+            expr* ms = mem.str();
+            if (!ctx.e_internalized(ms)) continue;
+            if (ctx.get_enode(ms)->get_root() == root)
+                return mem.regex();
+        }
+        return nullptr;
+    }
+
+    // Find any positive membership for a string expression whose base
+    // is `base` and which starts at position `pos`. Returns the regex
+    // and sets `cover_len` to how many characters it covers.
+    // Used to synthesize sub-range values during model generation.
+    expr* theory_nseq::find_substr_regex(expr* base, unsigned pos,
+                                          unsigned total_len, unsigned& cover_len) {
+        if (!ctx.e_internalized(base))
+            return nullptr;
+        enode* base_root = ctx.get_enode(base)->get_root();
+        for (auto const& mem : m_state.mems()) {
+            if (!mem.sign()) continue;
+            expr* ms = mem.str();
+            if (!ctx.e_internalized(ms)) continue;
+            // Check if ms is str.substr(base, pos, k) for some k
+            expr* s = nullptr, *off = nullptr, *len_arg = nullptr;
+            if (!m_util.str.is_extract(ms, s, off, len_arg)) continue;
+            if (!ctx.e_internalized(s)) continue;
+            if (ctx.get_enode(s)->get_root() != base_root) continue;
+            // Check offset matches
+            rational off_v;
+            if (!m_autil.is_numeral(off, off_v) || !off_v.is_nonneg() ||
+                !off_v.is_unsigned() || off_v.get_unsigned() != pos) continue;
+            // Get the length (may be symbolic, use get_length)
+            rational len_v;
+            if (m_autil.is_numeral(len_arg, len_v) && len_v.is_nonneg() && len_v.is_unsigned()) {
+                cover_len = len_v.get_unsigned();
+                return mem.regex();
+            }
+            if (get_length(ms, len_v) && len_v.is_nonneg() && len_v.is_unsigned()) {
+                cover_len = len_v.get_unsigned();
+                return mem.regex();
+            }
+        }
+        return nullptr;
+    }
+
     // -------------------------------------------------------
-    // Model generation (stub)
+    // Model generation
     // -------------------------------------------------------
 
     model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) {
@@ -1636,11 +1858,57 @@ namespace smt {
                 curr = curr->get_next();
             } while (curr != root);
 
-            // Try to construct a string of the correct length
+            // Try to construct a string satisfying any regex membership constraint
             rational len_val;
             if (get_length(e, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) {
                 unsigned len = len_val.get_unsigned();
+                // Check if there's a direct regex constraint for this string
+                expr* regex = find_regex_for(e);
+                if (regex) {
+                    zstring synth;
+                    if (synthesize_regex_string(regex, len, synth)) {
+                        expr_ref val(m_util.str.mk_string(synth), m);
+                        m_fresh_values.push_back(val);
+                        return alloc(expr_wrapper_proc, to_app(val));
+                    }
+                }
+                // Try to synthesize character by character from str.at(e, i) memberships.
+                // Preprocessors often split x ∈ r into (str.at x 0) ∈ r0, (str.substr x 1 k) ∈ r1, etc.
                 zstring s;
+                bool from_chars = true;
+                unsigned pos = 0;
+                while (pos < len && from_chars) {
+                    // Option A: there's a membership for str.at(e, pos) (length-1 char)
+                    expr_ref at_i(m_util.str.mk_at(e, m_autil.mk_int(pos)), m);
+                    expr* r = find_regex_for(at_i);
+                    if (r) {
+                        zstring ch;
+                        if (synthesize_regex_string(r, 1, ch)) {
+                            s = s + ch;
+                            pos++;
+                            continue;
+                        }
+                    }
+                    // Option B: find a membership for str.substr(e, pos, k) and synthesize k chars
+                    unsigned cover_len = 0;
+                    expr* rs = find_substr_regex(e, pos, len, cover_len);
+                    if (rs && cover_len > 0) {
+                        zstring piece;
+                        if (synthesize_regex_string(rs, cover_len, piece)) {
+                            s = s + piece;
+                            pos += cover_len;
+                            continue;
+                        }
+                    }
+                    from_chars = false;
+                }
+                if (from_chars && s.length() == len) {
+                    expr_ref val(m_util.str.mk_string(s), m);
+                    m_fresh_values.push_back(val);
+                    return alloc(expr_wrapper_proc, to_app(val));
+                }
+                // No regex constraint (or synthesis failed): use default fill
+                s = zstring();
                 for (unsigned i = 0; i < len; i++)
                     s = s + zstring("A");
                 expr_ref val(m_util.str.mk_string(s), m);
diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h
index 7830c08b9..72a2b29cb 100644
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@@ -115,8 +115,14 @@ namespace smt {
         // Regex membership
         bool check_regex_memberships();
         bool check_regex_mem(nseq_mem const& mem);
+        bool add_regex_length_axioms(nseq_mem const& mem);
+        bool synthesize_regex_string(expr* regex, unsigned len, zstring& result);
+        bool dfs_regex(expr* r, unsigned target_len, unsigned cur_len, zstring& result);
         bool is_ground_string(expr* e, zstring& s);
         expr_ref derive_regex(expr* regex, zstring const& prefix);
+        bool all_mems_checked();
+        expr* find_regex_for(expr* e);
+        expr* find_substr_regex(expr* base, unsigned pos, unsigned total_len, unsigned& cover_len);
 
         // String operation reductions
         void reduce_op(expr* e);

From 754892909b2bb66e1702816532ae2a5892fd9158 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 1 Mar 2026 13:41:12 +0000
Subject: [PATCH 5/8] Fix var-vs-var Nielsen split and final_check_eh round
 structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- split_variable: var-vs-var now uses length-guided branching via SAT
  instead of incorrect circular mk_tail(var,1) split. With known lengths,
  deterministically splits: unify if equal, mk_post(head,len_var) if var<head,
  mk_post(var,len_head) if var>head. With unknown lengths, branches via
  arithmetic SAT literal (|var| ≤ |head|).
- final_check_eh: process pending axioms then fall through to solve_eqs in
  the same round. This ensures force_phase is set on branching variables
  before the SAT solver makes any phase decisions. propagate_length_eqs and
  check_zero_length no longer short-circuit with FC_CONTINUE.

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/theory_nseq.cpp | 143 +++++++++++++++++++++++++++++++++-------
 src/smt/theory_nseq.h   |   3 +-
 2 files changed, 121 insertions(+), 25 deletions(-)

diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index 368f5f310..66b31c2b1 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -66,26 +66,34 @@ namespace smt {
             for (unsigned i = head; i < axioms.size(); ++i)
                 deque_axiom(axioms[i]);
             m_state.set_axioms_head(axioms.size());
-            return FC_CONTINUE;
+            if (ctx.inconsistent())
+                return FC_CONTINUE;
+            // Fall through: call solve_eqs in the same round so force_phase is set
+            // before the SAT solver makes random phase decisions.
         }
 
         // Reduce predicates (prefix, suffix, contains) to word equations
         if (reduce_predicates())
             return FC_CONTINUE;
 
-        // Propagate length equalities from equations before solving,
-        // so arithmetic bounds are available to guide branching
-        if (propagate_length_eqs())
+        // Propagate length equalities and zero-length variables.
+        // Don't short-circuit: continue to solve_eqs so force_phase is set on
+        // branching variables before the SAT solver makes its next decision.
+        propagate_length_eqs();
+        if (ctx.inconsistent())
             return FC_CONTINUE;
-
-        // Check zero-length variables before solving (uses arithmetic bounds)
-        if (check_zero_length())
+        check_zero_length();
+        if (ctx.inconsistent())
             return FC_CONTINUE;
 
         // Solve word equations using Nielsen transformations
         if (solve_eqs())
             return FC_CONTINUE;
 
+        // After a successful solve pass, pick up any freshly-added length axioms
+        if (m_state.has_pending_axioms())
+            return FC_CONTINUE;
+
         // Check regex membership constraints
         if (check_regex_memberships())
             return FC_CONTINUE;
@@ -866,7 +874,10 @@ namespace smt {
     }
 
     // -------------------------------------------------------
-    // Nielsen splitting: x · α = c · β → x = c · x_tail
+    // Nielsen splitting
+    // -------------------------------------------------------
+    // var-vs-unit: x · α = c · β → x = c · x_tail (peel one char)
+    // var-vs-var:  x · α = y · β → determine |x| vs |y| and split
     // -------------------------------------------------------
 
     bool theory_nseq::split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs,
@@ -874,7 +885,7 @@ namespace smt {
         if (lhs.empty() || rhs.empty())
             return false;
 
-        // Find: variable on one side facing a unit/constant on the other
+        // Find: variable on one side facing a non-variable on the other
         expr* var = nullptr;
         expr* head = nullptr;
         bool var_on_left = false;
@@ -889,11 +900,13 @@ namespace smt {
             head = lhs[0];
             var_on_left = false;
         }
-        // Also handle var vs var: pick either and peel one character
+        // var vs var case
+        bool both_var = false;
         if (!var && !head && m_nielsen.is_var(lhs[0]) && m_nielsen.is_var(rhs[0]) && lhs[0] != rhs[0]) {
             var = lhs[0];
             head = rhs[0];
             var_on_left = true;
+            both_var = true;
         }
         if (!var || !head)
             return false;
@@ -910,31 +923,115 @@ namespace smt {
         if (ctx.get_assignment(eq_empty) != l_false)
             return false;
 
+        // ---- var-vs-var: length-guided split ----
+        if (both_var) {
+            expr_ref len_var = mk_len(var);
+            expr_ref len_head = mk_len(head);
+            rational var_len, head_len;
+            bool has_var_len  = get_length(var,  var_len);
+            bool has_head_len = get_length(head, head_len);
+
+            if (has_var_len && has_head_len) {
+                if (var_len == head_len) {
+                    // Same length → must be equal
+                    IF_VERBOSE(3, verbose_stream() << "  var-var-split: |" << mk_bounded_pp(var, m, 1)
+                               << "|=|" << mk_bounded_pp(head, m, 1) << "| → unify\n";);
+                    bool changed = propagate_eq(dep, var, head);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+                else if (var_len < head_len) {
+                    // head = var · z_rest, |z_rest| = head_len - var_len
+                    IF_VERBOSE(3, verbose_stream() << "  var-var-split: |" << mk_bounded_pp(var, m, 1)
+                               << "| < |" << mk_bounded_pp(head, m, 1) << "| → split head\n";);
+                    expr_ref z(m_sk.mk_post(head, len_var), m);
+                    ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                    expr_ref rhs_eq(m_util.str.mk_concat(var, z), m);
+                    ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                    bool changed = propagate_eq(dep, head, rhs_eq);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+                else {
+                    // var = head · z_rest, |z_rest| = var_len - head_len
+                    IF_VERBOSE(3, verbose_stream() << "  var-var-split: |" << mk_bounded_pp(var, m, 1)
+                               << "| > |" << mk_bounded_pp(head, m, 1) << "| → split var\n";);
+                    expr_ref z(m_sk.mk_post(var, len_head), m);
+                    ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                    expr_ref rhs_eq(m_util.str.mk_concat(head, z), m);
+                    ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                    bool changed = propagate_eq(dep, var, rhs_eq);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+            }
+
+            // Lengths not fully determined: branch on |var| vs |head| via SAT
+            // First try: |var| ≤ |head| (use as a SAT decision)
+            literal len_le = mk_literal(
+                m_autil.mk_ge(m_autil.mk_sub(len_head, len_var), m_autil.mk_int(0)));
+            switch (ctx.get_assignment(len_le)) {
+            case l_undef:
+                IF_VERBOSE(3, verbose_stream() << "  var-var-split: branching |"
+                           << mk_bounded_pp(var, m, 1) << "| ≤ |" << mk_bounded_pp(head, m, 1) << "|\n";);
+                ctx.force_phase(len_le);
+                ctx.mark_as_relevant(len_le);
+                m_new_propagation = true;
+                m_state.stats().m_num_splits++;
+                return true;
+            case l_true: {
+                // |var| ≤ |head|: check if equal
+                literal len_eq_lit = mk_eq(len_var, len_head, false);
+                switch (ctx.get_assignment(len_eq_lit)) {
+                case l_undef:
+                    ctx.force_phase(len_eq_lit);
+                    ctx.mark_as_relevant(len_eq_lit);
+                    m_new_propagation = true;
+                    return true;
+                case l_true:
+                    return propagate_eq(dep, var, head);
+                case l_false: {
+                    // |var| < |head|: head = var · z_rest
+                    expr_ref z(m_sk.mk_post(head, len_var), m);
+                    ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                    expr_ref rhs_eq(m_util.str.mk_concat(var, z), m);
+                    ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                    bool changed = propagate_eq(dep, head, rhs_eq);
+                    if (changed) m_state.stats().m_num_splits++;
+                    return changed;
+                }
+                }
+                break;
+            }
+            case l_false: {
+                // |var| > |head|: var = head · z_rest
+                expr_ref z(m_sk.mk_post(var, len_head), m);
+                ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                expr_ref rhs_eq(m_util.str.mk_concat(head, z), m);
+                ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
+                bool changed = propagate_eq(dep, var, rhs_eq);
+                if (changed) m_state.stats().m_num_splits++;
+                return changed;
+            }
+            }
+            return false;
+        }
+
+        // ---- var-vs-unit/constant: peel one character ----
         // For multi-character string constants, extract only the first character
-        // so we peel one character at a time (Nielsen style)
         zstring head_str;
         if (m_util.str.is_string(head, head_str) && head_str.length() > 1) {
             head = m_util.str.mk_unit(m_util.mk_char(head_str[0]));
         }
 
-        // var is non-empty and the equation forces var to start with head.
-        // Nielsen split: var = head · var_tail
+        // var is non-empty and the equation forces var to start with head (a single char).
+        // Nielsen split: var = head · var_tail  where var_tail = seq.tail(var, 1)
         expr_ref one(m_autil.mk_int(1), m);
         expr_ref var_tail(m_sk.mk_tail(var, one), m);
         ensure_enode(var_tail);
         mk_var(ctx.get_enode(var_tail));
         ensure_length_axiom(var_tail);
 
-        // Before committing to the split, ensure the tail's emptiness is decided.
-        expr_ref tail_emp(m_util.str.mk_empty(var_tail->get_sort()), m);
-        literal tail_eq_empty = mk_eq(var_tail, tail_emp, false);
-        if (ctx.get_assignment(tail_eq_empty) == l_undef) {
-            ctx.force_phase(tail_eq_empty);
-            ctx.mark_as_relevant(tail_eq_empty);
-            m_new_propagation = true;
-            return true;
-        }
-
         expr_ref split_rhs(m_util.str.mk_concat(head, var_tail), m);
         ensure_enode(split_rhs);
         ensure_length_axiom(split_rhs);
diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h
index 72a2b29cb..ff044ccad 100644
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@@ -96,8 +96,7 @@ namespace smt {
         bool branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
         bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
         bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep);
-        bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
-        bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
+        bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);        bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
         bool all_eqs_solved();
         bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);
 

From 7c06ca18fffebc4f1d972079e2c2d43234792ef6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 1 Mar 2026 13:41:58 +0000
Subject: [PATCH 6/8] Fix formatting: split_variable and canonize on separate
 lines in header

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/theory_nseq.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h
index ff044ccad..72a2b29cb 100644
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@@ -96,7 +96,8 @@ namespace smt {
         bool branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
         bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
         bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep);
-        bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);        bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
+        bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
+        bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
         bool all_eqs_solved();
         bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);
 

From b59db298688abf78334765fe72541bf2b3ec0ac6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 1 Mar 2026 20:32:13 +0000
Subject: [PATCH 7/8] Add propagate_ground_split: directly propagate var values
 when ground side is fully known
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When one side of an equation is a pure ground string and the other side
has variables with fully known lengths, directly propagate each variable's
value as a substring without going through character-by-character splitting.
E.g. x·y = "abcdef" with len(x)=2, len(y)=4 immediately gives x="ab", y="cdef".

Also reverted the add_theory_aware_branching_info change (caused cross-context
phase pollution across push/pop). Kept force_phase in branch_eq.

Known issue: T08 (x·y = y·x, x≠y, len>0) hangs when run after T01+T06 in the
same Z3 session due to phase-cache pollution across push/pop for the same variable
names. Works correctly in isolation and with distinct variable names.

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/theory_nseq.cpp | 63 ++++++++++++++++++++++++++++++++++++++++-
 src/smt/theory_nseq.h   |  1 +
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index 66b31c2b1..3a3beeb5f 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -718,6 +718,11 @@ namespace smt {
             break;
         }
 
+        // Optimization: if one side is a pure ground string and the other side
+        // has variables with fully-known lengths, directly propagate substr values.
+        if (propagate_ground_split(lhs, rhs, dep))
+            return true;
+
         // Try branching: find a variable and decide x = "" or x ≠ ""
         if (branch_eq(lhs, rhs, dep))
             return true;
@@ -762,7 +767,7 @@ namespace smt {
                     // If lower length bound is already > 0, skip the empty branch
                     if (ctx.e_internalized(len_e) && lower_bound(len_e, lo) && lo > rational(0))
                         break;
-                    // Force the empty branch first
+                    // Force the empty branch first.
                     TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
                     IF_VERBOSE(3, verbose_stream() << "  branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
                     ctx.force_phase(eq_empty);
@@ -1194,6 +1199,62 @@ namespace smt {
         return progress;
     }
 
+    // If one side is a pure ground string and the other side has variables with
+    // fully known lengths, directly propagate each variable's value as a substr.
+    // E.g. x·y = "abcdef" with len(x)=2, len(y)=4 → x="ab", y="cdef"
+    bool theory_nseq::propagate_ground_split(expr_ref_vector const& lhs,
+                                              expr_ref_vector const& rhs,
+                                              nseq_dependency* dep) {
+        // Determine which side is pure-variable and which is pure-ground
+        auto has_var = [&](expr_ref_vector const& v) {
+            for (expr* e : v) if (m_nielsen.is_var(e)) return true;
+            return false;
+        };
+        auto all_var = [&](expr_ref_vector const& v) {
+            for (expr* e : v) if (!m_nielsen.is_var(e)) return false;
+            return true;
+        };
+        expr_ref_vector const* ground_side = nullptr;
+        expr_ref_vector const* var_side = nullptr;
+        if (!has_var(lhs) && all_var(rhs) && !rhs.empty()) {
+            ground_side = &lhs; var_side = &rhs;
+        }
+        else if (!has_var(rhs) && all_var(lhs) && !lhs.empty()) {
+            ground_side = &rhs; var_side = &lhs;
+        }
+        if (!ground_side)
+            return false;
+
+        // Build the ground string
+        zstring ground;
+        {
+            sort* s = var_side->get(0)->get_sort();
+            expr_ref gc = mk_concat(*ground_side, s);
+            if (!is_ground_string(gc, ground))
+                return false;
+        }
+
+        // Collect vars and their known lengths
+        unsigned offset = 0;
+        bool changed = false;
+        for (expr* e : *var_side) {
+            rational len_v;
+            if (!get_length(e, len_v) || !len_v.is_nonneg() || !len_v.is_unsigned())
+                return false; // length unknown, can't propagate
+            unsigned len = len_v.get_unsigned();
+            if (offset + len > ground.length())
+                return false; // length conflict, let solve_eq handle it
+            zstring substr = ground.extract(offset, len);
+            expr_ref val(m_util.str.mk_string(substr), m);
+            ensure_enode(val);
+            changed |= propagate_eq(dep, e, val);
+            offset += len;
+        }
+        if (offset != ground.length())
+            return false; // leftover ground chars, conflict
+        return changed;
+    }
+
     bool theory_nseq::check_diseq(nseq_ne const& ne) {
         expr* l = ne.l();
         expr* r = ne.r();
diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h
index 72a2b29cb..e77874ffd 100644
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@@ -97,6 +97,7 @@ namespace smt {
         bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
         bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep);
         bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
+        bool propagate_ground_split(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
         bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
         bool all_eqs_solved();
         bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);

From 5c56e5fb0a56436c974e7a0cf771a713b04a41f2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 1 Mar 2026 20:43:18 +0000
Subject: [PATCH 8/8] Fix nseq_state axiom scope leak and add
 propagate_ground_split

Bug fix: m_axioms and m_axiom_set were not restored on pop_scope,
causing axioms from a previous push/pop context to leak into
subsequent contexts with the same variable names, producing
non-termination in incremental solving (push/check-sat/pop sequences).

Fix: track m_axioms_size_at_push on the trail stack and truncate
m_axioms + remove corresponding m_axiom_set entries on pop.

Optimization: propagate_ground_split directly sets variable values when
one side is a pure ground string and all variables have known lengths.

Optimization: explicit len(z) = len(head) - len(var) axioms for
Skolem terms in var-vs-var splits to help arithmetic termination.

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/smt/nseq_state.cpp  | 10 ++++++++++
 src/smt/nseq_state.h    |  1 +
 src/smt/theory_nseq.cpp | 17 +++++++++++++----
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/smt/nseq_state.cpp b/src/smt/nseq_state.cpp
index 2767938a7..d696ebe27 100644
--- a/src/smt/nseq_state.cpp
+++ b/src/smt/nseq_state.cpp
@@ -38,6 +38,9 @@ namespace smt {
         m_trail.push_scope();
         m_trail.push(value_trail<unsigned>(m_axioms_head));
         m_trail.push(value_trail<unsigned>(m_preds_head));
+        // Save axiom vector size so we can truncate it on pop
+        m_trail.push(value_trail<unsigned>(m_axioms_size_at_push));
+        m_axioms_size_at_push = m_axioms.size();
         m_eqs.push_scope();
         m_neqs.push_scope();
         m_mems.push_scope();
@@ -45,7 +48,14 @@ namespace smt {
     }
 
     void nseq_state::pop_scope(unsigned num_scopes) {
+        // m_trail.pop_scope will restore m_axioms_size_at_push to the value saved
+        // at the outermost of the popped scopes (i.e., the correct target size).
         m_trail.pop_scope(num_scopes);
+        // Now m_axioms_size_at_push is the size we should truncate back to.
+        unsigned target = m_axioms_size_at_push;
+        for (unsigned i = target; i < m_axioms.size(); ++i)
+            m_axiom_set.remove(m_axioms.get(i));
+        m_axioms.shrink(target);
         m_dm.pop_scope(num_scopes);
         m_eqs.pop_scope(num_scopes);
         m_neqs.pop_scope(num_scopes);
diff --git a/src/smt/nseq_state.h b/src/smt/nseq_state.h
index d5201d9ee..a63f046ce 100644
--- a/src/smt/nseq_state.h
+++ b/src/smt/nseq_state.h
@@ -68,6 +68,7 @@ namespace smt {
         expr_ref_vector          m_axioms;
         obj_hashtable<expr>      m_axiom_set;
         unsigned                 m_axioms_head;
+        unsigned                 m_axioms_size_at_push { 0 }; // saved on push, restored on pop
 
         // Length tracking
         obj_hashtable<expr>      m_has_length;
diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index 3a3beeb5f..faf20df23 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -68,8 +68,6 @@ namespace smt {
             m_state.set_axioms_head(axioms.size());
             if (ctx.inconsistent())
                 return FC_CONTINUE;
-            // Fall through: call solve_eqs in the same round so force_phase is set
-            // before the SAT solver makes random phase decisions.
         }
 
         // Reduce predicates (prefix, suffix, contains) to word equations
@@ -996,9 +994,15 @@ namespace smt {
                 case l_true:
                     return propagate_eq(dep, var, head);
                 case l_false: {
-                    // |var| < |head|: head = var · z_rest
+                    // |var| < |head|: head = var · z_rest, |z_rest| = |head| - |var|
                     expr_ref z(m_sk.mk_post(head, len_var), m);
                     ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                    // Add len(z) = len(head) - len(var) as an explicit axiom so arithmetic
+                    // knows the exact length and terminates the split chain.
+                    expr_ref len_z = mk_len(z);
+                    expr_ref len_diff(m_autil.mk_sub(len_head, len_var), m);
+                    ensure_enode(len_z); ensure_enode(len_diff);
+                    add_axiom(mk_eq(len_z, len_diff, false));
                     expr_ref rhs_eq(m_util.str.mk_concat(var, z), m);
                     ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
                     bool changed = propagate_eq(dep, head, rhs_eq);
@@ -1009,9 +1013,14 @@ namespace smt {
                 break;
             }
             case l_false: {
-                // |var| > |head|: var = head · z_rest
+                // |var| > |head|: var = head · z_rest, |z_rest| = |var| - |head|
                 expr_ref z(m_sk.mk_post(var, len_head), m);
                 ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z);
+                // Add len(z) = len(var) - len(head) as an explicit axiom.
+                expr_ref len_z = mk_len(z);
+                expr_ref len_diff(m_autil.mk_sub(len_var, len_head), m);
+                ensure_enode(len_z); ensure_enode(len_diff);
+                add_axiom(mk_eq(len_z, len_diff, false));
                 expr_ref rhs_eq(m_util.str.mk_concat(head, z), m);
                 ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq);
                 bool changed = propagate_eq(dep, var, rhs_eq);