From 9a9083384ffa58d537a7f1e17cf46c275a12941a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 22:03:54 +0000 Subject: [PATCH 1/8] Initial plan From 02db14c8c4db14b3153627c1d35e88acc16f7948 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 22:33:47 +0000 Subject: [PATCH 2/8] Add nseq_basic unit tests for theory_nseq solver Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/test/CMakeLists.txt | 1 + src/test/main.cpp | 1 + src/test/nseq_basic.cpp | 203 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 src/test/nseq_basic.cpp diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 1d5b5ce18..5a6f8b463 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -129,6 +129,7 @@ add_executable(test-z3 simplifier.cpp sls_test.cpp sls_seq_plugin.cpp + nseq_basic.cpp small_object_allocator.cpp smt2print_parse.cpp smt_context.cpp diff --git a/src/test/main.cpp b/src/test/main.cpp index c5d55ebe1..7f11157e2 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -284,6 +284,7 @@ int main(int argc, char ** argv) { TST(sls_test); TST(scoped_vector); TST(sls_seq_plugin); + TST(nseq_basic); TST(ho_matcher); TST(finite_set); TST(finite_set_rewriter); diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp new file mode 100644 index 000000000..bdde14289 --- /dev/null +++ b/src/test/nseq_basic.cpp @@ -0,0 +1,203 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + nseq_basic.cpp + +Abstract: + + Basic unit tests for the Nielsen-based string theory solver (theory_nseq). + Tests that theory_nseq can be selected via smt.string_solver=nseq and + correctly handles basic string constraints. + +Author: + + Clemens Eisenhofer + Nikolaj Bjorner (nbjorner) 2025-2-28 + +--*/ + +#include "api/z3.h" +#include +#include "util/util.h" + +// Helper: create a Z3 context with string_solver set to nseq +static Z3_context mk_nseq_ctx() { + Z3_global_param_set("smt.string_solver", "nseq"); + Z3_config cfg = Z3_mk_config(); + Z3_context ctx = Z3_mk_context(cfg); + Z3_del_config(cfg); + Z3_global_param_reset_all(); + return ctx; +} + +// Test 1: Simple string constant equality (sat) +static void test_nseq_simple_sat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + Z3_ast hello = Z3_mk_string(ctx, "hello"); + Z3_ast eq = Z3_mk_eq(ctx, x, hello); + Z3_solver_assert(ctx, s, eq); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_TRUE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +// Test 2: Simple string constant disequality (unsat) +static void test_nseq_simple_unsat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + Z3_ast hello = Z3_mk_string(ctx, "hello"); + Z3_ast world = Z3_mk_string(ctx, "world"); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, hello)); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, world)); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_FALSE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +// Test 3: String concatenation (sat) +static void test_nseq_concat_sat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + Z3_ast y = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "y"), str_sort); + Z3_ast hello = Z3_mk_string(ctx, "hello"); + Z3_ast args[2] = { x, y }; + Z3_ast xy = Z3_mk_seq_concat(ctx, 2, args); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, xy, hello)); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_TRUE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +// Test 4: Prefix constraint (sat) +static void test_nseq_prefix_sat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + Z3_ast pre = Z3_mk_string(ctx, "hel"); + // prefix("hel", x) + Z3_ast prefix_cond = Z3_mk_seq_prefix(ctx, pre, x); + Z3_sort int_sort = Z3_mk_int_sort(ctx); + Z3_ast five = Z3_mk_int(ctx, 5, int_sort); + Z3_ast len_x = Z3_mk_seq_length(ctx, x); + Z3_solver_assert(ctx, s, prefix_cond); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, len_x, five)); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_TRUE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +// Test 5: Empty string equality (sat) +static void test_nseq_empty_sat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + Z3_ast empty = Z3_mk_string(ctx, ""); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, empty)); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_TRUE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +// Test 6: Basic regex membership (sat) +static void test_nseq_regex_sat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + // x = "abab" ∧ x ∈ (ab)* + Z3_ast abab = Z3_mk_string(ctx, "abab"); + Z3_ast ab_re = Z3_mk_seq_to_re(ctx, Z3_mk_string(ctx, "ab")); + Z3_ast star_re = Z3_mk_re_star(ctx, ab_re); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, abab)); + Z3_solver_assert(ctx, s, Z3_mk_seq_in_re(ctx, x, star_re)); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_TRUE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +// Test 7: Basic regex membership (unsat) +static void test_nseq_regex_unsat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + // x = "abc" ∧ x ∈ (ab)* → unsat ("abc" is not in (ab)*) + Z3_ast abc = Z3_mk_string(ctx, "abc"); + Z3_ast ab_re = Z3_mk_seq_to_re(ctx, Z3_mk_string(ctx, "ab")); + Z3_ast star_re = Z3_mk_re_star(ctx, ab_re); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, x, abc)); + Z3_solver_assert(ctx, s, Z3_mk_seq_in_re(ctx, x, star_re)); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_FALSE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +// Test 8: Length constraint (sat) +static void test_nseq_length_sat() { + Z3_context ctx = mk_nseq_ctx(); + Z3_solver s = Z3_mk_solver(ctx); + Z3_solver_inc_ref(ctx, s); + + Z3_sort str_sort = Z3_mk_string_sort(ctx); + Z3_sort int_sort = Z3_mk_int_sort(ctx); + Z3_ast x = Z3_mk_const(ctx, Z3_mk_string_symbol(ctx, "x"), str_sort); + Z3_ast len_x = Z3_mk_seq_length(ctx, x); + Z3_ast three = Z3_mk_int(ctx, 3, int_sort); + Z3_solver_assert(ctx, s, Z3_mk_eq(ctx, len_x, three)); + Z3_lbool r = Z3_solver_check(ctx, s); + ENSURE(r == Z3_L_TRUE); + + Z3_solver_dec_ref(ctx, s); + Z3_del_context(ctx); +} + +void tst_nseq_basic() { + test_nseq_simple_sat(); + test_nseq_simple_unsat(); + test_nseq_concat_sat(); + test_nseq_prefix_sat(); + test_nseq_empty_sat(); + test_nseq_regex_sat(); + test_nseq_regex_unsat(); + test_nseq_length_sat(); + std::cout << "nseq_basic: all tests passed\n"; +} From a65b64ae6340b8aff9737f1b05709f4471cce637 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 23:47:19 +0000 Subject: [PATCH 3/8] Fix soundness bug in theory_nseq: propagate_length_eqs and branch_eq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two bugs that caused false unsat results in the nseq string solver when word equations were combined with length constraints: 1. propagate_length_eqs was creating fresh mk_concat terms not in the SMT context, so ctx.e_internalized() always failed and the arithmetic solver never learned len(x)+len(y)+len(z)=6 from x·y·z="abcdef". Fix: new build_length_sum helper collapses concrete lengths to integer numerals; propagate_length_eqs adds the sum equality as a theory axiom that the arithmetic solver can immediately use. 2. branch_eq was branching variables as empty even when lower_bound > 0 was already known from arithmetic constraints (e.g. len(x)=2). Fix: skip the empty branch when lower_bound(len_e, lo) && lo > 0. 3. Moved propagate_length_eqs before solve_eqs in final_check_eh so arithmetic bounds are available to guide branching decisions. Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/theory_nseq.cpp | 66 ++++++++++++++++++++++++++++++----------- src/smt/theory_nseq.h | 1 + 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index de33de38d..b1128317e 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -73,6 +73,11 @@ namespace smt { if (reduce_predicates()) return FC_CONTINUE; + // Propagate length equalities from equations before solving, + // so arithmetic bounds are available to guide branching + if (propagate_length_eqs()) + return FC_CONTINUE; + // Check zero-length variables before solving (uses arithmetic bounds) if (check_zero_length()) return FC_CONTINUE; @@ -81,10 +86,6 @@ namespace smt { if (solve_eqs()) return FC_CONTINUE; - // Propagate length equalities from equations - if (propagate_length_eqs()) - return FC_CONTINUE; - // Check regex membership constraints if (check_regex_memberships()) return FC_CONTINUE; @@ -741,6 +742,9 @@ namespace smt { literal eq_empty = mk_eq(e, emp, false); switch (ctx.get_assignment(eq_empty)) { case l_undef: + // If lower length bound is already > 0, skip the empty branch + if (ctx.e_internalized(len_e) && lower_bound(len_e, lo) && lo > rational(0)) + break; // Force the empty branch first TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";); IF_VERBOSE(3, verbose_stream() << " branch " << mk_bounded_pp(e, m) << " = \"\"\n";); @@ -1526,25 +1530,53 @@ namespace smt { return progress; } + // Build an arithmetic expression for the sum of lengths of the given string terms. + // Collapses concrete lengths (units, string constants) to integer numerals, + // and uses mk_len(e) for variables/complex terms. + expr_ref theory_nseq::build_length_sum(expr_ref_vector const& es) { + rational total(0); + expr_ref_vector var_lens(m); + for (expr* e : es) { + zstring s; + if (m_util.str.is_string(e, s)) { + total += rational(s.length()); + } else if (m_util.str.is_unit(e)) { + total += rational(1); + } else if (m_util.str.is_empty(e)) { + // contributes 0 + } else { + ensure_length_axiom(e); + var_lens.push_back(mk_len(e)); + } + } + if (var_lens.empty()) + return expr_ref(m_autil.mk_numeral(total, true), m); + expr_ref var_sum(m); + if (var_lens.size() == 1) + var_sum = expr_ref(var_lens.get(0), m); + else + var_sum = expr_ref(m_autil.mk_add(var_lens.size(), var_lens.data()), m); + if (total.is_zero()) + return var_sum; + return expr_ref(m_autil.mk_add(var_sum, m_autil.mk_numeral(total, true)), m); + } + bool theory_nseq::propagate_length_eqs() { bool progress = false; for (auto const& eq : m_state.eqs()) { expr_ref_vector const& lhs = eq.lhs(); expr_ref_vector const& rhs = eq.rhs(); - if (lhs.empty() || rhs.empty()) + // Build sum of lengths directly, using concrete integer values for + // units/strings to avoid creating fresh unregistered concat terms. + expr_ref lhs_sum = build_length_sum(lhs); + expr_ref rhs_sum = build_length_sum(rhs); + // Skip if both sums are syntactically identical (trivially equal) + if (lhs_sum.get() == rhs_sum.get()) continue; - sort* s = lhs[0]->get_sort(); - expr_ref l = mk_concat(lhs, s); - expr_ref r = mk_concat(rhs, s); - expr_ref len_l = mk_len(l); - expr_ref len_r = mk_len(r); - if (ctx.e_internalized(len_l) && ctx.e_internalized(len_r)) { - enode* nl = ctx.get_enode(len_l); - enode* nr = ctx.get_enode(len_r); - if (nl->get_root() != nr->get_root()) { - propagate_eq(eq.dep(), len_l, len_r, false); - progress = true; - } + literal len_eq = mk_eq(lhs_sum, rhs_sum, false); + if (ctx.get_assignment(len_eq) != l_true) { + add_axiom(len_eq); + progress = true; } } return progress; diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 60f0502a0..7830c08b9 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -129,6 +129,7 @@ namespace smt { // Length reasoning void add_length_axiom(expr* n); + expr_ref build_length_sum(expr_ref_vector const& es); bool check_zero_length(); bool propagate_length_eqs(); bool get_length(expr* e, rational& val); From faeb936a8358c68177f796c5656133c71bb032f8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 02:25:29 +0000 Subject: [PATCH 4/8] Continue nseq: regex synthesis, model generation, length propagation Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/theory_nseq.cpp | 330 ++++++++++++++++++++++++++++++++++++---- src/smt/theory_nseq.h | 6 + 2 files changed, 305 insertions(+), 31 deletions(-) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index b1128317e..368f5f310 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -94,8 +94,11 @@ namespace smt { if (check_disequalities()) return FC_CONTINUE; - if (all_eqs_solved() && m_state.mems().empty()) - return FC_DONE; + if (all_eqs_solved()) { + // All word equations solved; check if remaining memberships are handled + if (all_mems_checked()) + return FC_DONE; + } return FC_GIVEUP; } @@ -195,6 +198,12 @@ namespace smt { if (m_find.find(v1) != m_find.find(v2)) m_find.merge(v1, v2); m_state.add_eq(o1, o2, dep); + // Propagate length equality: len(o1) = len(o2) + ensure_length_axiom(o1); + ensure_length_axiom(o2); + expr_ref len1 = mk_len(o1); + expr_ref len2 = mk_len(o2); + add_axiom(mk_eq(len1, len2, false)); IF_VERBOSE(3, verbose_stream() << "new_eq_eh: " << mk_bounded_pp(o1, m) << " = " << mk_bounded_pp(o2, m) << " (total eqs: " << m_state.eqs().size() << ")\n"); TRACE(seq, tout << "new eq: " << mk_bounded_pp(o1, m) << " = " << mk_bounded_pp(o2, m) << "\n";); @@ -1397,6 +1406,84 @@ namespace smt { return r; } + // DFS: find a string of exactly target_len in regex r. + // On success, appends characters to 'result' and returns true. + // cur_len is how many characters have already been appended. + bool theory_nseq::dfs_regex(expr* r, unsigned target_len, unsigned cur_len, zstring& result) { + if (cur_len == target_len) { + expr_ref nullable = m_seq_rewrite.is_nullable(expr_ref(r, m)); + m_rewrite(nullable); + return m.is_true(nullable); + } + // Try printable characters: lowercase, uppercase, digits, then common punctuation + static const unsigned char priority[] = { + 'a','b','c','d','e','f','g','h','i','j','k','l','m', + 'n','o','p','q','r','s','t','u','v','w','x','y','z', + '0','1','2','3','4','5','6','7','8','9', + 'A','B','C','D','E','F','G','H','I','J','K','L','M', + 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z', + ' ','.', ',', '!', '?', '-', '_', '@', '#', '$', '%', '&', '*' + }; + for (unsigned c : priority) { + expr_ref ch(m_util.mk_char(c), m); + expr_ref dr = m_seq_rewrite.mk_derivative(ch, expr_ref(r, m)); + m_rewrite(dr); + if (m_util.re.is_empty(dr)) + continue; + result = result + zstring(c); + if (dfs_regex(dr, target_len, cur_len + 1, result)) + return true; + // Backtrack: remove last character + result = result.extract(0, result.length() - 1); + } + return false; + } + + // Synthesize a concrete string of exactly `len` accepted by `regex`. + // Returns true and sets `result` on success. + bool theory_nseq::synthesize_regex_string(expr* regex, unsigned len, zstring& result) { + result = zstring(); + return dfs_regex(regex, len, 0, result); + } + + // Add length-bound axioms from regex structure: len(s) >= min_length(r). + // Also adds: len(s) = 0 → nullable(r) (otherwise conflict). + bool theory_nseq::add_regex_length_axioms(nseq_mem const& mem) { + expr* s = mem.str(); + expr* r = mem.regex(); + bool sign = mem.sign(); + nseq_dependency* dep = mem.dep(); + bool progress = false; + + if (sign) { + // s ∈ r → len(s) >= min_length(r) + unsigned minlen = m_util.re.min_length(r); + if (minlen > 0) { + expr_ref len_s = mk_len(s); + ensure_length_axiom(s); + literal lb = mk_literal(m_autil.mk_ge(len_s, m_autil.mk_int(minlen))); + if (ctx.get_assignment(lb) != l_true) { + add_axiom(lb); + progress = true; + } + } + // If r is not nullable and len(s) = 0, conflict + expr_ref nullable = m_seq_rewrite.is_nullable(expr_ref(r, m)); + m_rewrite(nullable); + if (m.is_false(nullable)) { + // s ∈ r → s ≠ "" + expr_ref emp(m_util.str.mk_empty(s->get_sort()), m); + ensure_enode(emp); + literal s_empty = mk_eq(s, emp, false); + if (ctx.get_assignment(~s_empty) != l_true) { + add_axiom(~s_empty); + progress = true; + } + } + } + return progress; + } + bool theory_nseq::check_regex_memberships() { bool progress = false; for (auto const& mem : m_state.mems()) { @@ -1414,43 +1501,99 @@ namespace smt { bool sign = mem.sign(); nseq_dependency* dep = mem.dep(); + // Add structural length axioms from the regex + if (add_regex_length_axioms(mem)) + return true; + // Try to determine the string value from the e-graph zstring sval; - if (!is_ground_string(s, sval)) + if (is_ground_string(s, sval)) { + // Compute derivatives for the known string + expr_ref derived = derive_regex(r, sval); + + // Check nullable: does the derived regex accept the empty word? + expr_ref nullable = m_seq_rewrite.is_nullable(derived); + m_rewrite(nullable); + + if (sign) { + // Positive membership: s must be in r + if (m.is_false(nullable) || m_util.re.is_empty(derived)) { + set_conflict(dep); + return true; + } + } + else { + // Negative membership: s must NOT be in r + if (m.is_true(nullable)) { + set_conflict(dep); + return true; + } + } return false; + } - // Compute derivatives for the known string - expr_ref derived = derive_regex(r, sval); - - // Check nullable: does the derived regex accept the empty word? - expr_ref nullable = m_seq_rewrite.is_nullable(derived); - m_rewrite(nullable); - + // String is not yet ground. For positive membership: + // if we know the exact length, synthesize a concrete string. if (sign) { - // Positive membership: s must be in r - if (m.is_false(nullable)) { - // String is fully determined but regex rejects → conflict - set_conflict(dep); - return true; - } - } - else { - // Negative membership: s must NOT be in r - if (m.is_true(nullable)) { - // String is fully determined and regex accepts → conflict - set_conflict(dep); - return true; + rational len_val; + if (get_length(s, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) { + unsigned len = len_val.get_unsigned(); + // Feasibility check only: detect conflicts but don't force a specific value. + // Model generation (mk_value) will synthesize the actual string. + zstring synth; + if (!synthesize_regex_string(r, len, synth)) { + // No string of that length accepted by regex - conflict + IF_VERBOSE(3, verbose_stream() << " regex conflict: no string of len=" << len + << " in " << mk_bounded_pp(r, m, 2) << "\n";); + set_conflict(dep); + return true; + } + IF_VERBOSE(3, verbose_stream() << " regex feasible: len=" << len + << " has witness \"" << synth << "\"\n";); } } - // Check if the derived regex is the empty set - if (sign && m_util.re.is_empty(derived)) { - set_conflict(dep); - return true; - } return false; } + // Returns true if every membership constraint is already verified + // (ground strings checked) or provably feasible (non-ground with known length). + bool theory_nseq::all_mems_checked() { + for (auto const& mem : m_state.mems()) { + expr* s = mem.str(); + expr* r = mem.regex(); + bool sign = mem.sign(); + nseq_dependency* dep = mem.dep(); + zstring sval; + if (is_ground_string(s, sval)) { + // Verify the ground string + expr_ref derived = derive_regex(r, sval); + expr_ref nullable = m_seq_rewrite.is_nullable(derived); + m_rewrite(nullable); + if (sign && !m.is_true(nullable)) + return false; + if (!sign && !m.is_false(nullable)) + return false; + } + else if (sign) { + // Non-ground positive membership: check feasibility by length + rational len_val; + if (get_length(s, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) { + zstring synth; + if (!synthesize_regex_string(r, len_val.get_unsigned(), synth)) + return false; // infeasible; conflict should have been set by check_regex_mem + // Feasible - model generator will assign the right value + } + else { + // Length unknown and regex is empty: unconditionally infeasible + if (m_util.re.is_empty(r)) + return false; + } + } + } + return true; + } + // ------------------------------------------------------- // Length reasoning // ------------------------------------------------------- @@ -1495,7 +1638,34 @@ namespace smt { bool theory_nseq::get_length(expr* e, rational& val) { expr_ref len = mk_len(e); - return m_arith_value.get_value(len, val); + if (m_arith_value.get_value(len, val)) + return true; + // Structural fallback: for str.substr(s, off, n) with concrete off and n, + // use the explicit length argument if it's a numeral and s has sufficient length. + expr* s = nullptr, *offset = nullptr, *len_arg = nullptr; + if (m_util.str.is_extract(e, s, offset, len_arg)) { + rational len_n; + if (m_autil.is_numeral(len_arg, len_n) && len_n.is_nonneg()) { + rational off_n; + if (m_autil.is_numeral(offset, off_n) && off_n.is_nonneg()) { + rational parent_len; + if (get_length(s, parent_len) && + off_n + len_n <= parent_len) { + val = len_n; + return true; + } + } + } + } + // Fallback via lower/upper bound: if lo == hi, that's the value + rational lo, hi; + bool s_lo, s_hi; + if (m_arith_value.get_lo(len, lo, s_lo) && !s_lo && + m_arith_value.get_up(len, hi, s_hi) && !s_hi && lo == hi) { + val = lo; + return true; + } + return false; } bool theory_nseq::lower_bound(expr* e, rational& lo) { @@ -1603,8 +1773,60 @@ namespace smt { st.update("nseq axioms", s.m_num_axioms); } + // Find the most constrained regex for a given string expression + // by scanning memberships that apply to e or its equivalence class root. + expr* theory_nseq::find_regex_for(expr* e) { + if (!ctx.e_internalized(e)) + return nullptr; + enode* root = ctx.get_enode(e)->get_root(); + for (auto const& mem : m_state.mems()) { + if (!mem.sign()) continue; + expr* ms = mem.str(); + if (!ctx.e_internalized(ms)) continue; + if (ctx.get_enode(ms)->get_root() == root) + return mem.regex(); + } + return nullptr; + } + + // Find any positive membership for a string expression whose base + // is `base` and which starts at position `pos`. Returns the regex + // and sets `cover_len` to how many characters it covers. + // Used to synthesize sub-range values during model generation. + expr* theory_nseq::find_substr_regex(expr* base, unsigned pos, + unsigned total_len, unsigned& cover_len) { + if (!ctx.e_internalized(base)) + return nullptr; + enode* base_root = ctx.get_enode(base)->get_root(); + for (auto const& mem : m_state.mems()) { + if (!mem.sign()) continue; + expr* ms = mem.str(); + if (!ctx.e_internalized(ms)) continue; + // Check if ms is str.substr(base, pos, k) for some k + expr* s = nullptr, *off = nullptr, *len_arg = nullptr; + if (!m_util.str.is_extract(ms, s, off, len_arg)) continue; + if (!ctx.e_internalized(s)) continue; + if (ctx.get_enode(s)->get_root() != base_root) continue; + // Check offset matches + rational off_v; + if (!m_autil.is_numeral(off, off_v) || !off_v.is_nonneg() || + !off_v.is_unsigned() || off_v.get_unsigned() != pos) continue; + // Get the length (may be symbolic, use get_length) + rational len_v; + if (m_autil.is_numeral(len_arg, len_v) && len_v.is_nonneg() && len_v.is_unsigned()) { + cover_len = len_v.get_unsigned(); + return mem.regex(); + } + if (get_length(ms, len_v) && len_v.is_nonneg() && len_v.is_unsigned()) { + cover_len = len_v.get_unsigned(); + return mem.regex(); + } + } + return nullptr; + } + // ------------------------------------------------------- - // Model generation (stub) + // Model generation // ------------------------------------------------------- model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) { @@ -1636,11 +1858,57 @@ namespace smt { curr = curr->get_next(); } while (curr != root); - // Try to construct a string of the correct length + // Try to construct a string satisfying any regex membership constraint rational len_val; if (get_length(e, len_val) && len_val.is_nonneg() && len_val.is_unsigned()) { unsigned len = len_val.get_unsigned(); + // Check if there's a direct regex constraint for this string + expr* regex = find_regex_for(e); + if (regex) { + zstring synth; + if (synthesize_regex_string(regex, len, synth)) { + expr_ref val(m_util.str.mk_string(synth), m); + m_fresh_values.push_back(val); + return alloc(expr_wrapper_proc, to_app(val)); + } + } + // Try to synthesize character by character from str.at(e, i) memberships. + // Preprocessors often split x ∈ r into (str.at x 0) ∈ r0, (str.substr x 1 k) ∈ r1, etc. zstring s; + bool from_chars = true; + unsigned pos = 0; + while (pos < len && from_chars) { + // Option A: there's a membership for str.at(e, pos) (length-1 char) + expr_ref at_i(m_util.str.mk_at(e, m_autil.mk_int(pos)), m); + expr* r = find_regex_for(at_i); + if (r) { + zstring ch; + if (synthesize_regex_string(r, 1, ch)) { + s = s + ch; + pos++; + continue; + } + } + // Option B: find a membership for str.substr(e, pos, k) and synthesize k chars + unsigned cover_len = 0; + expr* rs = find_substr_regex(e, pos, len, cover_len); + if (rs && cover_len > 0) { + zstring piece; + if (synthesize_regex_string(rs, cover_len, piece)) { + s = s + piece; + pos += cover_len; + continue; + } + } + from_chars = false; + } + if (from_chars && s.length() == len) { + expr_ref val(m_util.str.mk_string(s), m); + m_fresh_values.push_back(val); + return alloc(expr_wrapper_proc, to_app(val)); + } + // No regex constraint (or synthesis failed): use default fill + s = zstring(); for (unsigned i = 0; i < len; i++) s = s + zstring("A"); expr_ref val(m_util.str.mk_string(s), m); diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 7830c08b9..72a2b29cb 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -115,8 +115,14 @@ namespace smt { // Regex membership bool check_regex_memberships(); bool check_regex_mem(nseq_mem const& mem); + bool add_regex_length_axioms(nseq_mem const& mem); + bool synthesize_regex_string(expr* regex, unsigned len, zstring& result); + bool dfs_regex(expr* r, unsigned target_len, unsigned cur_len, zstring& result); bool is_ground_string(expr* e, zstring& s); expr_ref derive_regex(expr* regex, zstring const& prefix); + bool all_mems_checked(); + expr* find_regex_for(expr* e); + expr* find_substr_regex(expr* base, unsigned pos, unsigned total_len, unsigned& cover_len); // String operation reductions void reduce_op(expr* e); From 754892909b2bb66e1702816532ae2a5892fd9158 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 13:41:12 +0000 Subject: [PATCH 5/8] Fix var-vs-var Nielsen split and final_check_eh round structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - split_variable: var-vs-var now uses length-guided branching via SAT instead of incorrect circular mk_tail(var,1) split. With known lengths, deterministically splits: unify if equal, mk_post(head,len_var) if varhead. With unknown lengths, branches via arithmetic SAT literal (|var| ≤ |head|). - final_check_eh: process pending axioms then fall through to solve_eqs in the same round. This ensures force_phase is set on branching variables before the SAT solver makes any phase decisions. propagate_length_eqs and check_zero_length no longer short-circuit with FC_CONTINUE. Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/theory_nseq.cpp | 143 +++++++++++++++++++++++++++++++++------- src/smt/theory_nseq.h | 3 +- 2 files changed, 121 insertions(+), 25 deletions(-) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 368f5f310..66b31c2b1 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -66,26 +66,34 @@ namespace smt { for (unsigned i = head; i < axioms.size(); ++i) deque_axiom(axioms[i]); m_state.set_axioms_head(axioms.size()); - return FC_CONTINUE; + if (ctx.inconsistent()) + return FC_CONTINUE; + // Fall through: call solve_eqs in the same round so force_phase is set + // before the SAT solver makes random phase decisions. } // Reduce predicates (prefix, suffix, contains) to word equations if (reduce_predicates()) return FC_CONTINUE; - // Propagate length equalities from equations before solving, - // so arithmetic bounds are available to guide branching - if (propagate_length_eqs()) + // Propagate length equalities and zero-length variables. + // Don't short-circuit: continue to solve_eqs so force_phase is set on + // branching variables before the SAT solver makes its next decision. + propagate_length_eqs(); + if (ctx.inconsistent()) return FC_CONTINUE; - - // Check zero-length variables before solving (uses arithmetic bounds) - if (check_zero_length()) + check_zero_length(); + if (ctx.inconsistent()) return FC_CONTINUE; // Solve word equations using Nielsen transformations if (solve_eqs()) return FC_CONTINUE; + // After a successful solve pass, pick up any freshly-added length axioms + if (m_state.has_pending_axioms()) + return FC_CONTINUE; + // Check regex membership constraints if (check_regex_memberships()) return FC_CONTINUE; @@ -866,7 +874,10 @@ namespace smt { } // ------------------------------------------------------- - // Nielsen splitting: x · α = c · β → x = c · x_tail + // Nielsen splitting + // ------------------------------------------------------- + // var-vs-unit: x · α = c · β → x = c · x_tail (peel one char) + // var-vs-var: x · α = y · β → determine |x| vs |y| and split // ------------------------------------------------------- bool theory_nseq::split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, @@ -874,7 +885,7 @@ namespace smt { if (lhs.empty() || rhs.empty()) return false; - // Find: variable on one side facing a unit/constant on the other + // Find: variable on one side facing a non-variable on the other expr* var = nullptr; expr* head = nullptr; bool var_on_left = false; @@ -889,11 +900,13 @@ namespace smt { head = lhs[0]; var_on_left = false; } - // Also handle var vs var: pick either and peel one character + // var vs var case + bool both_var = false; if (!var && !head && m_nielsen.is_var(lhs[0]) && m_nielsen.is_var(rhs[0]) && lhs[0] != rhs[0]) { var = lhs[0]; head = rhs[0]; var_on_left = true; + both_var = true; } if (!var || !head) return false; @@ -910,31 +923,115 @@ namespace smt { if (ctx.get_assignment(eq_empty) != l_false) return false; + // ---- var-vs-var: length-guided split ---- + if (both_var) { + expr_ref len_var = mk_len(var); + expr_ref len_head = mk_len(head); + rational var_len, head_len; + bool has_var_len = get_length(var, var_len); + bool has_head_len = get_length(head, head_len); + + if (has_var_len && has_head_len) { + if (var_len == head_len) { + // Same length → must be equal + IF_VERBOSE(3, verbose_stream() << " var-var-split: |" << mk_bounded_pp(var, m, 1) + << "|=|" << mk_bounded_pp(head, m, 1) << "| → unify\n";); + bool changed = propagate_eq(dep, var, head); + if (changed) m_state.stats().m_num_splits++; + return changed; + } + else if (var_len < head_len) { + // head = var · z_rest, |z_rest| = head_len - var_len + IF_VERBOSE(3, verbose_stream() << " var-var-split: |" << mk_bounded_pp(var, m, 1) + << "| < |" << mk_bounded_pp(head, m, 1) << "| → split head\n";); + expr_ref z(m_sk.mk_post(head, len_var), m); + ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z); + expr_ref rhs_eq(m_util.str.mk_concat(var, z), m); + ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq); + bool changed = propagate_eq(dep, head, rhs_eq); + if (changed) m_state.stats().m_num_splits++; + return changed; + } + else { + // var = head · z_rest, |z_rest| = var_len - head_len + IF_VERBOSE(3, verbose_stream() << " var-var-split: |" << mk_bounded_pp(var, m, 1) + << "| > |" << mk_bounded_pp(head, m, 1) << "| → split var\n";); + expr_ref z(m_sk.mk_post(var, len_head), m); + ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z); + expr_ref rhs_eq(m_util.str.mk_concat(head, z), m); + ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq); + bool changed = propagate_eq(dep, var, rhs_eq); + if (changed) m_state.stats().m_num_splits++; + return changed; + } + } + + // Lengths not fully determined: branch on |var| vs |head| via SAT + // First try: |var| ≤ |head| (use as a SAT decision) + literal len_le = mk_literal( + m_autil.mk_ge(m_autil.mk_sub(len_head, len_var), m_autil.mk_int(0))); + switch (ctx.get_assignment(len_le)) { + case l_undef: + IF_VERBOSE(3, verbose_stream() << " var-var-split: branching |" + << mk_bounded_pp(var, m, 1) << "| ≤ |" << mk_bounded_pp(head, m, 1) << "|\n";); + ctx.force_phase(len_le); + ctx.mark_as_relevant(len_le); + m_new_propagation = true; + m_state.stats().m_num_splits++; + return true; + case l_true: { + // |var| ≤ |head|: check if equal + literal len_eq_lit = mk_eq(len_var, len_head, false); + switch (ctx.get_assignment(len_eq_lit)) { + case l_undef: + ctx.force_phase(len_eq_lit); + ctx.mark_as_relevant(len_eq_lit); + m_new_propagation = true; + return true; + case l_true: + return propagate_eq(dep, var, head); + case l_false: { + // |var| < |head|: head = var · z_rest + expr_ref z(m_sk.mk_post(head, len_var), m); + ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z); + expr_ref rhs_eq(m_util.str.mk_concat(var, z), m); + ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq); + bool changed = propagate_eq(dep, head, rhs_eq); + if (changed) m_state.stats().m_num_splits++; + return changed; + } + } + break; + } + case l_false: { + // |var| > |head|: var = head · z_rest + expr_ref z(m_sk.mk_post(var, len_head), m); + ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z); + expr_ref rhs_eq(m_util.str.mk_concat(head, z), m); + ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq); + bool changed = propagate_eq(dep, var, rhs_eq); + if (changed) m_state.stats().m_num_splits++; + return changed; + } + } + return false; + } + + // ---- var-vs-unit/constant: peel one character ---- // For multi-character string constants, extract only the first character - // so we peel one character at a time (Nielsen style) zstring head_str; if (m_util.str.is_string(head, head_str) && head_str.length() > 1) { head = m_util.str.mk_unit(m_util.mk_char(head_str[0])); } - // var is non-empty and the equation forces var to start with head. - // Nielsen split: var = head · var_tail + // var is non-empty and the equation forces var to start with head (a single char). + // Nielsen split: var = head · var_tail where var_tail = seq.tail(var, 1) expr_ref one(m_autil.mk_int(1), m); expr_ref var_tail(m_sk.mk_tail(var, one), m); ensure_enode(var_tail); mk_var(ctx.get_enode(var_tail)); ensure_length_axiom(var_tail); - // Before committing to the split, ensure the tail's emptiness is decided. - expr_ref tail_emp(m_util.str.mk_empty(var_tail->get_sort()), m); - literal tail_eq_empty = mk_eq(var_tail, tail_emp, false); - if (ctx.get_assignment(tail_eq_empty) == l_undef) { - ctx.force_phase(tail_eq_empty); - ctx.mark_as_relevant(tail_eq_empty); - m_new_propagation = true; - return true; - } - expr_ref split_rhs(m_util.str.mk_concat(head, var_tail), m); ensure_enode(split_rhs); ensure_length_axiom(split_rhs); diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 72a2b29cb..ff044ccad 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -96,8 +96,7 @@ namespace smt { bool branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep); - bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); - bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); + bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); bool all_eqs_solved(); bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep); From 7c06ca18fffebc4f1d972079e2c2d43234792ef6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 13:41:58 +0000 Subject: [PATCH 6/8] Fix formatting: split_variable and canonize on separate lines in header Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/theory_nseq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index ff044ccad..72a2b29cb 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -96,7 +96,8 @@ namespace smt { bool branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep); - bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); + bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); + bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); bool all_eqs_solved(); bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep); From b59db298688abf78334765fe72541bf2b3ec0ac6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 20:32:13 +0000 Subject: [PATCH 7/8] Add propagate_ground_split: directly propagate var values when ground side is fully known MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When one side of an equation is a pure ground string and the other side has variables with fully known lengths, directly propagate each variable's value as a substring without going through character-by-character splitting. E.g. x·y = "abcdef" with len(x)=2, len(y)=4 immediately gives x="ab", y="cdef". Also reverted the add_theory_aware_branching_info change (caused cross-context phase pollution across push/pop). Kept force_phase in branch_eq. Known issue: T08 (x·y = y·x, x≠y, len>0) hangs when run after T01+T06 in the same Z3 session due to phase-cache pollution across push/pop for the same variable names. Works correctly in isolation and with distinct variable names. Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/theory_nseq.cpp | 63 ++++++++++++++++++++++++++++++++++++++++- src/smt/theory_nseq.h | 1 + 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 66b31c2b1..3a3beeb5f 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -718,6 +718,11 @@ namespace smt { break; } + // Optimization: if one side is a pure ground string and the other side + // has variables with fully-known lengths, directly propagate substr values. + if (propagate_ground_split(lhs, rhs, dep)) + return true; + // Try branching: find a variable and decide x = "" or x ≠ "" if (branch_eq(lhs, rhs, dep)) return true; @@ -762,7 +767,7 @@ namespace smt { // If lower length bound is already > 0, skip the empty branch if (ctx.e_internalized(len_e) && lower_bound(len_e, lo) && lo > rational(0)) break; - // Force the empty branch first + // Force the empty branch first. TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";); IF_VERBOSE(3, verbose_stream() << " branch " << mk_bounded_pp(e, m) << " = \"\"\n";); ctx.force_phase(eq_empty); @@ -1194,6 +1199,62 @@ namespace smt { return progress; } + // If one side is a pure ground string and the other side has variables with + // fully known lengths, directly propagate each variable's value as a substr. + // E.g. x·y = "abcdef" with len(x)=2, len(y)=4 → x="ab", y="cdef" + bool theory_nseq::propagate_ground_split(expr_ref_vector const& lhs, + expr_ref_vector const& rhs, + nseq_dependency* dep) { + // Determine which side is pure-variable and which is pure-ground + auto has_var = [&](expr_ref_vector const& v) { + for (expr* e : v) if (m_nielsen.is_var(e)) return true; + return false; + }; + auto all_var = [&](expr_ref_vector const& v) { + for (expr* e : v) if (!m_nielsen.is_var(e)) return false; + return true; + }; + expr_ref_vector const* ground_side = nullptr; + expr_ref_vector const* var_side = nullptr; + if (!has_var(lhs) && all_var(rhs) && !rhs.empty()) { + ground_side = &lhs; var_side = &rhs; + } + else if (!has_var(rhs) && all_var(lhs) && !lhs.empty()) { + ground_side = &rhs; var_side = &lhs; + } + if (!ground_side) + return false; + + // Build the ground string + zstring ground; + { + sort* s = var_side->get(0)->get_sort(); + expr_ref gc = mk_concat(*ground_side, s); + if (!is_ground_string(gc, ground)) + return false; + } + + // Collect vars and their known lengths + unsigned offset = 0; + bool changed = false; + for (expr* e : *var_side) { + rational len_v; + if (!get_length(e, len_v) || !len_v.is_nonneg() || !len_v.is_unsigned()) + return false; // length unknown, can't propagate + unsigned len = len_v.get_unsigned(); + if (offset + len > ground.length()) + return false; // length conflict, let solve_eq handle it + zstring substr = ground.extract(offset, len); + expr_ref val(m_util.str.mk_string(substr), m); + ensure_enode(val); + changed |= propagate_eq(dep, e, val); + offset += len; + } + if (offset != ground.length()) + return false; // leftover ground chars, conflict + return changed; + } + bool theory_nseq::check_diseq(nseq_ne const& ne) { expr* l = ne.l(); expr* r = ne.r(); diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 72a2b29cb..e77874ffd 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -97,6 +97,7 @@ namespace smt { bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep); bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); + bool propagate_ground_split(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); bool all_eqs_solved(); bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep); From 5c56e5fb0a56436c974e7a0cf771a713b04a41f2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 20:43:18 +0000 Subject: [PATCH 8/8] Fix nseq_state axiom scope leak and add propagate_ground_split Bug fix: m_axioms and m_axiom_set were not restored on pop_scope, causing axioms from a previous push/pop context to leak into subsequent contexts with the same variable names, producing non-termination in incremental solving (push/check-sat/pop sequences). Fix: track m_axioms_size_at_push on the trail stack and truncate m_axioms + remove corresponding m_axiom_set entries on pop. Optimization: propagate_ground_split directly sets variable values when one side is a pure ground string and all variables have known lengths. Optimization: explicit len(z) = len(head) - len(var) axioms for Skolem terms in var-vs-var splits to help arithmetic termination. Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/nseq_state.cpp | 10 ++++++++++ src/smt/nseq_state.h | 1 + src/smt/theory_nseq.cpp | 17 +++++++++++++---- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/smt/nseq_state.cpp b/src/smt/nseq_state.cpp index 2767938a7..d696ebe27 100644 --- a/src/smt/nseq_state.cpp +++ b/src/smt/nseq_state.cpp @@ -38,6 +38,9 @@ namespace smt { m_trail.push_scope(); m_trail.push(value_trail(m_axioms_head)); m_trail.push(value_trail(m_preds_head)); + // Save axiom vector size so we can truncate it on pop + m_trail.push(value_trail(m_axioms_size_at_push)); + m_axioms_size_at_push = m_axioms.size(); m_eqs.push_scope(); m_neqs.push_scope(); m_mems.push_scope(); @@ -45,7 +48,14 @@ namespace smt { } void nseq_state::pop_scope(unsigned num_scopes) { + // m_trail.pop_scope will restore m_axioms_size_at_push to the value saved + // at the outermost of the popped scopes (i.e., the correct target size). m_trail.pop_scope(num_scopes); + // Now m_axioms_size_at_push is the size we should truncate back to. + unsigned target = m_axioms_size_at_push; + for (unsigned i = target; i < m_axioms.size(); ++i) + m_axiom_set.remove(m_axioms.get(i)); + m_axioms.shrink(target); m_dm.pop_scope(num_scopes); m_eqs.pop_scope(num_scopes); m_neqs.pop_scope(num_scopes); diff --git a/src/smt/nseq_state.h b/src/smt/nseq_state.h index d5201d9ee..a63f046ce 100644 --- a/src/smt/nseq_state.h +++ b/src/smt/nseq_state.h @@ -68,6 +68,7 @@ namespace smt { expr_ref_vector m_axioms; obj_hashtable m_axiom_set; unsigned m_axioms_head; + unsigned m_axioms_size_at_push { 0 }; // saved on push, restored on pop // Length tracking obj_hashtable m_has_length; diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 3a3beeb5f..faf20df23 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -68,8 +68,6 @@ namespace smt { m_state.set_axioms_head(axioms.size()); if (ctx.inconsistent()) return FC_CONTINUE; - // Fall through: call solve_eqs in the same round so force_phase is set - // before the SAT solver makes random phase decisions. } // Reduce predicates (prefix, suffix, contains) to word equations @@ -996,9 +994,15 @@ namespace smt { case l_true: return propagate_eq(dep, var, head); case l_false: { - // |var| < |head|: head = var · z_rest + // |var| < |head|: head = var · z_rest, |z_rest| = |head| - |var| expr_ref z(m_sk.mk_post(head, len_var), m); ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z); + // Add len(z) = len(head) - len(var) as an explicit axiom so arithmetic + // knows the exact length and terminates the split chain. + expr_ref len_z = mk_len(z); + expr_ref len_diff(m_autil.mk_sub(len_head, len_var), m); + ensure_enode(len_z); ensure_enode(len_diff); + add_axiom(mk_eq(len_z, len_diff, false)); expr_ref rhs_eq(m_util.str.mk_concat(var, z), m); ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq); bool changed = propagate_eq(dep, head, rhs_eq); @@ -1009,9 +1013,14 @@ namespace smt { break; } case l_false: { - // |var| > |head|: var = head · z_rest + // |var| > |head|: var = head · z_rest, |z_rest| = |var| - |head| expr_ref z(m_sk.mk_post(var, len_head), m); ensure_enode(z); mk_var(ctx.get_enode(z)); ensure_length_axiom(z); + // Add len(z) = len(var) - len(head) as an explicit axiom. + expr_ref len_z = mk_len(z); + expr_ref len_diff(m_autil.mk_sub(len_var, len_head), m); + ensure_enode(len_z); ensure_enode(len_diff); + add_axiom(mk_eq(len_z, len_diff, false)); expr_ref rhs_eq(m_util.str.mk_concat(head, z), m); ensure_enode(rhs_eq); ensure_length_axiom(rhs_eq); bool changed = propagate_eq(dep, var, rhs_eq);