From a65b64ae6340b8aff9737f1b05709f4471cce637 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Feb 2026 23:47:19 +0000 Subject: [PATCH] Fix soundness bug in theory_nseq: propagate_length_eqs and branch_eq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two bugs that caused false unsat results in the nseq string solver when word equations were combined with length constraints: 1. propagate_length_eqs was creating fresh mk_concat terms not in the SMT context, so ctx.e_internalized() always failed and the arithmetic solver never learned len(x)+len(y)+len(z)=6 from x·y·z="abcdef". Fix: new build_length_sum helper collapses concrete lengths to integer numerals; propagate_length_eqs adds the sum equality as a theory axiom that the arithmetic solver can immediately use. 2. branch_eq was branching variables as empty even when lower_bound > 0 was already known from arithmetic constraints (e.g. len(x)=2). Fix: skip the empty branch when lower_bound(len_e, lo) && lo > 0. 3. Moved propagate_length_eqs before solve_eqs in final_check_eh so arithmetic bounds are available to guide branching decisions. Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/theory_nseq.cpp | 66 ++++++++++++++++++++++++++++++----------- src/smt/theory_nseq.h | 1 + 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index de33de38d..b1128317e 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -73,6 +73,11 @@ namespace smt { if (reduce_predicates()) return FC_CONTINUE; + // Propagate length equalities from equations before solving, + // so arithmetic bounds are available to guide branching + if (propagate_length_eqs()) + return FC_CONTINUE; + // Check zero-length variables before solving (uses arithmetic bounds) if (check_zero_length()) return FC_CONTINUE; @@ -81,10 +86,6 @@ namespace smt { if (solve_eqs()) return FC_CONTINUE; - // Propagate length equalities from equations - if (propagate_length_eqs()) - return FC_CONTINUE; - // Check regex membership constraints if (check_regex_memberships()) return FC_CONTINUE; @@ -741,6 +742,9 @@ namespace smt { literal eq_empty = mk_eq(e, emp, false); switch (ctx.get_assignment(eq_empty)) { case l_undef: + // If lower length bound is already > 0, skip the empty branch + if (ctx.e_internalized(len_e) && lower_bound(len_e, lo) && lo > rational(0)) + break; // Force the empty branch first TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";); IF_VERBOSE(3, verbose_stream() << " branch " << mk_bounded_pp(e, m) << " = \"\"\n";); @@ -1526,25 +1530,53 @@ namespace smt { return progress; } + // Build an arithmetic expression for the sum of lengths of the given string terms. + // Collapses concrete lengths (units, string constants) to integer numerals, + // and uses mk_len(e) for variables/complex terms. + expr_ref theory_nseq::build_length_sum(expr_ref_vector const& es) { + rational total(0); + expr_ref_vector var_lens(m); + for (expr* e : es) { + zstring s; + if (m_util.str.is_string(e, s)) { + total += rational(s.length()); + } else if (m_util.str.is_unit(e)) { + total += rational(1); + } else if (m_util.str.is_empty(e)) { + // contributes 0 + } else { + ensure_length_axiom(e); + var_lens.push_back(mk_len(e)); + } + } + if (var_lens.empty()) + return expr_ref(m_autil.mk_numeral(total, true), m); + expr_ref var_sum(m); + if (var_lens.size() == 1) + var_sum = expr_ref(var_lens.get(0), m); + else + var_sum = expr_ref(m_autil.mk_add(var_lens.size(), var_lens.data()), m); + if (total.is_zero()) + return var_sum; + return expr_ref(m_autil.mk_add(var_sum, m_autil.mk_numeral(total, true)), m); + } + bool theory_nseq::propagate_length_eqs() { bool progress = false; for (auto const& eq : m_state.eqs()) { expr_ref_vector const& lhs = eq.lhs(); expr_ref_vector const& rhs = eq.rhs(); - if (lhs.empty() || rhs.empty()) + // Build sum of lengths directly, using concrete integer values for + // units/strings to avoid creating fresh unregistered concat terms. + expr_ref lhs_sum = build_length_sum(lhs); + expr_ref rhs_sum = build_length_sum(rhs); + // Skip if both sums are syntactically identical (trivially equal) + if (lhs_sum.get() == rhs_sum.get()) continue; - sort* s = lhs[0]->get_sort(); - expr_ref l = mk_concat(lhs, s); - expr_ref r = mk_concat(rhs, s); - expr_ref len_l = mk_len(l); - expr_ref len_r = mk_len(r); - if (ctx.e_internalized(len_l) && ctx.e_internalized(len_r)) { - enode* nl = ctx.get_enode(len_l); - enode* nr = ctx.get_enode(len_r); - if (nl->get_root() != nr->get_root()) { - propagate_eq(eq.dep(), len_l, len_r, false); - progress = true; - } + literal len_eq = mk_eq(lhs_sum, rhs_sum, false); + if (ctx.get_assignment(len_eq) != l_true) { + add_axiom(len_eq); + progress = true; } } return progress; diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 60f0502a0..7830c08b9 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -129,6 +129,7 @@ namespace smt { // Length reasoning void add_length_axiom(expr* n); + expr_ref build_length_sum(expr_ref_vector const& es); bool check_zero_length(); bool propagate_length_eqs(); bool get_length(expr* e, rational& val);