From b59db298688abf78334765fe72541bf2b3ec0ac6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 20:32:13 +0000 Subject: [PATCH] Add propagate_ground_split: directly propagate var values when ground side is fully known MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When one side of an equation is a pure ground string and the other side has variables with fully known lengths, directly propagate each variable's value as a substring without going through character-by-character splitting. E.g. x·y = "abcdef" with len(x)=2, len(y)=4 immediately gives x="ab", y="cdef". Also reverted the add_theory_aware_branching_info change (caused cross-context phase pollution across push/pop). Kept force_phase in branch_eq. Known issue: T08 (x·y = y·x, x≠y, len>0) hangs when run after T01+T06 in the same Z3 session due to phase-cache pollution across push/pop for the same variable names. Works correctly in isolation and with distinct variable names. Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/theory_nseq.cpp | 63 ++++++++++++++++++++++++++++++++++++++++- src/smt/theory_nseq.h | 1 + 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 66b31c2b1..3a3beeb5f 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -718,6 +718,11 @@ namespace smt { break; } + // Optimization: if one side is a pure ground string and the other side + // has variables with fully-known lengths, directly propagate substr values. + if (propagate_ground_split(lhs, rhs, dep)) + return true; + // Try branching: find a variable and decide x = "" or x ≠ "" if (branch_eq(lhs, rhs, dep)) return true; @@ -762,7 +767,7 @@ namespace smt { // If lower length bound is already > 0, skip the empty branch if (ctx.e_internalized(len_e) && lower_bound(len_e, lo) && lo > rational(0)) break; - // Force the empty branch first + // Force the empty branch first. TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";); IF_VERBOSE(3, verbose_stream() << " branch " << mk_bounded_pp(e, m) << " = \"\"\n";); ctx.force_phase(eq_empty); @@ -1194,6 +1199,62 @@ namespace smt { return progress; } + // If one side is a pure ground string and the other side has variables with + // fully known lengths, directly propagate each variable's value as a substr. + // E.g. x·y = "abcdef" with len(x)=2, len(y)=4 → x="ab", y="cdef" + bool theory_nseq::propagate_ground_split(expr_ref_vector const& lhs, + expr_ref_vector const& rhs, + nseq_dependency* dep) { + // Determine which side is pure-variable and which is pure-ground + auto has_var = [&](expr_ref_vector const& v) { + for (expr* e : v) if (m_nielsen.is_var(e)) return true; + return false; + }; + auto all_var = [&](expr_ref_vector const& v) { + for (expr* e : v) if (!m_nielsen.is_var(e)) return false; + return true; + }; + expr_ref_vector const* ground_side = nullptr; + expr_ref_vector const* var_side = nullptr; + if (!has_var(lhs) && all_var(rhs) && !rhs.empty()) { + ground_side = &lhs; var_side = &rhs; + } + else if (!has_var(rhs) && all_var(lhs) && !lhs.empty()) { + ground_side = &rhs; var_side = &lhs; + } + if (!ground_side) + return false; + + // Build the ground string + zstring ground; + { + sort* s = var_side->get(0)->get_sort(); + expr_ref gc = mk_concat(*ground_side, s); + if (!is_ground_string(gc, ground)) + return false; + } + + // Collect vars and their known lengths + unsigned offset = 0; + bool changed = false; + for (expr* e : *var_side) { + rational len_v; + if (!get_length(e, len_v) || !len_v.is_nonneg() || !len_v.is_unsigned()) + return false; // length unknown, can't propagate + unsigned len = len_v.get_unsigned(); + if (offset + len > ground.length()) + return false; // length conflict, let solve_eq handle it + zstring substr = ground.extract(offset, len); + expr_ref val(m_util.str.mk_string(substr), m); + ensure_enode(val); + changed |= propagate_eq(dep, e, val); + offset += len; + } + if (offset != ground.length()) + return false; // leftover ground chars, conflict + return changed; + } + bool theory_nseq::check_diseq(nseq_ne const& ne) { expr* l = ne.l(); expr* r = ne.r(); diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 72a2b29cb..e77874ffd 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -97,6 +97,7 @@ namespace smt { bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep); bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); + bool propagate_ground_split(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); bool all_eqs_solved(); bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);