From f48040d8097deadc5993d39c86041e74f3fcb190 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 27 Feb 2026 18:01:08 -0800 Subject: [PATCH] Add Phase 3: Nielsen transformation engine and equation solving - New nseq_nielsen.h/cpp in src/ast/rewriter/: self-contained Nielsen transformation engine for word equations - simplify(): strip common prefix/suffix, empty elimination, variable stripping, single-var assignment detection - split(): case analysis for var vs constant, var vs var - is_conflict(): mismatch detection (different constants, one side has constants while other is empty) - Wire Nielsen into theory_nseq: - solve_eqs()/solve_eq(): process word equations using Nielsen transformations with e-graph canonization - branch_eq()/branch_var_prefix(): binary empty/non-empty decisions and prefix enumeration (no fresh variable creation) - canonize(): rewrite equation sides using current e-graph equivalences - all_eqs_solved(): check if all equations are satisfied - mk_value(): basic model generation (walk e-class for string constants) - Passes basic tests: simple equalities, concat equations, unsat detection Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/CMakeLists.txt | 1 + src/ast/rewriter/nseq_nielsen.cpp | 366 ++++++++++++++++++++++++++++++ src/ast/rewriter/nseq_nielsen.h | 106 +++++++++ src/smt/theory_nseq.cpp | 260 ++++++++++++++++++++- src/smt/theory_nseq.h | 11 + 5 files changed, 743 insertions(+), 1 deletion(-) create mode 100644 src/ast/rewriter/nseq_nielsen.cpp create mode 100644 src/ast/rewriter/nseq_nielsen.h diff --git a/src/ast/rewriter/CMakeLists.txt b/src/ast/rewriter/CMakeLists.txt index 9d529f9b5..b1d73a763 100644 --- a/src/ast/rewriter/CMakeLists.txt +++ b/src/ast/rewriter/CMakeLists.txt @@ -41,6 +41,7 @@ z3_add_component(rewriter seq_eq_solver.cpp seq_rewriter.cpp seq_skolem.cpp + nseq_nielsen.cpp th_rewriter.cpp value_sweep.cpp var_subst.cpp diff --git a/src/ast/rewriter/nseq_nielsen.cpp b/src/ast/rewriter/nseq_nielsen.cpp new file mode 100644 index 000000000..6f0b5f9da --- /dev/null +++ b/src/ast/rewriter/nseq_nielsen.cpp @@ -0,0 +1,366 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + nseq_nielsen.cpp + +Abstract: + + Nielsen transformation-based word equation solver. + +Author: + + Clemens Eisenhofer + Nikolaj Bjorner (nbjorner) 2025-2-28 + +--*/ + +#include "ast/ast_pp.h" +#include "ast/ast_ll_pp.h" +#include "ast/rewriter/nseq_nielsen.h" + +namespace seq { + + nielsen::nielsen(ast_manager& m, seq_rewriter& rw) + : m(m), m_util(m), m_autil(m), m_rw(rw), m_lhs(m), m_rhs(m) { + } + + bool nielsen::is_var(expr* e) const { + return m_util.is_seq(e) && + !m_util.str.is_concat(e) && + !m_util.str.is_unit(e) && + !m_util.str.is_empty(e) && + !m_util.str.is_string(e); + } + + bool nielsen::is_unit(expr* e) const { + return m_util.str.is_unit(e); + } + + bool nielsen::is_empty(expr* e) const { + return m_util.str.is_empty(e); + } + + bool nielsen::has_var(expr_ref_vector const& es) const { + for (expr* e : es) + if (is_var(e)) + return true; + return false; + } + + // ------------------------------------------------------- + // Strip matching constants/units from equation sides + // ------------------------------------------------------- + + bool nielsen::strip_common_prefix(expr_ref_vector& lhs, expr_ref_vector& rhs) { + unsigned i = 0; + unsigned min_sz = std::min(lhs.size(), rhs.size()); + while (i < min_sz) { + expr* l = lhs.get(i); + expr* r = rhs.get(i); + // Both must be ground/unit and equal + if (l == r && (is_unit(l) || m_util.str.is_string(l))) { + i++; + continue; + } + // Check if both are string constants with matching prefix + zstring s1, s2; + if (m_util.str.is_string(l, s1) && m_util.str.is_string(r, s2)) { + if (s1 == s2) { i++; continue; } + } + break; + } + if (i == 0) return false; + expr_ref_vector new_lhs(m), new_rhs(m); + new_lhs.append(lhs.size() - i, lhs.data() + i); + new_rhs.append(rhs.size() - i, rhs.data() + i); + lhs.swap(new_lhs); + rhs.swap(new_rhs); + return true; + } + + bool nielsen::strip_common_suffix(expr_ref_vector& lhs, expr_ref_vector& rhs) { + unsigned li = lhs.size(); + unsigned ri = rhs.size(); + unsigned stripped = 0; + while (li > 0 && ri > 0) { + expr* l = lhs.get(li - 1); + expr* r = rhs.get(ri - 1); + if (l == r && (is_unit(l) || m_util.str.is_string(l))) { + li--; ri--; stripped++; + continue; + } + zstring s1, s2; + if (m_util.str.is_string(l, s1) && m_util.str.is_string(r, s2)) { + if (s1 == s2) { li--; ri--; stripped++; continue; } + } + break; + } + if (stripped == 0) return false; + lhs.resize(li); + rhs.resize(ri); + return true; + } + + // ------------------------------------------------------- + // Main simplification (no case splitting) + // ------------------------------------------------------- + + nielsen_result nielsen::simplify(expr_ref_vector& lhs, expr_ref_vector& rhs) { + bool changed = false; + + // Remove empty strings from both sides + unsigned j = 0; + for (unsigned i = 0; i < lhs.size(); ++i) + if (!is_empty(lhs.get(i))) + lhs[j++] = lhs.get(i); + lhs.resize(j); + + j = 0; + for (unsigned i = 0; i < rhs.size(); ++i) + if (!is_empty(rhs.get(i))) + rhs[j++] = rhs.get(i); + rhs.resize(j); + + // Check trivial cases + if (lhs.empty() && rhs.empty()) + return nielsen_result::solved; + + // Strip common prefix and suffix + changed |= strip_common_prefix(lhs, rhs); + changed |= strip_common_suffix(lhs, rhs); + + if (lhs.empty() && rhs.empty()) + return nielsen_result::solved; + + // Check for conflict: both sides start with different constants + if (is_conflict(lhs, rhs)) + return nielsen_result::conflict; + + // Variable = empty: if one side is empty and other has single var + if (lhs.empty() && rhs.size() == 1 && is_var(rhs.get(0))) + return nielsen_result::solved; // x = ε is a solution + if (rhs.empty() && lhs.size() == 1 && is_var(lhs.get(0))) + return nielsen_result::solved; // x = ε is a solution + + // Single variable = single term (x = t): a direct assignment, solved + if (lhs.size() == 1 && is_var(lhs.get(0)) && !has_var(rhs)) + return nielsen_result::solved; + if (rhs.size() == 1 && is_var(rhs.get(0)) && !has_var(lhs)) + return nielsen_result::solved; + + // Both sides start with the same variable: strip it + if (!lhs.empty() && !rhs.empty() && lhs.get(0) == rhs.get(0) && is_var(lhs.get(0))) { + expr_ref_vector new_lhs(m), new_rhs(m); + new_lhs.append(lhs.size() - 1, lhs.data() + 1); + new_rhs.append(rhs.size() - 1, rhs.data() + 1); + lhs.swap(new_lhs); + rhs.swap(new_rhs); + changed = true; + } + + // Both sides end with the same variable: strip it + if (!lhs.empty() && !rhs.empty() && + lhs.back() == rhs.back() && is_var(lhs.back())) { + lhs.pop_back(); + rhs.pop_back(); + changed = true; + } + + if (changed && lhs.empty() && rhs.empty()) + return nielsen_result::solved; + + if (changed) + return nielsen_result::reduced; + + return nielsen_result::unchanged; + } + + // ------------------------------------------------------- + // Check for conflicts + // ------------------------------------------------------- + + bool nielsen::is_conflict(expr_ref_vector const& lhs, expr_ref_vector const& rhs) const { + if (lhs.empty() != rhs.empty()) { + // One side empty, other side has constants + expr_ref_vector const& nonempty = lhs.empty() ? rhs : lhs; + for (unsigned i = 0; i < nonempty.size(); ++i) { + zstring s; + if (m_util.str.is_string(nonempty[i], s) && s.length() > 0) + return true; + if (is_unit(nonempty[i])) + return true; + } + return false; + } + if (lhs.empty() && rhs.empty()) + return false; + + // Both start with different non-variable ground terms + expr* l = lhs[0]; + expr* r = rhs[0]; + zstring s1, s2; + if (m_util.str.is_string(l, s1) && m_util.str.is_string(r, s2)) { + if (s1.length() > 0 && s2.length() > 0 && s1[0] != s2[0]) + return true; + } + if (is_unit(l) && is_unit(r) && l != r) { + // Different unit terms + expr* c1 = to_app(l)->get_arg(0); + expr* c2 = to_app(r)->get_arg(0); + rational v1, v2; + if (m_autil.is_numeral(c1, v1) && m_autil.is_numeral(c2, v2) && v1 != v2) + return true; + } + return false; + } + + bool nielsen::is_solved(expr_ref_vector const& lhs, expr_ref_vector const& rhs) const { + return lhs.empty() && rhs.empty(); + } + + // ------------------------------------------------------- + // Case splitting + // ------------------------------------------------------- + + void nielsen::apply_subst(expr* var, expr* term, expr_ref_vector const& src, expr_ref_vector& dst) { + dst.reset(); + for (unsigned i = 0; i < src.size(); ++i) { + if (src[i] == var) { + // Replace variable with its substitution + m_util.str.get_concat_units(term, dst); + } + else { + dst.push_back(src[i]); + } + } + } + + bool nielsen::split(expr_ref_vector const& lhs, expr_ref_vector const& rhs, + vector& branches) { + if (lhs.empty() || rhs.empty()) { + // One side empty: all variables on other side must be empty + expr_ref_vector const& nonempty = lhs.empty() ? rhs : lhs; + for (unsigned i = 0; i < nonempty.size(); ++i) { + if (is_var(nonempty[i])) { + nielsen_branch b(m); + b.var = nonempty[i]; + b.term = m_util.str.mk_empty(nonempty[i]->get_sort()); + // After substitution, just remove the empty variable + expr_ref_vector const& other = lhs.empty() ? lhs : rhs; + b.new_lhs.append(other); + for (unsigned j = 0; j < nonempty.size(); ++j) + if (j != i && !is_empty(nonempty[j])) + b.new_rhs.push_back(nonempty[j]); + if (lhs.empty()) b.new_lhs.swap(b.new_rhs); + branches.push_back(std::move(b)); + return true; + } + } + return false; + } + + expr* l0 = lhs[0]; + expr* r0 = rhs[0]; + + // Case 1: Variable vs constant/unit + // x·α = c·β → branch: x = ε or x = c·x' + if (is_var(l0) && (is_unit(r0) || m_util.str.is_string(r0))) { + // Branch 1: x = ε + { + nielsen_branch b(m); + b.var = l0; + b.term = m_util.str.mk_empty(l0->get_sort()); + apply_subst(l0, b.term, lhs, b.new_lhs); + b.new_rhs.append(rhs); + branches.push_back(std::move(b)); + } + // Branch 2: x = r0 · x' (peel first character) + { + nielsen_branch b(m); + b.var = l0; + expr_ref x_prime(m.mk_fresh_const("x", l0->get_sort()), m); + b.term = m_util.str.mk_concat(r0, x_prime); + apply_subst(l0, b.term, lhs, b.new_lhs); + b.new_rhs.append(rhs); + branches.push_back(std::move(b)); + } + return true; + } + + // Symmetric: constant vs variable on left + if (is_var(r0) && (is_unit(l0) || m_util.str.is_string(l0))) { + // Branch 1: y = ε + { + nielsen_branch b(m); + b.var = r0; + b.term = m_util.str.mk_empty(r0->get_sort()); + b.new_lhs.append(lhs); + apply_subst(r0, b.term, rhs, b.new_rhs); + branches.push_back(std::move(b)); + } + // Branch 2: y = l0 · y' + { + nielsen_branch b(m); + b.var = r0; + expr_ref y_prime(m.mk_fresh_const("y", r0->get_sort()), m); + b.term = m_util.str.mk_concat(l0, y_prime); + b.new_lhs.append(lhs); + apply_subst(r0, b.term, rhs, b.new_rhs); + branches.push_back(std::move(b)); + } + return true; + } + + // Case 2: Variable vs variable + // x·α = y·β → branch: x = y (if same), x = y·z, or y = x·z + if (is_var(l0) && is_var(r0)) { + if (l0 == r0) { + // Same variable: strip and continue (should have been handled by simplify) + return false; + } + // Branch 1: x = ε + { + nielsen_branch b(m); + b.var = l0; + b.term = m_util.str.mk_empty(l0->get_sort()); + apply_subst(l0, b.term, lhs, b.new_lhs); + b.new_rhs.append(rhs); + branches.push_back(std::move(b)); + } + // Branch 2: y = ε + { + nielsen_branch b(m); + b.var = r0; + b.term = m_util.str.mk_empty(r0->get_sort()); + b.new_lhs.append(lhs); + apply_subst(r0, b.term, rhs, b.new_rhs); + branches.push_back(std::move(b)); + } + // Branch 3: x = y · z (x is longer) + { + nielsen_branch b(m); + b.var = l0; + expr_ref z(m.mk_fresh_const("z", l0->get_sort()), m); + b.term = m_util.str.mk_concat(r0, z); + apply_subst(l0, b.term, lhs, b.new_lhs); + b.new_rhs.append(rhs); + branches.push_back(std::move(b)); + } + // Branch 4: y = x · z (y is longer) + { + nielsen_branch b(m); + b.var = r0; + expr_ref z(m.mk_fresh_const("z", r0->get_sort()), m); + b.term = m_util.str.mk_concat(l0, z); + b.new_lhs.append(lhs); + apply_subst(r0, b.term, rhs, b.new_rhs); + branches.push_back(std::move(b)); + } + return true; + } + + return false; + } +} diff --git a/src/ast/rewriter/nseq_nielsen.h b/src/ast/rewriter/nseq_nielsen.h new file mode 100644 index 000000000..bfcc1cb78 --- /dev/null +++ b/src/ast/rewriter/nseq_nielsen.h @@ -0,0 +1,106 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + nseq_nielsen.h + +Abstract: + + Nielsen transformation-based word equation solver. + Self-contained rewriter utility for decomposing and solving + word equations using Nielsen transformations. + + Given a word equation lhs_1·...·lhs_n = rhs_1·...·rhs_m, + the solver applies: + - Constant matching: strip matching constants from both sides + - Empty elimination: x = ε + - Variable splitting: x·α = c·β → x = c·x' or x = ε + - Length-based reasoning for pruning + +Author: + + Clemens Eisenhofer + Nikolaj Bjorner (nbjorner) 2025-2-28 + +--*/ +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/arith_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" + +namespace seq { + + // Result of a Nielsen transformation step + enum class nielsen_result { + solved, // equation is trivially satisfied + conflict, // equation is unsatisfiable + reduced, // equation was simplified + split, // case split needed + unchanged // no progress + }; + + // A case split produced by Nielsen transformation. + // Represents a substitution x -> t and the resulting simplified equation. + struct nielsen_branch { + expr_ref var; // variable being assigned + expr_ref term; // value assigned (ε, constant prefix + fresh var, etc.) + expr_ref_vector new_lhs; // resulting LHS after substitution + expr_ref_vector new_rhs; // resulting RHS after substitution + nielsen_branch(ast_manager& m) : var(m), term(m), new_lhs(m), new_rhs(m) {} + }; + + // Nielsen transformation engine for word equations. + // Self-contained, no dependency on SMT context. + class nielsen { + ast_manager& m; + seq_util m_util; + arith_util m_autil; + seq_rewriter& m_rw; + + // Scratch space + expr_ref_vector m_lhs, m_rhs; + + // Strip matching constants/units from both sides of an equation. + // Returns true if anything was stripped. + bool strip_common_prefix(expr_ref_vector& lhs, expr_ref_vector& rhs); + bool strip_common_suffix(expr_ref_vector& lhs, expr_ref_vector& rhs); + + // Check if an expression is a unit (single character) + bool is_unit(expr* e) const; + + // Check if an expression is the empty string + bool is_empty(expr* e) const; + + // Check if an expression vector contains any variables + bool has_var(expr_ref_vector const& es) const; + + // Apply substitution x -> t in an expression vector + void apply_subst(expr* var, expr* term, expr_ref_vector const& src, expr_ref_vector& dst); + + public: + nielsen(ast_manager& m, seq_rewriter& rw); + + // Check if an expression is a string variable (not unit, not constant) + bool is_var(expr* e) const; + + // Main simplification: reduce a word equation as far as possible + // without case splitting. Returns the result status. + // lhs, rhs: in/out - the equation sides (decomposed into concat components) + nielsen_result simplify(expr_ref_vector& lhs, expr_ref_vector& rhs); + + // Generate case split branches for a word equation that cannot + // be further simplified. The equation should be in simplified form. + // branches: output - possible branches to explore + // Returns false if no splits are possible (stuck). + bool split(expr_ref_vector const& lhs, expr_ref_vector const& rhs, + vector& branches); + + // Check if an equation is trivially solved (both sides empty) + bool is_solved(expr_ref_vector const& lhs, expr_ref_vector const& rhs) const; + + // Check for conflict (mismatched constants) + bool is_conflict(expr_ref_vector const& lhs, expr_ref_vector const& rhs) const; + }; +} diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 9e28b7fc8..2f08b89a5 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -33,6 +33,7 @@ namespace smt { m_sk(ctx.get_manager(), m_rewrite), m_arith_value(ctx.get_manager()), m_state(ctx.get_manager(), m_util), + m_nielsen(ctx.get_manager(), m_seq_rewrite), m_find(*this), m_has_seq(false), m_new_propagation(false) { @@ -53,6 +54,7 @@ namespace smt { if (!m_has_seq) return FC_DONE; + m_new_propagation = false; TRACE(seq, display(tout << "final_check level=" << ctx.get_scope_level() << "\n");); // Process pending axioms @@ -65,9 +67,16 @@ namespace smt { return FC_CONTINUE; } - // TODO: implement Nielsen transformation-based solving + // Solve word equations using Nielsen transformations + if (solve_eqs()) + return FC_CONTINUE; + // TODO: implement regex membership checking // TODO: implement length/Parikh reasoning + + if (all_eqs_solved() && m_state.mems().empty()) + return FC_DONE; + return FC_GIVEUP; } @@ -370,6 +379,226 @@ namespace smt { return expr_ref(m_util.str.mk_concat(es.size(), es.data(), s), m); } + // Canonize an equation side using current e-graph equivalences. + // Replaces each element with its canonical representative, decomposing + // concatenations and string constants as needed. + bool theory_nseq::canonize(expr_ref_vector const& src, expr_ref_vector& dst, + nseq_dependency*& dep) { + dst.reset(); + for (expr* e : src) { + if (!ctx.e_internalized(e)) { + dst.push_back(e); + continue; + } + enode* n = ctx.get_enode(e); + enode* r = n->get_root(); + expr* re = r->get_expr(); + if (re != e) { + // Track the dependency for the equality + dep = m_state.mk_join(dep, m_state.mk_dep(n, r)); + } + // Decompose the canonical representative into concat components + m_util.str.get_concat_units(re, dst); + } + return true; + } + + // Check if all equations are satisfied in the current e-graph. + bool theory_nseq::all_eqs_solved() { + for (auto const& eq : m_state.eqs()) { + expr_ref_vector lhs(m), rhs(m); + nseq_dependency* dep = eq.dep(); + if (!canonize(eq.lhs(), lhs, dep) || !canonize(eq.rhs(), rhs, dep)) + return false; + seq::nielsen_result result = m_nielsen.simplify(lhs, rhs); + TRACE(seq, tout << "all_eqs_solved [" << eq.id() << "]: "; + for (auto* e : lhs) tout << mk_bounded_pp(e, m, 2) << " "; + tout << "= "; + for (auto* e : rhs) tout << mk_bounded_pp(e, m, 2) << " "; + tout << " -> " << (int)result << "\n";); + if (result != seq::nielsen_result::solved) + return false; + } + return true; + } + + // ------------------------------------------------------- + // Nielsen equation solving + // ------------------------------------------------------- + + bool theory_nseq::solve_eqs() { + auto const& eqs = m_state.eqs(); + for (unsigned i = 0; !ctx.inconsistent() && i < eqs.size(); ++i) + solve_eq(eqs[i]); + return m_new_propagation || ctx.inconsistent(); + } + + bool theory_nseq::solve_eq(nseq_eq const& eq) { + expr_ref_vector lhs(m), rhs(m); + nseq_dependency* dep = eq.dep(); + + // Canonize using current e-graph equivalences + if (!canonize(eq.lhs(), lhs, dep) || !canonize(eq.rhs(), rhs, dep)) + return false; + + TRACE(seq, tout << "solve_eq [" << eq.id() << "]: "; + for (auto* e : lhs) tout << mk_bounded_pp(e, m, 2) << " "; + tout << "= "; + for (auto* e : rhs) tout << mk_bounded_pp(e, m, 2) << " "; + tout << "\n";); + + // Apply Nielsen simplification + seq::nielsen_result result = m_nielsen.simplify(lhs, rhs); + + switch (result) { + case seq::nielsen_result::solved: { + // Propagate solved form: either both empty, var = empty, or var = ground term + bool changed = false; + if (lhs.size() == 1 && m_nielsen.is_var(lhs.get(0)) && !rhs.empty()) { + sort* s = lhs.get(0)->get_sort(); + expr_ref rhs_concat = mk_concat(rhs, s); + changed = propagate_eq(dep, lhs.get(0), rhs_concat); + } + else if (rhs.size() == 1 && m_nielsen.is_var(rhs.get(0)) && !lhs.empty()) { + sort* s = rhs.get(0)->get_sort(); + expr_ref lhs_concat = mk_concat(lhs, s); + changed = propagate_eq(dep, rhs.get(0), lhs_concat); + } + else { + // All remaining vars must be empty + for (unsigned i = 0; i < lhs.size(); ++i) + if (m_nielsen.is_var(lhs.get(i))) + changed |= propagate_eq(dep, lhs.get(i), expr_ref(m_util.str.mk_empty(lhs.get(i)->get_sort()), m)); + for (unsigned i = 0; i < rhs.size(); ++i) + if (m_nielsen.is_var(rhs.get(i))) + changed |= propagate_eq(dep, rhs.get(i), expr_ref(m_util.str.mk_empty(rhs.get(i)->get_sort()), m)); + } + TRACE(seq, tout << "solved" << (changed ? " (propagated)" : " (no change)") << "\n";); + return changed; + } + + case seq::nielsen_result::conflict: + TRACE(seq, tout << "conflict\n";); + set_conflict(dep); + return true; + + case seq::nielsen_result::reduced: { + if (lhs.empty() && rhs.empty()) + return false; // already equal + bool changed = false; + if (!lhs.empty() && !rhs.empty()) { + sort* s = lhs[0]->get_sort(); + expr_ref l = mk_concat(lhs, s); + expr_ref r = mk_concat(rhs, s); + changed = propagate_eq(dep, l, r); + } + else { + expr_ref_vector& nonempty = lhs.empty() ? rhs : lhs; + for (expr* e : nonempty) { + if (m_util.is_seq(e)) { + expr_ref emp(m_util.str.mk_empty(e->get_sort()), m); + changed |= propagate_eq(dep, e, emp); + } + } + } + TRACE(seq, tout << "reduced" << (changed ? " (propagated)" : " (no change)") << "\n";); + return changed; + } + + case seq::nielsen_result::split: + case seq::nielsen_result::unchanged: + break; + } + + // Try branching: find a variable and decide x = "" or x ≠ "" + return branch_eq(lhs, rhs, dep); + } + + bool theory_nseq::branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs, + nseq_dependency* dep) { + // Try branching on variables from the LHS first, then RHS + for (unsigned side = 0; side < 2; ++side) { + expr_ref_vector const& es = (side == 0) ? lhs : rhs; + for (expr* e : es) { + if (!m_nielsen.is_var(e)) + continue; + // Check if this variable is already known to be empty + enode* n = ctx.get_enode(e); + expr_ref emp(m_util.str.mk_empty(e->get_sort()), m); + if (ctx.e_internalized(emp)) { + enode* n_emp = ctx.get_enode(emp); + if (n->get_root() == n_emp->get_root()) + continue; // already equal to empty, skip + } + + // Decide: x = "" or x ≠ "" + literal eq_empty = mk_eq(e, emp, false); + switch (ctx.get_assignment(eq_empty)) { + case l_undef: + // Force the empty branch first + TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";); + ctx.force_phase(eq_empty); + ctx.mark_as_relevant(eq_empty); + m_new_propagation = true; + m_state.stats().m_num_splits++; + return true; + case l_true: + // Variable assigned to empty but EQ not yet merged + // Propagate the equality + propagate_eq(dep, e, emp); + return true; + case l_false: + // x ≠ "": try to find a prefix from the other side + break; + } + } + } + + // If all variables on one side are decided non-empty, + // try to match against the other side using prefix enumeration + return branch_eq_prefix(lhs, rhs, dep); + } + + bool theory_nseq::branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, + nseq_dependency* dep) { + // Find a leading variable on either side + if (!lhs.empty() && m_nielsen.is_var(lhs[0]) && !rhs.empty()) + return branch_var_prefix(lhs[0], rhs, dep); + if (!rhs.empty() && m_nielsen.is_var(rhs[0]) && !lhs.empty()) + return branch_var_prefix(rhs[0], lhs, dep); + return false; + } + + bool theory_nseq::branch_var_prefix(expr* x, expr_ref_vector const& other, + nseq_dependency* dep) { + // For x starting one side, try x = prefix of other side + // x = "" was already tried (assigned false) + // Now enumerate: x = other[0], x = other[0]·other[1], ... + expr_ref candidate(m); + for (unsigned len = 1; len <= other.size(); ++len) { + if (len == 1) + candidate = other[0]; + else + candidate = expr_ref(m_util.str.mk_concat(len, other.data(), x->get_sort()), m); + literal eq = mk_eq(x, candidate, false); + switch (ctx.get_assignment(eq)) { + case l_undef: + TRACE(seq, tout << "branch " << mk_bounded_pp(x, m) << " = " << mk_bounded_pp(candidate, m) << "\n";); + ctx.force_phase(eq); + ctx.mark_as_relevant(eq); + m_new_propagation = true; + m_state.stats().m_num_splits++; + return true; + case l_true: + propagate_eq(dep, x, candidate); + return true; + case l_false: + continue; + } + } + return false; + } + // ------------------------------------------------------- // Display and statistics // ------------------------------------------------------- @@ -396,6 +625,35 @@ namespace smt { // ------------------------------------------------------- model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) { + app* e = n->get_expr(); + TRACE(seq, tout << "mk_value: " << mk_bounded_pp(e, m) << "\n";); + + if (m_util.is_seq(e)) { + // Walk the equivalence class looking for a concrete string value + enode* root = n->get_root(); + enode* curr = root; + do { + expr* ce = curr->get_expr(); + zstring s; + if (m_util.str.is_string(ce, s)) + return alloc(expr_wrapper_proc, to_app(ce)); + if (m_util.str.is_empty(ce)) + return alloc(expr_wrapper_proc, to_app(ce)); + curr = curr->get_next(); + } while (curr != root); + + // No concrete value found: use seq_factory to get a fresh value + expr_ref val(m); + val = mg.get_model().get_fresh_value(e->get_sort()); + if (val) + return alloc(expr_wrapper_proc, to_app(val)); + // Fallback: empty string + return alloc(expr_wrapper_proc, to_app(m_util.str.mk_empty(e->get_sort()))); + } + if (m_util.is_re(e)) { + return alloc(expr_wrapper_proc, to_app(m_util.re.mk_full_seq(e->get_sort()))); + } + UNREACHABLE(); return nullptr; } diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 3125c9e2d..2891c3ce0 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -23,6 +23,7 @@ Author: #include "ast/rewriter/seq_rewriter.h" #include "ast/rewriter/seq_skolem.h" #include "ast/rewriter/th_rewriter.h" +#include "ast/rewriter/nseq_nielsen.h" #include "model/seq_factory.h" #include "smt/smt_theory.h" #include "smt/smt_arith_value.h" @@ -39,6 +40,7 @@ namespace smt { seq::skolem m_sk; arith_value m_arith_value; nseq_state m_state; + seq::nielsen m_nielsen; nseq_union_find m_find; bool m_has_seq; bool m_new_propagation; @@ -86,6 +88,15 @@ namespace smt { expr_ref mk_len(expr* s); expr_ref mk_concat(expr_ref_vector const& es, sort* s); + // Nielsen equation solving + bool solve_eqs(); + bool solve_eq(nseq_eq const& eq); + bool branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); + bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep); + bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep); + bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); + bool all_eqs_solved(); + public: theory_nseq(context& ctx); ~theory_nseq() override;