From 59bc9b17eab4ed3d7f6c9afc5347b9daaa94637b Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:45:53 -0700 Subject: [PATCH] theory_nseq: remove seq_state, embed tracked entries directly in prop_queue (#9045) * Remove seq_state: embed tracked_str_eq/tracked_str_mem directly in prop_queue Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * theory_nseq: use type aliases for eq_item/mem_item instead of wrapper structs Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * seq_model: validate_regex takes single tracked_str_mem, caller loops Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/seq/seq_state.h | 73 +------------------ src/smt/seq_model.cpp | 35 ++++----- src/smt/seq_model.h | 5 +- src/smt/theory_nseq.cpp | 155 ++++++++++++++++++++++------------------ src/smt/theory_nseq.h | 14 ++-- 5 files changed, 114 insertions(+), 168 deletions(-) diff --git a/src/smt/seq/seq_state.h b/src/smt/seq/seq_state.h index b083f56dd..c80747943 100644 --- a/src/smt/seq/seq_state.h +++ b/src/smt/seq/seq_state.h @@ -7,9 +7,9 @@ Module Name: Abstract: - Constraint store bridging the SMT context to the Nielsen graph. - Holds word equations and regex membership constraints with - push/pop support for backtracking. + Tracked constraint types bridging the SMT context to the Nielsen graph. + tracked_str_eq and tracked_str_mem extend the core constraint types with + SMT-layer source information (enodes and literals) for conflict reporting. Author: @@ -39,71 +39,4 @@ namespace smt { : seq::str_mem(str, regex, history, id, dep), lit(lit) {} }; - // source info for a string disequality - struct diseq_source { - enode* m_n1; - enode* m_n2; - }; - - class seq_state { - vector m_str_eqs; - vector m_str_mems; - vector m_diseqs; - unsigned_vector m_str_eq_lim; - unsigned_vector m_str_mem_lim; - unsigned_vector m_diseq_lim; - unsigned m_next_mem_id = 0; - - public: - seq_state() = default; - - void push() { - m_str_eq_lim.push_back(m_str_eqs.size()); - m_str_mem_lim.push_back(m_str_mems.size()); - m_diseq_lim.push_back(m_diseqs.size()); - } - - void pop(unsigned n) { - for (unsigned i = 0; i < n; ++i) { - m_str_eqs.shrink(m_str_eq_lim.back()); - m_str_eq_lim.pop_back(); - m_str_mems.shrink(m_str_mem_lim.back()); - m_str_mem_lim.pop_back(); - m_diseqs.shrink(m_diseq_lim.back()); - m_diseq_lim.pop_back(); - } - } - - void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) { - seq::dep_tracker dep = nullptr; - m_str_eqs.push_back(tracked_str_eq(lhs, rhs, n1, n2, dep)); - } - - void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal lit) { - seq::dep_tracker dep = nullptr; - m_str_mems.push_back(tracked_str_mem(str, regex, lit, nullptr, m_next_mem_id++, dep)); - } - - void add_diseq(enode* n1, enode* n2) { - m_diseqs.push_back({n1, n2}); - } - - vector const& str_eqs() const { return m_str_eqs; } - vector const& str_mems() const { return m_str_mems; } - vector const& diseqs() const { return m_diseqs; } - - diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; } - - bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_diseqs.empty(); } - - void reset() { - m_str_eqs.reset(); - m_str_mems.reset(); - m_diseqs.reset(); - m_str_eq_lim.reset(); - m_str_mem_lim.reset(); - m_diseq_lim.reset(); - } - }; - } diff --git a/src/smt/seq_model.cpp b/src/smt/seq_model.cpp index 7e06410b7..20eb2ddbf 100644 --- a/src/smt/seq_model.cpp +++ b/src/smt/seq_model.cpp @@ -17,7 +17,6 @@ Author: --*/ #include "smt/seq_model.h" -#include "smt/seq/seq_state.h" #include "smt/smt_context.h" #include "smt/smt_model_generator.h" #include "smt/proto_model/proto_model.h" @@ -347,28 +346,22 @@ namespace smt { } } - bool seq_model::validate_regex(seq_state const& state, ::proto_model& mdl) { - bool ok = true; + bool seq_model::validate_regex(tracked_str_mem const& mem, ::proto_model& mdl) { + if (!mem.m_str || !mem.m_regex) + return true; + expr* s_expr = mem.m_str->get_expr(); + expr* r_expr = mem.m_regex->get_expr(); + if (!s_expr || !r_expr) + return true; - // validate positive memberships: str ∈ regex - for (auto const& mem : state.str_mems()) { - if (!mem.m_str || !mem.m_regex) - continue; - expr* s_expr = mem.m_str->get_expr(); - expr* r_expr = mem.m_regex->get_expr(); - if (!s_expr || !r_expr) - continue; - - expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m); - if (mdl.is_false(in_re)) { - IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: " - << mk_bounded_pp(s_expr, m, 3) - << " in " << mk_bounded_pp(r_expr, m, 3) << "\n";); - ok = false; - } + expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m); + if (mdl.is_false(in_re)) { + IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: " + << mk_bounded_pp(s_expr, m, 3) + << " in " << mk_bounded_pp(r_expr, m, 3) << "\n";); + return false; } - - return ok; + return true; } } diff --git a/src/smt/seq_model.h b/src/smt/seq_model.h index f1676d45c..d1c81129f 100644 --- a/src/smt/seq_model.h +++ b/src/smt/seq_model.h @@ -32,6 +32,7 @@ Author: #include "ast/rewriter/seq_rewriter.h" #include "ast/euf/euf_sgraph.h" #include "smt/seq/seq_nielsen.h" +#include "smt/seq/seq_state.h" // tracked_str_mem #include "model/seq_factory.h" class proto_model; @@ -40,7 +41,7 @@ namespace smt { class enode; class model_generator; - class seq_state; + struct tracked_str_mem; class model_value_proc; class seq_model { @@ -87,7 +88,7 @@ namespace smt { // Validate that model assignments satisfy all regex membership // constraints from the state. Checks positive and negative // memberships. Returns true if all constraints pass. - bool validate_regex(seq_state const& state, ::proto_model& mdl); + bool validate_regex(tracked_str_mem const& mem, ::proto_model& mdl); private: // extract variable assignments from the sat path (root-to-leaf edges). diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index a1d17337a..b17856365 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -36,7 +36,6 @@ namespace smt { m_context_solver(m), m_nielsen(m_sgraph, m_context_solver), m_axioms(m_th_rewriter), - m_state(), m_regex(m_sgraph), m_model(m, m_seq, m_rewriter, m_sgraph) { @@ -169,10 +168,9 @@ namespace smt { euf::snode* s1 = get_snode(e1); euf::snode* s2 = get_snode(e2); if (s1 && s2) { - unsigned idx = m_state.str_eqs().size(); - m_state.add_str_eq(s1, s2, get_enode(v1), get_enode(v2)); + seq::dep_tracker dep = nullptr; ctx.push_trail(restore_vector(m_prop_queue)); - m_prop_queue.push_back(eq_item{idx}); + m_prop_queue.push_back(eq_item(s1, s2, get_enode(v1), get_enode(v2), dep)); } } @@ -203,10 +201,11 @@ namespace smt { euf::snode* sn_re = get_snode(re); if (!sn_str || !sn_re) return; - unsigned idx = m_state.str_mems().size(); literal lit(v, !is_true); + seq::dep_tracker dep = nullptr; if (is_true) { - m_state.add_str_mem(sn_str, sn_re, lit); + ctx.push_trail(restore_vector(m_prop_queue)); + m_prop_queue.push_back(mem_item(sn_str, sn_re, lit, nullptr, m_next_mem_id++, dep)); } else { // ¬(str ∈ R) ≡ str ∈ complement(R): store as a positive membership @@ -214,10 +213,9 @@ namespace smt { // is kept in mem_source for conflict reporting. expr_ref re_compl(m_seq.re.mk_complement(re), m); euf::snode* sn_re_compl = get_snode(re_compl.get()); - m_state.add_str_mem(sn_str, sn_re_compl, lit); + ctx.push_trail(restore_vector(m_prop_queue)); + m_prop_queue.push_back(mem_item(sn_str, sn_re_compl, lit, nullptr, m_next_mem_id++, dep)); } - ctx.push_trail(restore_vector(m_prop_queue)); - m_prop_queue.push_back(mem_item{idx}); } else if (m_seq.str.is_prefix(e)) { if (is_true) @@ -258,14 +256,12 @@ namespace smt { void theory_nseq::push_scope_eh() { theory::push_scope_eh(); - m_state.push(); m_sgraph.push(); } void theory_nseq::pop_scope_eh(unsigned num_scopes) { theory::pop_scope_eh(num_scopes); - m_state.pop(num_scopes); m_sgraph.pop(num_scopes); } @@ -289,24 +285,22 @@ namespace smt { while (m_prop_qhead < m_prop_queue.size() && !ctx.inconsistent()) { auto const& item = m_prop_queue[m_prop_qhead++]; if (std::holds_alternative(item)) - propagate_eq(std::get(item).idx); + propagate_eq(std::get(item)); else if (std::holds_alternative(item)) - propagate_pos_mem(std::get(item).idx); + propagate_pos_mem(std::get(item)); else if (std::holds_alternative(item)) dequeue_axiom(std::get(item).e); } } - void theory_nseq::propagate_eq(unsigned idx) { + void theory_nseq::propagate_eq(tracked_str_eq const& eq) { // When s1 = s2 is learned, ensure len(s1) and len(s2) are // internalized so congruence closure propagates len(s1) = len(s2). - auto const& src = m_state.str_eqs()[idx]; - ensure_length_var(src.m_l->get_expr()); - ensure_length_var(src.m_r->get_expr()); + ensure_length_var(eq.m_l->get_expr()); + ensure_length_var(eq.m_r->get_expr()); } - void theory_nseq::propagate_pos_mem(unsigned idx) { - auto const& mem = m_state.str_mems()[idx]; + void theory_nseq::propagate_pos_mem(tracked_str_mem const& mem) { if (!mem.m_str || !mem.m_regex) return; @@ -419,36 +413,46 @@ namespace smt { void theory_nseq::populate_nielsen_graph() { m_nielsen.reset(); - // transfer string equalities from state to nielsen graph root - for (auto const& eq : m_state.str_eqs()) { - m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs, eq.m_l, eq.m_r); - } + unsigned num_eqs = 0, num_mems = 0; - // transfer regex memberships, pre-processing through seq_regex - // to consume ground prefixes via Brzozowski derivatives - for (auto const &mem : m_state.str_mems()) { - int triv = m_regex.check_trivial(mem); - if (triv > 0) - continue; // trivially satisfied, skip - if (triv < 0) { - // trivially unsat: add anyway so solve() detects conflict - m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit); - continue; + // transfer string equalities and regex memberships from prop_queue to nielsen graph root + for (auto const& item : m_prop_queue) { + if (std::holds_alternative(item)) { + auto const& eq = std::get(item); + m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs, eq.m_l, eq.m_r); + ++num_eqs; } - // pre-process: consume ground prefix characters - vector processed; - if (!m_regex.process_str_mem(mem, processed)) { - // conflict during ground prefix consumption - m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit); - continue; - } - for (auto const& pm : processed) { - m_nielsen.add_str_mem(pm.m_str, pm.m_regex, mem.lit); + else if (std::holds_alternative(item)) { + auto const& mem = std::get(item); + int triv = m_regex.check_trivial(mem); + if (triv > 0) { + ++num_mems; + continue; // trivially satisfied, skip + } + if (triv < 0) { + // trivially unsat: add anyway so solve() detects conflict + m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit); + ++num_mems; + continue; + } + // pre-process: consume ground prefix characters + vector processed; + if (!m_regex.process_str_mem(mem, processed)) { + // conflict during ground prefix consumption + m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit); + ++num_mems; + continue; + } + for (auto const& pm : processed) + m_nielsen.add_str_mem(pm.m_str, pm.m_regex, mem.lit); + ++num_mems; } } - TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, " - << m_state.str_mems().size() << " mems -> nielsen root\n"); + TRACE(seq, tout << "nseq populate: " << num_eqs << " eqs, " + << num_mems << " mems -> nielsen root\n"); + IF_VERBOSE(1, verbose_stream() << "nseq final_check: populating graph with " + << num_eqs << " eqs, " << num_mems << " mems\n";); } final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) { @@ -459,8 +463,13 @@ namespace smt { return FC_CONTINUE; } + // Check if there are any eq/mem items in the propagation queue. + bool has_eq_or_mem = false; + for (auto const& item : m_prop_queue) + if (!std::holds_alternative(item)) { has_eq_or_mem = true; break; } + // there is nothing to do for the string solver, as there are no string constraints - if (m_state.empty() && m_ho_terms.empty() && !has_unhandled_preds()) { + if (!has_eq_or_mem && m_ho_terms.empty() && !has_unhandled_preds()) { IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state+ho, FC_DONE (no solve)\n";); m_nielsen.reset(); m_nielsen.create_root(); @@ -474,7 +483,7 @@ namespace smt { return FC_CONTINUE; } - if (m_state.empty() && !has_unhandled_preds()) { + if (!has_eq_or_mem && !has_unhandled_preds()) { IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state (after ho), FC_DONE (no solve)\n";); m_nielsen.reset(); m_nielsen.create_root(); @@ -482,8 +491,6 @@ namespace smt { return FC_DONE; } - IF_VERBOSE(1, verbose_stream() << "nseq final_check: populating graph with " - << m_state.str_eqs().size() << " eqs, " << m_state.str_mems().size() << " mems\n";); populate_nielsen_graph(); // assert length constraints derived from string equalities @@ -508,7 +515,7 @@ namespace smt { IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck UNSAT\n";); return FC_CONTINUE; case l_false: - // all regex constraints satisfiable, no word eqs/diseqs → SAT + // all regex constraints satisfiable, no word eqs → SAT IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck SAT\n";); m_nielsen.set_sat_node(m_nielsen.root()); return FC_DONE; @@ -538,10 +545,7 @@ namespace smt { IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node=" << (m_nielsen.sat_node() ? "set" : "null") << "\n";); // Nielsen found a consistent assignment for positive constraints. - // If there are disequalities we haven't verified, we cannot soundly declare sat. - SASSERT(!m_state.empty()); // we should have axiomatized them - if (!m_state.diseqs().empty()) - return FC_GIVEUP; + SASSERT(has_eq_or_mem); // we should have axiomatized them if (!has_unhandled_preds()) return FC_DONE; } @@ -604,7 +608,9 @@ namespace smt { } void theory_nseq::validate_model(proto_model& mdl) { - m_model.validate_regex(m_state, mdl); + for (auto const& item : m_prop_queue) + if (std::holds_alternative(item)) + m_model.validate_regex(std::get(item), mdl); } // ----------------------------------------------------------------------- @@ -620,10 +626,14 @@ namespace smt { } void theory_nseq::display(std::ostream& out) const { + unsigned num_eqs = 0, num_mems = 0; + for (auto const& item : m_prop_queue) { + if (std::holds_alternative(item)) ++num_eqs; + else if (std::holds_alternative(item)) ++num_mems; + } out << "theory_nseq\n"; - out << " str_eqs: " << m_state.str_eqs().size() << "\n"; - out << " str_mems: " << m_state.str_mems().size() << "\n"; - out << " diseqs: " << m_state.diseqs().size() << "\n"; + out << " str_eqs: " << num_eqs << "\n"; + out << " str_mems: " << num_mems << "\n"; out << " prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n"; out << " ho_terms: " << m_ho_terms.size() << "\n"; } @@ -900,7 +910,13 @@ namespace smt { // ----------------------------------------------------------------------- lbool theory_nseq::check_regex_memberships_precheck() { - auto const& mems = m_state.str_mems(); + // Collect mem items from the propagation queue into a local pointer array + // so that indices into the array remain stable during this function. + ptr_vector mems; + for (auto const& item : m_prop_queue) + if (std::holds_alternative(item)) + mems.push_back(&std::get(item)); + if (mems.empty()) return l_undef; @@ -910,7 +926,7 @@ namespace smt { bool all_primitive = true; for (unsigned i = 0; i < mems.size(); ++i) { - auto const& mem = mems[i]; + auto const& mem = *mems[i]; SASSERT(mem.m_str && mem.m_regex); if (mem.is_primitive()) { auto& vec = var_to_mems.insert_if_not_there(mem.m_str->id(), unsigned_vector()); @@ -923,6 +939,11 @@ namespace smt { if (var_to_mems.empty()) return l_undef; + // Check if there are any eq items in the queue (needed for SAT condition below). + bool has_eqs = false; + for (auto const& item : m_prop_queue) + if (std::holds_alternative(item)) { has_eqs = true; break; } + bool any_undef = false; // Check intersection emptiness for each variable. @@ -931,7 +952,7 @@ namespace smt { unsigned_vector const& mem_indices = kv.m_value; ptr_vector regexes; for (unsigned i : mem_indices) { - euf::snode* re = mems[i].m_regex; + euf::snode* re = mems[i]->m_regex; if (re) regexes.push_back(re); } @@ -948,10 +969,8 @@ namespace smt { enode_pair_vector eqs; literal_vector lits; for (unsigned i : mem_indices) { - auto const &src = m_state.str_mems()[i]; - SASSERT(ctx.get_assignment(src.lit) == l_true); // we already stored the polarity of the literal - lits.push_back( - src.lit); + SASSERT(ctx.get_assignment(mems[i]->lit) == l_true); // we already stored the polarity of the literal + lits.push_back(mems[i]->lit); } TRACE(seq, tout << "nseq regex precheck: empty intersection for var " << var_id << ", conflict with " << lits.size() << " lits\n";); @@ -967,11 +986,11 @@ namespace smt { return l_undef; // cannot fully determine; let DFS decide // All variables' regex intersections are non-empty. - // If there are no word equations and no disequalities, variables are - // independent and each can be assigned a witness string → SAT. - if (all_primitive && m_state.str_eqs().empty() && m_state.diseqs().empty() && !has_unhandled_preds()) { + // If there are no word equations, variables are independent and + // each can be assigned a witness string → SAT. + if (all_primitive && !has_eqs && !has_unhandled_preds()) { TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, " - << "no word eqs/diseqs → SAT\n";); + << "no word eqs → SAT\n";); return l_false; // signals SAT (non-empty / satisfiable) } diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index f9a40ea12..959853acd 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -28,7 +28,7 @@ Author: #include "smt/smt_theory.h" #include "smt/smt_arith_value.h" #include "smt/seq/seq_nielsen.h" -#include "smt/seq/seq_state.h" +#include "smt/seq/seq_state.h" // tracked_str_eq, tracked_str_mem #include "smt/seq/seq_regex.h" #include "smt/seq_model.h" #include "smt/nseq_context_solver.h" @@ -48,19 +48,19 @@ namespace smt { context_solver m_context_solver; seq::nielsen_graph m_nielsen; seq::axioms m_axioms; - seq_state m_state; seq::seq_regex m_regex; // regex membership pre-processing seq_model m_model; // model construction helper // propagation queue items (variant over the distinct propagation cases) - struct eq_item { unsigned idx; }; // string equality at index idx in str_eqs - struct mem_item { unsigned idx; }; // regex membership at index idx in str_mems - struct axiom_item { expr* e; }; // structural axiom for term e + using eq_item = tracked_str_eq; // string equality + using mem_item = tracked_str_mem; // regex membership + struct axiom_item { expr* e; }; // structural axiom for term e using prop_item = std::variant; vector m_prop_queue; unsigned m_prop_qhead = 0; + unsigned m_next_mem_id = 0; // monotone counter for tracked_str_mem ids obj_hashtable m_axiom_set; // dedup guard for axiom_item enqueues // statistics @@ -117,8 +117,8 @@ namespace smt { euf::snode* get_snode(expr* e); // propagation dispatch helpers - void propagate_eq(unsigned idx); - void propagate_pos_mem(unsigned idx); + void propagate_eq(tracked_str_eq const& eq); + void propagate_pos_mem(tracked_str_mem const& mem); void enqueue_axiom(expr* e); void dequeue_axiom(expr* e); void ensure_length_var(expr* e);