3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-23 04:49:11 +00:00

theory_nseq: remove seq_state, embed tracked entries directly in prop_queue (#9045)

* Remove seq_state: embed tracked_str_eq/tracked_str_mem directly in prop_queue

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

* theory_nseq: use type aliases for eq_item/mem_item instead of wrapper structs

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

* seq_model: validate_regex takes single tracked_str_mem, caller loops

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
Copilot 2026-03-19 14:45:53 -07:00 committed by GitHub
parent 8795bf06fb
commit 59bc9b17ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 114 additions and 168 deletions

View file

@ -7,9 +7,9 @@ Module Name:
Abstract:
Constraint store bridging the SMT context to the Nielsen graph.
Holds word equations and regex membership constraints with
push/pop support for backtracking.
Tracked constraint types bridging the SMT context to the Nielsen graph.
tracked_str_eq and tracked_str_mem extend the core constraint types with
SMT-layer source information (enodes and literals) for conflict reporting.
Author:
@ -39,71 +39,4 @@ namespace smt {
: seq::str_mem(str, regex, history, id, dep), lit(lit) {}
};
// source info for a string disequality
struct diseq_source {
enode* m_n1;
enode* m_n2;
};
class seq_state {
vector<tracked_str_eq> m_str_eqs;
vector<tracked_str_mem> m_str_mems;
vector<diseq_source> m_diseqs;
unsigned_vector m_str_eq_lim;
unsigned_vector m_str_mem_lim;
unsigned_vector m_diseq_lim;
unsigned m_next_mem_id = 0;
public:
seq_state() = default;
void push() {
m_str_eq_lim.push_back(m_str_eqs.size());
m_str_mem_lim.push_back(m_str_mems.size());
m_diseq_lim.push_back(m_diseqs.size());
}
void pop(unsigned n) {
for (unsigned i = 0; i < n; ++i) {
m_str_eqs.shrink(m_str_eq_lim.back());
m_str_eq_lim.pop_back();
m_str_mems.shrink(m_str_mem_lim.back());
m_str_mem_lim.pop_back();
m_diseqs.shrink(m_diseq_lim.back());
m_diseq_lim.pop_back();
}
}
void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) {
seq::dep_tracker dep = nullptr;
m_str_eqs.push_back(tracked_str_eq(lhs, rhs, n1, n2, dep));
}
void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal lit) {
seq::dep_tracker dep = nullptr;
m_str_mems.push_back(tracked_str_mem(str, regex, lit, nullptr, m_next_mem_id++, dep));
}
void add_diseq(enode* n1, enode* n2) {
m_diseqs.push_back({n1, n2});
}
vector<tracked_str_eq> const& str_eqs() const { return m_str_eqs; }
vector<tracked_str_mem> const& str_mems() const { return m_str_mems; }
vector<diseq_source> const& diseqs() const { return m_diseqs; }
diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; }
bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_diseqs.empty(); }
void reset() {
m_str_eqs.reset();
m_str_mems.reset();
m_diseqs.reset();
m_str_eq_lim.reset();
m_str_mem_lim.reset();
m_diseq_lim.reset();
}
};
}

View file

@ -17,7 +17,6 @@ Author:
--*/
#include "smt/seq_model.h"
#include "smt/seq/seq_state.h"
#include "smt/smt_context.h"
#include "smt/smt_model_generator.h"
#include "smt/proto_model/proto_model.h"
@ -347,28 +346,22 @@ namespace smt {
}
}
bool seq_model::validate_regex(seq_state const& state, ::proto_model& mdl) {
bool ok = true;
bool seq_model::validate_regex(tracked_str_mem const& mem, ::proto_model& mdl) {
if (!mem.m_str || !mem.m_regex)
return true;
expr* s_expr = mem.m_str->get_expr();
expr* r_expr = mem.m_regex->get_expr();
if (!s_expr || !r_expr)
return true;
// validate positive memberships: str ∈ regex
for (auto const& mem : state.str_mems()) {
if (!mem.m_str || !mem.m_regex)
continue;
expr* s_expr = mem.m_str->get_expr();
expr* r_expr = mem.m_regex->get_expr();
if (!s_expr || !r_expr)
continue;
expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m);
if (mdl.is_false(in_re)) {
IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
<< mk_bounded_pp(s_expr, m, 3)
<< " in " << mk_bounded_pp(r_expr, m, 3) << "\n";);
ok = false;
}
expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m);
if (mdl.is_false(in_re)) {
IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
<< mk_bounded_pp(s_expr, m, 3)
<< " in " << mk_bounded_pp(r_expr, m, 3) << "\n";);
return false;
}
return ok;
return true;
}
}

View file

@ -32,6 +32,7 @@ Author:
#include "ast/rewriter/seq_rewriter.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/seq/seq_state.h" // tracked_str_mem
#include "model/seq_factory.h"
class proto_model;
@ -40,7 +41,7 @@ namespace smt {
class enode;
class model_generator;
class seq_state;
struct tracked_str_mem;
class model_value_proc;
class seq_model {
@ -87,7 +88,7 @@ namespace smt {
// Validate that model assignments satisfy all regex membership
// constraints from the state. Checks positive and negative
// memberships. Returns true if all constraints pass.
bool validate_regex(seq_state const& state, ::proto_model& mdl);
bool validate_regex(tracked_str_mem const& mem, ::proto_model& mdl);
private:
// extract variable assignments from the sat path (root-to-leaf edges).

View file

@ -36,7 +36,6 @@ namespace smt {
m_context_solver(m),
m_nielsen(m_sgraph, m_context_solver),
m_axioms(m_th_rewriter),
m_state(),
m_regex(m_sgraph),
m_model(m, m_seq, m_rewriter, m_sgraph)
{
@ -169,10 +168,9 @@ namespace smt {
euf::snode* s1 = get_snode(e1);
euf::snode* s2 = get_snode(e2);
if (s1 && s2) {
unsigned idx = m_state.str_eqs().size();
m_state.add_str_eq(s1, s2, get_enode(v1), get_enode(v2));
seq::dep_tracker dep = nullptr;
ctx.push_trail(restore_vector(m_prop_queue));
m_prop_queue.push_back(eq_item{idx});
m_prop_queue.push_back(eq_item(s1, s2, get_enode(v1), get_enode(v2), dep));
}
}
@ -203,10 +201,11 @@ namespace smt {
euf::snode* sn_re = get_snode(re);
if (!sn_str || !sn_re)
return;
unsigned idx = m_state.str_mems().size();
literal lit(v, !is_true);
seq::dep_tracker dep = nullptr;
if (is_true) {
m_state.add_str_mem(sn_str, sn_re, lit);
ctx.push_trail(restore_vector(m_prop_queue));
m_prop_queue.push_back(mem_item(sn_str, sn_re, lit, nullptr, m_next_mem_id++, dep));
}
else {
// ¬(str ∈ R) ≡ str ∈ complement(R): store as a positive membership
@ -214,10 +213,9 @@ namespace smt {
// is kept in mem_source for conflict reporting.
expr_ref re_compl(m_seq.re.mk_complement(re), m);
euf::snode* sn_re_compl = get_snode(re_compl.get());
m_state.add_str_mem(sn_str, sn_re_compl, lit);
ctx.push_trail(restore_vector(m_prop_queue));
m_prop_queue.push_back(mem_item(sn_str, sn_re_compl, lit, nullptr, m_next_mem_id++, dep));
}
ctx.push_trail(restore_vector(m_prop_queue));
m_prop_queue.push_back(mem_item{idx});
}
else if (m_seq.str.is_prefix(e)) {
if (is_true)
@ -258,14 +256,12 @@ namespace smt {
void theory_nseq::push_scope_eh() {
theory::push_scope_eh();
m_state.push();
m_sgraph.push();
}
void theory_nseq::pop_scope_eh(unsigned num_scopes) {
theory::pop_scope_eh(num_scopes);
m_state.pop(num_scopes);
m_sgraph.pop(num_scopes);
}
@ -289,24 +285,22 @@ namespace smt {
while (m_prop_qhead < m_prop_queue.size() && !ctx.inconsistent()) {
auto const& item = m_prop_queue[m_prop_qhead++];
if (std::holds_alternative<eq_item>(item))
propagate_eq(std::get<eq_item>(item).idx);
propagate_eq(std::get<eq_item>(item));
else if (std::holds_alternative<mem_item>(item))
propagate_pos_mem(std::get<mem_item>(item).idx);
propagate_pos_mem(std::get<mem_item>(item));
else if (std::holds_alternative<axiom_item>(item))
dequeue_axiom(std::get<axiom_item>(item).e);
}
}
void theory_nseq::propagate_eq(unsigned idx) {
void theory_nseq::propagate_eq(tracked_str_eq const& eq) {
// When s1 = s2 is learned, ensure len(s1) and len(s2) are
// internalized so congruence closure propagates len(s1) = len(s2).
auto const& src = m_state.str_eqs()[idx];
ensure_length_var(src.m_l->get_expr());
ensure_length_var(src.m_r->get_expr());
ensure_length_var(eq.m_l->get_expr());
ensure_length_var(eq.m_r->get_expr());
}
void theory_nseq::propagate_pos_mem(unsigned idx) {
auto const& mem = m_state.str_mems()[idx];
void theory_nseq::propagate_pos_mem(tracked_str_mem const& mem) {
if (!mem.m_str || !mem.m_regex)
return;
@ -419,36 +413,46 @@ namespace smt {
void theory_nseq::populate_nielsen_graph() {
m_nielsen.reset();
// transfer string equalities from state to nielsen graph root
for (auto const& eq : m_state.str_eqs()) {
m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs, eq.m_l, eq.m_r);
}
unsigned num_eqs = 0, num_mems = 0;
// transfer regex memberships, pre-processing through seq_regex
// to consume ground prefixes via Brzozowski derivatives
for (auto const &mem : m_state.str_mems()) {
int triv = m_regex.check_trivial(mem);
if (triv > 0)
continue; // trivially satisfied, skip
if (triv < 0) {
// trivially unsat: add anyway so solve() detects conflict
m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
continue;
// transfer string equalities and regex memberships from prop_queue to nielsen graph root
for (auto const& item : m_prop_queue) {
if (std::holds_alternative<eq_item>(item)) {
auto const& eq = std::get<eq_item>(item);
m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs, eq.m_l, eq.m_r);
++num_eqs;
}
// pre-process: consume ground prefix characters
vector<seq::str_mem> processed;
if (!m_regex.process_str_mem(mem, processed)) {
// conflict during ground prefix consumption
m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
continue;
}
for (auto const& pm : processed) {
m_nielsen.add_str_mem(pm.m_str, pm.m_regex, mem.lit);
else if (std::holds_alternative<mem_item>(item)) {
auto const& mem = std::get<mem_item>(item);
int triv = m_regex.check_trivial(mem);
if (triv > 0) {
++num_mems;
continue; // trivially satisfied, skip
}
if (triv < 0) {
// trivially unsat: add anyway so solve() detects conflict
m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
++num_mems;
continue;
}
// pre-process: consume ground prefix characters
vector<seq::str_mem> processed;
if (!m_regex.process_str_mem(mem, processed)) {
// conflict during ground prefix consumption
m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
++num_mems;
continue;
}
for (auto const& pm : processed)
m_nielsen.add_str_mem(pm.m_str, pm.m_regex, mem.lit);
++num_mems;
}
}
TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, "
<< m_state.str_mems().size() << " mems -> nielsen root\n");
TRACE(seq, tout << "nseq populate: " << num_eqs << " eqs, "
<< num_mems << " mems -> nielsen root\n");
IF_VERBOSE(1, verbose_stream() << "nseq final_check: populating graph with "
<< num_eqs << " eqs, " << num_mems << " mems\n";);
}
final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) {
@ -459,8 +463,13 @@ namespace smt {
return FC_CONTINUE;
}
// Check if there are any eq/mem items in the propagation queue.
bool has_eq_or_mem = false;
for (auto const& item : m_prop_queue)
if (!std::holds_alternative<axiom_item>(item)) { has_eq_or_mem = true; break; }
// there is nothing to do for the string solver, as there are no string constraints
if (m_state.empty() && m_ho_terms.empty() && !has_unhandled_preds()) {
if (!has_eq_or_mem && m_ho_terms.empty() && !has_unhandled_preds()) {
IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state+ho, FC_DONE (no solve)\n";);
m_nielsen.reset();
m_nielsen.create_root();
@ -474,7 +483,7 @@ namespace smt {
return FC_CONTINUE;
}
if (m_state.empty() && !has_unhandled_preds()) {
if (!has_eq_or_mem && !has_unhandled_preds()) {
IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state (after ho), FC_DONE (no solve)\n";);
m_nielsen.reset();
m_nielsen.create_root();
@ -482,8 +491,6 @@ namespace smt {
return FC_DONE;
}
IF_VERBOSE(1, verbose_stream() << "nseq final_check: populating graph with "
<< m_state.str_eqs().size() << " eqs, " << m_state.str_mems().size() << " mems\n";);
populate_nielsen_graph();
// assert length constraints derived from string equalities
@ -508,7 +515,7 @@ namespace smt {
IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck UNSAT\n";);
return FC_CONTINUE;
case l_false:
// all regex constraints satisfiable, no word eqs/diseqs → SAT
// all regex constraints satisfiable, no word eqs → SAT
IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck SAT\n";);
m_nielsen.set_sat_node(m_nielsen.root());
return FC_DONE;
@ -538,10 +545,7 @@ namespace smt {
IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node="
<< (m_nielsen.sat_node() ? "set" : "null") << "\n";);
// Nielsen found a consistent assignment for positive constraints.
// If there are disequalities we haven't verified, we cannot soundly declare sat.
SASSERT(!m_state.empty()); // we should have axiomatized them
if (!m_state.diseqs().empty())
return FC_GIVEUP;
SASSERT(has_eq_or_mem); // we should have axiomatized them
if (!has_unhandled_preds())
return FC_DONE;
}
@ -604,7 +608,9 @@ namespace smt {
}
void theory_nseq::validate_model(proto_model& mdl) {
m_model.validate_regex(m_state, mdl);
for (auto const& item : m_prop_queue)
if (std::holds_alternative<mem_item>(item))
m_model.validate_regex(std::get<mem_item>(item), mdl);
}
// -----------------------------------------------------------------------
@ -620,10 +626,14 @@ namespace smt {
}
void theory_nseq::display(std::ostream& out) const {
unsigned num_eqs = 0, num_mems = 0;
for (auto const& item : m_prop_queue) {
if (std::holds_alternative<eq_item>(item)) ++num_eqs;
else if (std::holds_alternative<mem_item>(item)) ++num_mems;
}
out << "theory_nseq\n";
out << " str_eqs: " << m_state.str_eqs().size() << "\n";
out << " str_mems: " << m_state.str_mems().size() << "\n";
out << " diseqs: " << m_state.diseqs().size() << "\n";
out << " str_eqs: " << num_eqs << "\n";
out << " str_mems: " << num_mems << "\n";
out << " prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n";
out << " ho_terms: " << m_ho_terms.size() << "\n";
}
@ -900,7 +910,13 @@ namespace smt {
// -----------------------------------------------------------------------
lbool theory_nseq::check_regex_memberships_precheck() {
auto const& mems = m_state.str_mems();
// Collect mem items from the propagation queue into a local pointer array
// so that indices into the array remain stable during this function.
ptr_vector<tracked_str_mem const> mems;
for (auto const& item : m_prop_queue)
if (std::holds_alternative<mem_item>(item))
mems.push_back(&std::get<mem_item>(item));
if (mems.empty())
return l_undef;
@ -910,7 +926,7 @@ namespace smt {
bool all_primitive = true;
for (unsigned i = 0; i < mems.size(); ++i) {
auto const& mem = mems[i];
auto const& mem = *mems[i];
SASSERT(mem.m_str && mem.m_regex);
if (mem.is_primitive()) {
auto& vec = var_to_mems.insert_if_not_there(mem.m_str->id(), unsigned_vector());
@ -923,6 +939,11 @@ namespace smt {
if (var_to_mems.empty())
return l_undef;
// Check if there are any eq items in the queue (needed for SAT condition below).
bool has_eqs = false;
for (auto const& item : m_prop_queue)
if (std::holds_alternative<eq_item>(item)) { has_eqs = true; break; }
bool any_undef = false;
// Check intersection emptiness for each variable.
@ -931,7 +952,7 @@ namespace smt {
unsigned_vector const& mem_indices = kv.m_value;
ptr_vector<euf::snode> regexes;
for (unsigned i : mem_indices) {
euf::snode* re = mems[i].m_regex;
euf::snode* re = mems[i]->m_regex;
if (re)
regexes.push_back(re);
}
@ -948,10 +969,8 @@ namespace smt {
enode_pair_vector eqs;
literal_vector lits;
for (unsigned i : mem_indices) {
auto const &src = m_state.str_mems()[i];
SASSERT(ctx.get_assignment(src.lit) == l_true); // we already stored the polarity of the literal
lits.push_back(
src.lit);
SASSERT(ctx.get_assignment(mems[i]->lit) == l_true); // we already stored the polarity of the literal
lits.push_back(mems[i]->lit);
}
TRACE(seq, tout << "nseq regex precheck: empty intersection for var "
<< var_id << ", conflict with " << lits.size() << " lits\n";);
@ -967,11 +986,11 @@ namespace smt {
return l_undef; // cannot fully determine; let DFS decide
// All variables' regex intersections are non-empty.
// If there are no word equations and no disequalities, variables are
// independent and each can be assigned a witness string → SAT.
if (all_primitive && m_state.str_eqs().empty() && m_state.diseqs().empty() && !has_unhandled_preds()) {
// If there are no word equations, variables are independent and
// each can be assigned a witness string → SAT.
if (all_primitive && !has_eqs && !has_unhandled_preds()) {
TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, "
<< "no word eqs/diseqs → SAT\n";);
<< "no word eqs → SAT\n";);
return l_false; // signals SAT (non-empty / satisfiable)
}

View file

@ -28,7 +28,7 @@ Author:
#include "smt/smt_theory.h"
#include "smt/smt_arith_value.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/seq/seq_state.h"
#include "smt/seq/seq_state.h" // tracked_str_eq, tracked_str_mem
#include "smt/seq/seq_regex.h"
#include "smt/seq_model.h"
#include "smt/nseq_context_solver.h"
@ -48,19 +48,19 @@ namespace smt {
context_solver m_context_solver;
seq::nielsen_graph m_nielsen;
seq::axioms m_axioms;
seq_state m_state;
seq::seq_regex m_regex; // regex membership pre-processing
seq_model m_model; // model construction helper
// propagation queue items (variant over the distinct propagation cases)
struct eq_item { unsigned idx; }; // string equality at index idx in str_eqs
struct mem_item { unsigned idx; }; // regex membership at index idx in str_mems
struct axiom_item { expr* e; }; // structural axiom for term e
using eq_item = tracked_str_eq; // string equality
using mem_item = tracked_str_mem; // regex membership
struct axiom_item { expr* e; }; // structural axiom for term e
using prop_item = std::variant<eq_item, mem_item, axiom_item>;
vector<prop_item> m_prop_queue;
unsigned m_prop_qhead = 0;
unsigned m_next_mem_id = 0; // monotone counter for tracked_str_mem ids
obj_hashtable<expr> m_axiom_set; // dedup guard for axiom_item enqueues
// statistics
@ -117,8 +117,8 @@ namespace smt {
euf::snode* get_snode(expr* e);
// propagation dispatch helpers
void propagate_eq(unsigned idx);
void propagate_pos_mem(unsigned idx);
void propagate_eq(tracked_str_eq const& eq);
void propagate_pos_mem(tracked_str_mem const& mem);
void enqueue_axiom(expr* e);
void dequeue_axiom(expr* e);
void ensure_length_var(expr* e);