From 3719b449e8feb708096f86fa71efd06f9398098f Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 17 Mar 2026 16:18:27 -0700 Subject: [PATCH] re-organize dependencies Signed-off-by: Nikolaj Bjorner --- src/smt/seq/seq_nielsen.cpp | 39 +++++++++++++-------------- src/smt/seq/seq_nielsen.h | 39 +++++++++++++-------------- src/smt/seq/seq_regex.cpp | 2 +- src/smt/seq/seq_regex.h | 2 +- src/smt/seq/seq_state.h | 25 ++--------------- src/smt/theory_nseq.cpp | 54 +++++++++++++------------------------ src/smt/theory_nseq.h | 3 --- 7 files changed, 60 insertions(+), 104 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 75391812f..afa926795 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -187,9 +187,9 @@ namespace seq { m_var_lb.reset(); m_var_ub.reset(); for (auto const& eq : parent.m_str_eq) - m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep)); + m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs,eq.m_l, eq.m_r, eq.m_dep)); for (auto const& mem : parent.m_str_mem) - m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep)); + m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_lit, mem.m_history, mem.m_id, mem.m_dep)); for (auto const& ic : parent.m_int_constraints) m_int_constraints.push_back(ic); // clone character disequalities @@ -545,24 +545,22 @@ namespace seq { return e; } - void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) { + void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs, smt::enode* l, smt::enode* r) { if (!m_root) m_root = mk_node(); - dep_tracker dep = m_dep_mgr.mk_leaf({dep_source::kind::eq, m_num_input_eqs}); - str_eq eq(lhs, rhs, dep); + dep_tracker dep = m_dep_mgr.mk_leaf(enode_pair(l, r)); + str_eq eq(lhs, rhs, l, r, dep); eq.sort(); m_root->add_str_eq(eq); - ++m_num_input_eqs; } - void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) { + void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex, sat::literal l) { if (!m_root) m_root = mk_node(); - dep_tracker dep = m_dep_mgr.mk_leaf({dep_source::kind::mem, m_num_input_mems}); + dep_tracker dep = m_dep_mgr.mk_leaf(l); euf::snode* history = m_sg.mk_empty_seq(str->get_sort()); unsigned id = next_mem_id(); - m_root->add_str_mem(str_mem(str, regex, history, id, dep)); - ++m_num_input_mems; + m_root->add_str_mem(str_mem(str, regex, l, history, id, dep)); } void nielsen_graph::inc_run_idx() { @@ -589,8 +587,6 @@ namespace seq { m_depth_bound = 0; m_next_mem_id = 0; m_fresh_cnt = 0; - m_num_input_eqs = 0; - m_num_input_mems = 0; m_root_constraints_asserted = false; m_mod_cnt.reset(); m_len_var_cache.clear(); @@ -2794,8 +2790,8 @@ namespace seq { auto& eqs = child->str_eqs(); eqs[eq_idx] = eqs.back(); eqs.pop_back(); - eqs.push_back(str_eq(eq1_lhs, eq1_rhs, eq.m_dep)); - eqs.push_back(str_eq(eq2_lhs, eq2_rhs, eq.m_dep)); + eqs.push_back(str_eq(eq1_lhs, eq1_rhs, eq.m_l, eq.m_r, eq.m_dep)); + eqs.push_back(str_eq(eq2_lhs, eq2_rhs, eq.m_l, eq.m_r, eq.m_dep)); // Int constraints on the edge. // 1) len(pad) = |padding| (if padding variable was created) @@ -3459,11 +3455,11 @@ namespace seq { nielsen_node* child = mk_child(node); // Add membership: pr ∈ stab_base* (stabilizer constraint) - child->add_str_mem(str_mem(pr, star_sn, mem.m_history, next_mem_id(), mem.m_dep)); + child->add_str_mem(str_mem(pr, star_sn, mem.m_lit, mem.m_history, next_mem_id(), mem.m_dep)); // Add remaining membership: po · tail ∈ R (same regex, trimmed history) euf::snode* post_tail = str_tail->is_empty() ? po : m_sg.mk_concat(po, str_tail); - child->add_str_mem(str_mem(post_tail, mem.m_regex, nullptr, next_mem_id(), mem.m_dep)); + child->add_str_mem(str_mem(post_tail, mem.m_regex, mem.m_lit, nullptr, next_mem_id(), mem.m_dep)); // Blocking constraint: po must NOT start with stab_base // po ∈ complement(non_nullable(stab_base) · Σ*) @@ -3475,7 +3471,7 @@ namespace seq { expr_ref block_re(seq.re.mk_complement(base_then_all), mgr); euf::snode* block_sn = m_sg.mk(block_re); if (block_sn) - child->add_str_mem(str_mem(po, block_sn, nullptr, next_mem_id(), mem.m_dep)); + child->add_str_mem(str_mem(po, block_sn, mem.m_lit, nullptr, next_mem_id(), mem.m_dep)); } // Substitute x → pr · po @@ -3934,17 +3930,18 @@ namespace seq { } } - void nielsen_graph::explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const { + void nielsen_graph::explain_conflict(svector>& eqs, + svector& mem_literals) const { SASSERT(m_root); dep_tracker deps = m_dep_mgr.mk_empty(); collect_conflict_deps(deps); vector vs; m_dep_mgr.linearize(deps, vs); for (dep_source const& d : vs) { - if (d.m_kind == dep_source::kind::eq) - eq_indices.push_back(d.index); + if (std::holds_alternative(d)) + eqs.push_back(std::get(d)); else - mem_indices.push_back(d.index); + mem_literals.push_back(std::get(d)); } } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index faabdfb72..4cfb0d6e6 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -250,6 +250,10 @@ namespace seq { class seq_regex; // forward declaration (defined in smt/seq/seq_regex.h) } +namespace smt { + class enode; +} + namespace seq { // forward declarations @@ -305,13 +309,10 @@ namespace seq { // source of a dependency: identifies an input constraint by kind and index. // kind::eq means a string equality, kind::mem means a regex membership. // index is the 0-based position in the input eq or mem list respectively. - struct dep_source { - enum class kind { eq, mem } m_kind; - unsigned index; - bool operator==(dep_source const& o) const { - return m_kind == o.m_kind && index == o.index; - } - }; + + using enode_pair = std::pair; + using dep_source = std::variant; + // Arena-based dependency manager: builds an immutable tree of dep_source // leaves joined by binary join nodes. Memory is managed via a region; @@ -352,11 +353,12 @@ namespace seq { struct str_eq { euf::snode* m_lhs; euf::snode* m_rhs; + smt::enode *m_l, *m_r; dep_tracker m_dep; str_eq(): m_lhs(nullptr), m_rhs(nullptr), m_dep(nullptr) {} - str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep): - m_lhs(lhs), m_rhs(rhs), m_dep(dep) {} + str_eq(euf::snode* lhs, euf::snode* rhs, smt::enode* l, smt::enode* r, dep_tracker const& dep): + m_lhs(lhs), m_rhs(rhs), m_l(l), m_r(r), m_dep(dep) {} bool operator==(str_eq const& other) const { return m_lhs == other.m_lhs && m_rhs == other.m_rhs; @@ -377,13 +379,14 @@ namespace seq { struct str_mem { euf::snode* m_str; euf::snode* m_regex; + sat::literal m_lit; euf::snode* m_history; // tracks derivation history for cycle detection unsigned m_id; // unique identifier dep_tracker m_dep; str_mem(): m_str(nullptr), m_regex(nullptr), m_history(nullptr), m_id(UINT_MAX), m_dep(nullptr) {} - str_mem(euf::snode* str, euf::snode* regex, euf::snode* history, unsigned id, dep_tracker const& dep): - m_str(str), m_regex(regex), m_history(history), m_id(id), m_dep(dep) {} + str_mem(euf::snode* str, euf::snode* regex, sat::literal l, euf::snode* history, unsigned id, dep_tracker const& dep): + m_str(str), m_regex(regex), m_lit(l), m_history(history), m_id(id), m_dep(dep) {} bool operator==(str_mem const& other) const { return m_id == other.m_id && m_str == other.m_str && m_regex == other.m_regex; @@ -759,8 +762,6 @@ namespace seq { bool m_parikh_enabled = true; unsigned m_next_mem_id = 0; unsigned m_fresh_cnt = 0; - unsigned m_num_input_eqs = 0; - unsigned m_num_input_mems = 0; nielsen_stats m_stats; @@ -839,8 +840,8 @@ namespace seq { svector const& sat_path() const { return m_sat_path; } // add constraints to the root node from external solver - void add_str_eq(euf::snode* lhs, euf::snode* rhs); - void add_str_mem(euf::snode* str, euf::snode* regex); + void add_str_eq(euf::snode* lhs, euf::snode* rhs, smt::enode* l, smt::enode* r); + void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal l); // run management unsigned run_idx() const { return m_run_idx; } @@ -862,10 +863,6 @@ namespace seq { // generate next unique regex membership id unsigned next_mem_id() { return m_next_mem_id++; } - // number of input constraints (used for indexing dep_source leaves) - unsigned num_input_eqs() const { return m_num_input_eqs; } - unsigned num_input_mems() const { return m_num_input_mems; } - // display for debugging std::ostream& display(std::ostream& out) const; @@ -895,7 +892,9 @@ namespace seq { // explain a conflict: partition the dep_source leaves into str_eq indices // (kind::eq) and str_mem indices (kind::mem). // Must be called after solve() returns unsat. - void explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const; + void explain_conflict(svector &eqs, + svector &mem_literals) const; + // accumulated search statistics nielsen_stats const& stats() const { return m_stats; } diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index abeb2b38f..a0fc31095 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -859,7 +859,7 @@ namespace seq { new_history = m_sg.mk(chain); } } - return seq::str_mem(mem.m_str, mem.m_regex, new_history, mem.m_id, mem.m_dep); + return seq::str_mem(mem.m_str, mem.m_regex, mem.m_lit, new_history, mem.m_id, mem.m_dep); } // ----------------------------------------------------------------------- diff --git a/src/smt/seq/seq_regex.h b/src/smt/seq/seq_regex.h index 1a2f13936..5969b0518 100644 --- a/src/smt/seq/seq_regex.h +++ b/src/smt/seq/seq_regex.h @@ -227,7 +227,7 @@ namespace seq { if (deriv) propagate_self_stabilizing(parent_re, deriv); euf::snode* new_str = m_sg.drop_first(mem.m_str); - return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep); + return seq::str_mem(new_str, deriv, mem.m_lit, mem.m_history, mem.m_id, mem.m_dep); } // ----------------------------------------------------------------- diff --git a/src/smt/seq/seq_state.h b/src/smt/seq/seq_state.h index 6069472f7..2ba99ad78 100644 --- a/src/smt/seq/seq_state.h +++ b/src/smt/seq/seq_state.h @@ -27,17 +27,6 @@ namespace smt { class enode; - // source info for a string equality (the two enodes whose merge caused it) - struct eq_source { - enode* m_n1; - enode* m_n2; - }; - - // source info for a regex membership (the literal that asserted it) - struct mem_source { - sat::literal m_lit; - }; - // source info for a string disequality struct diseq_source { enode* m_n1; @@ -47,8 +36,6 @@ namespace smt { class seq_state { vector m_str_eqs; vector m_str_mems; - vector m_eq_sources; - vector m_mem_sources; vector m_diseqs; unsigned_vector m_str_eq_lim; unsigned_vector m_str_mem_lim; @@ -67,10 +54,8 @@ namespace smt { void pop(unsigned n) { for (unsigned i = 0; i < n; ++i) { m_str_eqs.shrink(m_str_eq_lim.back()); - m_eq_sources.shrink(m_str_eq_lim.back()); m_str_eq_lim.pop_back(); m_str_mems.shrink(m_str_mem_lim.back()); - m_mem_sources.shrink(m_str_mem_lim.back()); m_str_mem_lim.pop_back(); m_diseqs.shrink(m_diseq_lim.back()); m_diseq_lim.pop_back(); @@ -79,14 +64,12 @@ namespace smt { void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) { seq::dep_tracker dep = nullptr; - m_str_eqs.push_back(seq::str_eq(lhs, rhs, dep)); - m_eq_sources.push_back({n1, n2}); + m_str_eqs.push_back(seq::str_eq(lhs, rhs, n1, n2, dep)); } void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal lit) { seq::dep_tracker dep = nullptr; - m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep)); - m_mem_sources.push_back({lit}); + m_str_mems.push_back(seq::str_mem(str, regex, lit, nullptr, m_next_mem_id++, dep)); } void add_diseq(enode* n1, enode* n2) { @@ -97,8 +80,6 @@ namespace smt { vector const& str_mems() const { return m_str_mems; } vector const& diseqs() const { return m_diseqs; } - eq_source const& get_eq_source(unsigned i) const { return m_eq_sources[i]; } - mem_source const& get_mem_source(unsigned i) const { return m_mem_sources[i]; } diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; } bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_diseqs.empty(); } @@ -106,8 +87,6 @@ namespace smt { void reset() { m_str_eqs.reset(); m_str_mems.reset(); - m_eq_sources.reset(); - m_mem_sources.reset(); m_diseqs.reset(); m_str_eq_lim.reset(); m_str_mem_lim.reset(); diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 7cc482cfe..4730e8fe1 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -256,9 +256,9 @@ namespace smt { void theory_nseq::propagate_eq(unsigned idx) { // When s1 = s2 is learned, ensure len(s1) and len(s2) are // internalized so congruence closure propagates len(s1) = len(s2). - eq_source const& src = m_state.get_eq_source(idx); - ensure_length_var(src.m_n1->get_expr()); - ensure_length_var(src.m_n2->get_expr()); + auto const& src = m_state.str_eqs()[idx]; + ensure_length_var(src.m_l->get_expr()); + ensure_length_var(src.m_r->get_expr()); } void theory_nseq::propagate_diseq(unsigned idx) { @@ -274,7 +274,6 @@ namespace smt { void theory_nseq::propagate_pos_mem(unsigned idx) { auto const& mem = m_state.str_mems()[idx]; - auto const& src = m_state.get_mem_source(idx); if (!mem.m_str || !mem.m_regex) return; @@ -283,7 +282,7 @@ namespace smt { if (m_regex.is_empty_regex(mem.m_regex)) { enode_pair_vector eqs; literal_vector lits; - lits.push_back(src.m_lit); + lits.push_back(mem.m_lit); set_conflict(eqs, lits); return; } @@ -292,7 +291,7 @@ namespace smt { if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) { enode_pair_vector eqs; literal_vector lits; - lits.push_back(src.m_lit); + lits.push_back(mem.m_lit); set_conflict(eqs, lits); return; } @@ -317,44 +316,37 @@ namespace smt { void theory_nseq::populate_nielsen_graph() { m_nielsen.reset(); - m_nielsen_to_state_mem.reset(); // transfer string equalities from state to nielsen graph root for (auto const& eq : m_state.str_eqs()) { - m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs); + m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs, eq.m_l, eq.m_r); } // transfer regex memberships, pre-processing through seq_regex // to consume ground prefixes via Brzozowski derivatives - for (unsigned state_idx = 0; state_idx < m_state.str_mems().size(); ++state_idx) { - auto const& mem = m_state.str_mems()[state_idx]; + for (auto const &mem : m_state.str_mems()) { int triv = m_regex.check_trivial(mem); if (triv > 0) continue; // trivially satisfied, skip if (triv < 0) { // trivially unsat: add anyway so solve() detects conflict - m_nielsen.add_str_mem(mem.m_str, mem.m_regex); - m_nielsen_to_state_mem.push_back(state_idx); + m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.m_lit); continue; } // pre-process: consume ground prefix characters vector processed; if (!m_regex.process_str_mem(mem, processed)) { // conflict during ground prefix consumption - m_nielsen.add_str_mem(mem.m_str, mem.m_regex); - m_nielsen_to_state_mem.push_back(state_idx); + m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.m_lit); continue; } for (auto const& pm : processed) { - m_nielsen.add_str_mem(pm.m_str, pm.m_regex); - m_nielsen_to_state_mem.push_back(state_idx); + m_nielsen.add_str_mem(pm.m_str, pm.m_regex, pm.m_lit); } } TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, " - << m_state.str_mems().size() << " mems -> nielsen root with " - << m_nielsen.num_input_eqs() << " eqs, " - << m_nielsen.num_input_mems() << " mems\n";); + << m_state.str_mems().size() << " mems -> nielsen root\n"); } final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) { @@ -455,20 +447,11 @@ namespace smt { void theory_nseq::deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits) { vector vs; m_nielsen.dep_mgr().linearize(deps, vs); - for (seq::dep_source const& d : vs) { - if (d.m_kind == seq::dep_source::kind::eq) { - eq_source const& src = m_state.get_eq_source(d.index); - if (src.m_n1->get_root() == src.m_n2->get_root()) - eqs.push_back({src.m_n1, src.m_n2}); - } - else { - if (d.index < m_nielsen_to_state_mem.size()) { - unsigned state_mem_idx = m_nielsen_to_state_mem[d.index]; - mem_source const& src = m_state.get_mem_source(state_mem_idx); - SASSERT(ctx.get_assignment(src.m_lit) == l_true); - lits.push_back(src.m_lit); - } - } + for (seq::dep_source const &d : vs) { + if (std::holds_alternative(d)) + eqs.push_back(std::get(d)); + else + lits.push_back(std::get(d)); } } @@ -856,9 +839,10 @@ namespace smt { enode_pair_vector eqs; literal_vector lits; for (unsigned i : mem_indices) { - mem_source const& src = m_state.get_mem_source(i); + auto const &src = m_state.str_mems()[i]; SASSERT(ctx.get_assignment(src.m_lit) == l_true); // we already stored the polarity of the literal - lits.push_back(src.m_lit); + lits.push_back( + src.m_lit); } TRACE(seq, tout << "nseq regex precheck: empty intersection for var " << var_id << ", conflict with " << lits.size() << " lits\n";); diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 07f98e1d3..2a9fd1d53 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -64,9 +64,6 @@ namespace smt { // map from context enode to private sgraph snode obj_map m_expr2snode; - // mapping from nielsen mem index to state mem index - // (populated during populate_nielsen_graph, used in deps_to_lits) - unsigned_vector m_nielsen_to_state_mem; // higher-order terms (seq.map, seq.mapi, seq.foldl, seq.foldli) ptr_vector m_ho_terms;