From 2db99473a356e8ade954905824b37e8e619befe4 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Tue, 14 Apr 2026 16:27:50 +0200 Subject: [PATCH] Removed irrelevant information from membership constraints --- src/smt/seq/seq_nielsen.cpp | 139 ++------------------------------- src/smt/seq/seq_nielsen.h | 14 +--- src/smt/seq/seq_nielsen_pp.cpp | 2 +- src/smt/seq/seq_regex.cpp | 67 +--------------- src/smt/seq/seq_regex.h | 4 +- src/smt/seq/seq_state.h | 10 +-- src/smt/theory_nseq.cpp | 4 +- src/smt/theory_nseq.h | 1 - 8 files changed, 22 insertions(+), 219 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 421a1018d..e62c223bd 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -444,9 +444,7 @@ namespace seq { if (!root()) create_root(); dep_tracker dep = m_dep_mgr.mk_leaf(l); - euf::snode* history = m_sg.mk_empty_seq(str->get_sort()); - unsigned id = next_mem_id(); - m_root->add_str_mem(str_mem(str, regex, history, id, dep)); + m_root->add_str_mem(str_mem(str, regex, dep)); } // test-friendly overloads (no external dependency tracking) @@ -463,9 +461,7 @@ namespace seq { if (!root()) create_root(); dep_tracker dep = nullptr; - euf::snode* history = m_sg.mk_empty_seq(str->get_sort()); - unsigned id = next_mem_id(); - m_root->add_str_mem(str_mem(str, regex, history, id, dep)); + m_root->add_str_mem(str_mem(str, regex, dep)); } void nielsen_graph::inc_run_idx() { @@ -490,7 +486,6 @@ namespace seq { m_sat_path.reset(); m_run_idx = 0; m_depth_bound = 0; - m_next_mem_id = 0; m_fresh_cnt = 0; m_root_constraints_asserted = false; m_mod_cnt.reset(); @@ -1097,9 +1092,6 @@ namespace seq { } mem.m_str = dir_drop(sg, mem.m_str, 1, fwd); mem.m_regex = deriv; - // ZIPT tracks history only in forward direction. - if (fwd) - mem.m_history = sg.mk_concat(mem.m_history, tok); } } } @@ -1139,7 +1131,6 @@ namespace seq { mem.m_str = sg.drop_left(mem.m_str, 1); mem.m_regex = next; - mem.m_history = sg.mk_concat(mem.m_history, tok); } } @@ -1375,25 +1366,6 @@ namespace seq { return search_result::unsat; } - str_mem* mem = nullptr; - for (auto& m : node->m_str_mem) { - if (m.m_id == 28) { - mem = &m; - break; - } - } - if (mem) { - vector deps; - m_dep_mgr.linearize(mem->m_dep, deps); - for (auto& dep : deps) { - if (std::holds_alternative(dep)) - std::cout << "eq: " << mk_pp(std::get(dep).first->get_expr(), m) << " = " << mk_pp(std::get(dep).second->get_expr(), m) << std::endl; - else - std::cout << "mem literal: " << std::get(dep) << std::endl; - } - } - - // simplify constraints (idempotent after first call) const simplify_result sr = node->simplify_and_init(cur_path); @@ -2723,105 +2695,6 @@ namespace seq { bool nielsen_graph::apply_star_intr(nielsen_node* node) { return false; - // Only fire if a backedge was set (cycle detected) - if (!node->backedge()) - return false; - - // Look for a str_mem with a variable-headed string - for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) { - str_mem const& mem = node->str_mems()[mi]; - if (!mem.m_str || !mem.m_regex) - continue; - if (mem.is_primitive()) - continue; - euf::snode* first = mem.m_str->first(); - if (!first || !first->is_var()) - continue; - - euf::snode* x = first; - expr* re_expr = mem.m_regex->get_expr(); - if (!re_expr) - continue; - expr *str_expr = mem.m_str->get_expr(); - - // Determine the stabilizer base: - // 1. If self-stabilizing, use regex itself as stabilizer - // 2. Otherwise, try strengthened_stabilizer from cycle history - // 3. Fall back to simple star(R) - euf::snode* stab_base = nullptr; - - if (m_seq_regex && m_seq_regex->is_self_stabilizing(mem.m_regex)) - stab_base = mem.m_regex; - else if (m_seq_regex && mem.m_history) - stab_base = m_seq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history); - - if (!stab_base) - // Fall back: simple star of the cycle regex - stab_base = mem.m_regex; - SASSERT(stab_base); - - // Register the stabilizer - if (m_seq_regex) - m_seq_regex->add_stabilizer(mem.m_regex, stab_base); - - // Check if stabilizer equals regex → self-stabilizing - if (m_seq_regex && stab_base == mem.m_regex) - m_seq_regex->set_self_stabilizing(mem.m_regex); - - // Build stab_star = star(stab_base) - expr* stab_expr = stab_base->get_expr(); - if (!stab_expr) - continue; - expr_ref star_re(m_seq.re.mk_star(stab_expr), m); - euf::snode* star_sn = m_sg.mk(star_re); - if (!star_sn) - continue; - - // Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint - if (m_seq_regex && m_seq_regex->try_subsume(mem, *node)) - continue; - - // Create child: x → pr · po - - th_rewriter rw(m); - auto pr_e = skolem(m, rw).mk("star-intro-left", str_expr, re_expr, str_expr->get_sort()); - auto po_e = skolem(m, rw).mk("star-intro-right", str_expr, re_expr, str_expr->get_sort()); - euf::snode *pr = m_sg.mk(pr_e); - euf::snode *po = m_sg.mk(po_e); - euf::snode* str_tail = m_sg.drop_first(mem.m_str); - - nielsen_node* child = mk_child(node); - - // Add membership: pr ∈ stab_base* (stabilizer constraint) - child->add_str_mem(str_mem(pr, star_sn, mem.m_history, next_mem_id(), mem.m_dep)); - - // Add remaining membership: po · tail ∈ R (same regex, trimmed history) - euf::snode* post_tail = str_tail->is_empty() ? po : m_sg.mk_concat(po, str_tail); - child->add_str_mem(str_mem(post_tail, mem.m_regex, nullptr, next_mem_id(), mem.m_dep)); - - // Blocking constraint: po must NOT start with stab_base - // po ∈ complement(non_nullable(stab_base) · Σ*) - // This prevents redundant unrolling. - if (!stab_base->is_nullable()) { - sort* str_sort = m_seq.str.mk_string_sort(); - expr_ref full_seq(m_seq.re.mk_full_seq(m_seq.re.mk_re(str_sort)), m); - expr_ref base_then_all(m_seq.re.mk_concat(stab_expr, full_seq), m); - expr_ref block_re(m_seq.re.mk_complement(base_then_all), m); - euf::snode* block_sn = m_sg.mk(block_re); - if (block_sn) - child->add_str_mem(str_mem(po, block_sn, nullptr, next_mem_id(), mem.m_dep)); - } - - // Substitute x → pr · po - nielsen_edge* e = mk_edge(node, child, false); - euf::snode* replacement = m_sg.mk_concat(pr, po); - nielsen_subst s(x, replacement, mem.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - - return true; - } - return false; } // ----------------------------------------------------------------------- @@ -3069,15 +2942,15 @@ namespace seq { // remove the original mem from child auto& child_mems = child->str_mems(); for (unsigned k = 0; k < child_mems.size(); ++k) { - if (child_mems[k].m_id == mem.m_id) { + if (child_mems[k] == mem) { child_mems[k] = child_mems.back(); child_mems.pop_back(); break; } } - child->add_str_mem(str_mem(first, sn_p, mem.m_history, next_mem_id(), mem.m_dep)); - child->add_str_mem(str_mem(tail, sn_q, mem.m_history, next_mem_id(), mem.m_dep)); + child->add_str_mem(str_mem(first, sn_p, mem.m_dep)); + child->add_str_mem(str_mem(tail, sn_q, mem.m_dep)); } return true; @@ -3410,7 +3283,7 @@ namespace seq { child->add_constraint(constraint(f, mem.m_dep, m)); mk_edge(node, child, true); for (str_mem &cm : child->str_mems()) - if (cm.m_id == mem.m_id) { + if (cm == mem) { cm.m_regex = new_regex_snode; break; } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 2fdbe1111..16b2314e6 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -397,16 +397,14 @@ namespace seq { struct str_mem { euf::snode* m_str; euf::snode* m_regex; - euf::snode* m_history; // tracks derivation history for cycle detection - unsigned m_id; // unique identifier dep_tracker m_dep; - str_mem(): m_str(nullptr), m_regex(nullptr), m_history(nullptr), m_id(UINT_MAX), m_dep(nullptr) {} - str_mem(euf::snode* str, euf::snode* regex, euf::snode* history, unsigned id, dep_tracker const& dep): - m_str(str), m_regex(regex), m_history(history), m_id(id), m_dep(dep) {} + str_mem(): m_str(nullptr), m_regex(nullptr), m_dep(nullptr) {} + str_mem(euf::snode* str, euf::snode* regex, dep_tracker const& dep): + m_str(str), m_regex(regex), m_dep(dep) {} bool operator==(str_mem const& other) const { - return m_id == other.m_id && m_str == other.m_str && m_regex == other.m_regex; + return m_str == other.m_str && m_regex == other.m_regex; } // check if the constraint has the form x in R with x a single variable @@ -784,7 +782,6 @@ namespace seq { bool m_parikh_enabled = true; bool m_signature_split = false; bool m_regex_factorization = true; - unsigned m_next_mem_id = 0; unsigned m_fresh_cnt = 0; nielsen_stats m_stats; @@ -913,9 +910,6 @@ namespace seq { void set_regex_factorization(bool e) { m_regex_factorization = e; } - // generate next unique regex membership id - unsigned next_mem_id() { return m_next_mem_id++; } - // display for debugging std::ostream& display(std::ostream& out) const; diff --git a/src/smt/seq/seq_nielsen_pp.cpp b/src/smt/seq/seq_nielsen_pp.cpp index 231b14f65..fb580abcc 100644 --- a/src/smt/seq/seq_nielsen_pp.cpp +++ b/src/smt/seq/seq_nielsen_pp.cpp @@ -53,7 +53,7 @@ namespace seq { // display regex memberships for (auto const &mem : n->str_mems()) { - out << " str_mem[" << mem.m_id << "]: "; + out << " str_mem: "; if (mem.m_str) out << "str[id=" << mem.m_str->id() << ",len=" << mem.m_str->length() << "]"; else diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index d9024fadf..674b5d672 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -840,22 +840,7 @@ namespace seq { // ----------------------------------------------------------------------- seq::str_mem seq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) { - // Build a history chain by prepending the new regex entry to the - // existing history. Uses regex-concat as a cons cell: - // new_history = re.concat(history_re, old_history) - // where arg(0) is the latest entry and arg(1) is the tail. - // If old_history is nullptr, the new entry becomes the terminal leaf. - euf::snode* new_history = history_re; - if (mem.m_history && history_re) { - expr* re_expr = history_re->get_expr(); - expr* old_expr = mem.m_history->get_expr(); - if (re_expr && old_expr) { - seq_util& seq = m_sg.get_seq_util(); - expr_ref chain(seq.re.mk_concat(re_expr, old_expr), m_sg.get_manager()); - new_history = m_sg.mk(chain); - } - } - return seq::str_mem(mem.m_str, mem.m_regex, new_history, mem.m_id, mem.m_dep); + return str_mem(mem.m_str, mem.m_regex, mem.m_dep); } // ----------------------------------------------------------------------- @@ -863,42 +848,6 @@ namespace seq { // ----------------------------------------------------------------------- euf::snode* seq_regex::extract_cycle(seq::str_mem const& mem) const { - // Walk the history chain looking for a repeated regex. - // A cycle exists when the current regex matches a regex in the history. - if (!mem.m_regex || !mem.m_history) - return nullptr; - - euf::snode* current = mem.m_regex; - euf::snode* hist = mem.m_history; - - // Walk the history chain up to a bounded depth. - // The history is structured as a chain of regex snapshots connected - // via the sgraph's regex-concat: each level's arg(0) is a snapshot - // and arg(1) is the tail. A leaf (non-concat) is a terminal entry. - unsigned bound = 1000; - while (hist && bound-- > 0) { - euf::snode* entry = hist; - euf::snode* tail = nullptr; - - // If the history node is a regex concat, decompose it: - // arg(0) is the regex snapshot, arg(1) is the rest of the chain - seq_util& seq = m_sg.get_seq_util(); - if (hist->is_concat() && hist->get_expr() && - seq.re.is_concat(hist->get_expr())) { - entry = hist->arg(0); - tail = hist->arg(1); - } - - // Check pointer equality (fast, covers normalized regexes) - if (entry == current) - return entry; - // Check expression-level equality as fallback - if (entry->get_expr() && current->get_expr() && - entry->get_expr() == current->get_expr()) - return entry; - - hist = tail; - } return nullptr; } @@ -929,19 +878,7 @@ namespace seq { // The history is built by simplify_and_init as a left-associative // string concat chain: concat(concat(concat(nil, c1), c2), c3). // Extract the tokens consumed since the ancestor. - if (!current.m_history) - return nullptr; - - unsigned cur_len = current.m_history->length(); - unsigned anc_len = ancestor.m_history ? ancestor.m_history->length() : 0; - - if (cur_len <= anc_len) - return nullptr; - - if (anc_len == 0) - return current.m_history; - - return m_sg.drop_left(current.m_history, anc_len); + return nullptr; } // ----------------------------------------------------------------------- diff --git a/src/smt/seq/seq_regex.h b/src/smt/seq/seq_regex.h index 227543e78..0cb539816 100644 --- a/src/smt/seq/seq_regex.h +++ b/src/smt/seq/seq_regex.h @@ -239,13 +239,13 @@ namespace seq { // compute derivative of a str_mem constraint: advance past one character. // the string side is shortened by drop_first and the regex is derived. // Propagates self-stabilizing flags from the parent regex to the derivative. - seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) { + str_mem derive(str_mem const& mem, euf::snode* elem) { euf::snode* parent_re = mem.m_regex; euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, elem); if (deriv) propagate_self_stabilizing(parent_re, deriv); euf::snode* new_str = m_sg.drop_first(mem.m_str); - return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep); + return str_mem(new_str, deriv, mem.m_dep); } // ----------------------------------------------------------------- diff --git a/src/smt/seq/seq_state.h b/src/smt/seq/seq_state.h index c80747943..2a29ced8f 100644 --- a/src/smt/seq/seq_state.h +++ b/src/smt/seq/seq_state.h @@ -27,16 +27,16 @@ namespace smt { class enode; - struct tracked_str_eq : public seq::str_eq { + struct tracked_str_eq : seq::str_eq { enode *m_l, *m_r; tracked_str_eq(euf::snode *lhs, euf::snode *rhs, smt::enode *l, smt::enode *r, seq::dep_tracker const &dep) - : seq::str_eq(lhs, rhs, dep), m_l(l), m_r(r) {} + : str_eq(lhs, rhs, dep), m_l(l), m_r(r) {} }; - struct tracked_str_mem : public seq::str_mem { + struct tracked_str_mem : seq::str_mem { sat::literal lit; - tracked_str_mem(euf::snode *str, euf::snode *regex, sat::literal lit, euf::snode *history, unsigned id, seq::dep_tracker const &dep) - : seq::str_mem(str, regex, history, id, dep), lit(lit) {} + tracked_str_mem(euf::snode *str, euf::snode *regex, sat::literal lit, seq::dep_tracker const &dep) + : str_mem(str, regex, dep), lit(lit) {} }; } diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 1ef944f17..a6b98ca53 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -265,7 +265,7 @@ namespace smt { seq::dep_tracker dep = nullptr; if (is_true) { ctx.push_trail(restore_vector(m_prop_queue)); - m_prop_queue.push_back(mem_item(sn_str, sn_re, lit, nullptr, m_next_mem_id++, dep)); + m_prop_queue.push_back(mem_item(sn_str, sn_re, lit, dep)); } else { // ¬(str ∈ R) ≡ str ∈ complement(R): store as a positive membership @@ -274,7 +274,7 @@ namespace smt { expr_ref re_compl(m_seq.re.mk_complement(re), m); euf::snode* sn_re_compl = get_snode(re_compl.get()); ctx.push_trail(restore_vector(m_prop_queue)); - m_prop_queue.push_back(mem_item(sn_str, sn_re_compl, lit, nullptr, m_next_mem_id++, dep)); + m_prop_queue.push_back(mem_item(sn_str, sn_re_compl, lit, dep)); } } else if (m_seq.str.is_prefix(e)) { diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 14f5929bb..ad8bcdc66 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -61,7 +61,6 @@ namespace smt { vector m_prop_queue; unsigned m_prop_qhead = 0; - unsigned m_next_mem_id = 0; // monotone counter for tracked_str_mem ids obj_hashtable m_axiom_set; // dedup guard for axiom_item enqueues obj_hashtable m_no_diseq_set; // track expressions that should not trigger new disequality axioms expr_ref_vector m_relevant_lengths; // track variables whose lengths are relevant