diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 180e1daa5..588bf66be 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -17,12 +17,6 @@ Author: Clemens Eisenhofer 2026-03-02 Nikolaj Bjorner (nbjorner) 2026-03-02 - -NSB review: add ast_manager& m to nielsen_graph and remove local calls to get_manager() -NSB review: add seq_util& seq to nielsen_graph and remove local calls to get_seq_util() -NSB review: make m_graph a reference instead of a pointer on nielsen_node -NSB review: replace comparisons of snode ids by m.are_equal, for ast_manager m. Make sure to use E-graph roots. - --*/ #include "smt/seq/seq_nielsen.h" @@ -179,7 +173,7 @@ namespace seq { // nielsen_node // ----------------------------------------------- - nielsen_node::nielsen_node(nielsen_graph* graph, unsigned id): + nielsen_node::nielsen_node(nielsen_graph& graph, unsigned id): m_id(id), m_graph(graph), m_is_progress(true) { } @@ -313,8 +307,8 @@ namespace seq { return true; } // add int_constraint: len(var) >= lb - ast_manager& m = m_graph->sg().get_manager(); - seq_util& seq = m_graph->sg().get_seq_util(); + ast_manager& m = m_graph.m(); + seq_util& seq = m_graph.seq(); arith_util arith(m); expr_ref len_var(seq.str.mk_length(var->get_expr()), m); expr_ref bound(arith.mk_int(lb), m); @@ -339,8 +333,8 @@ namespace seq { return true; } // add int_constraint: len(var) <= ub - ast_manager& m = m_graph->sg().get_manager(); - seq_util& seq = m_graph->sg().get_seq_util(); + ast_manager& m = m_graph.m(); + seq_util& seq = m_graph.seq(); arith_util arith(m); expr_ref len_var(seq.str.mk_length(var->get_expr()), m); expr_ref bound(arith.mk_int(ub), m); @@ -428,7 +422,7 @@ namespace seq { for (str_mem const& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) continue; unsigned min_len = 0, max_len = UINT_MAX; - m_graph->compute_regex_length_interval(mem.m_regex, min_len, max_len); + m_graph.compute_regex_length_interval(mem.m_regex, min_len, max_len); if (min_len == 0 && max_len == UINT_MAX) continue; // if str is a single variable, apply bounds directly @@ -439,9 +433,9 @@ namespace seq { add_upper_int_bound(mem.m_str, max_len, mem.m_dep); } else { // str is a concatenation or other term: add as general int_constraints - ast_manager& m = m_graph->sg().get_manager(); + ast_manager& m = m_graph.m(); arith_util arith(m); - expr_ref len_str = m_graph->compute_length_expr(mem.m_str); + expr_ref len_str = m_graph.compute_length_expr(mem.m_str); if (min_len > 0) { expr_ref bound(arith.mk_int(min_len), m); m_int_constraints.push_back( @@ -491,18 +485,14 @@ namespace seq { } else { SASSERT(s.m_val->is_char()); // symbolic char → concrete char: check range constraints - // NSB review: seq.is_const_char is unchecked, what if it returns false? - // should this work for non-string characters as well? if (m_char_ranges.contains(var_id)) { char_set& range = m_char_ranges.find(var_id); - // extract the concrete char value from the s_char snode unsigned ch_val = 0; seq_util& seq = sg.get_seq_util(); expr* unit_expr = s.m_val->get_expr(); expr* ch_expr = nullptr; - if (unit_expr && seq.str.is_unit(unit_expr, ch_expr)) - seq.is_const_char(ch_expr, ch_val); - if (!range.contains(ch_val)) { + bool have_char = unit_expr && seq.str.is_unit(unit_expr, ch_expr) && seq.is_const_char(ch_expr, ch_val); + if (have_char && !range.contains(ch_val)) { m_is_general_conflict = true; m_reason = backtrack_reason::character_range; return; @@ -518,6 +508,8 @@ namespace seq { // ----------------------------------------------- nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver): + m_m(sg.get_manager()), + m_seq(sg.get_seq_util()), m_sg(sg), m_solver(solver), m_parikh(alloc(seq_parikh, sg)), @@ -533,7 +525,7 @@ namespace seq { nielsen_node* nielsen_graph::mk_node() { unsigned id = m_nodes.size(); - nielsen_node* n = alloc(nielsen_node, this, id); + nielsen_node* n = alloc(nielsen_node, *this, id); m_nodes.push_back(n); return n; } @@ -556,7 +548,6 @@ namespace seq { if (!m_root) m_root = mk_node(); dep_tracker dep; - // NSB review: is this correct? dep.insert(m_root->str_eqs().size()); str_eq eq(lhs, rhs, dep); eq.sort(); @@ -568,7 +559,6 @@ namespace seq { if (!m_root) m_root = mk_node(); dep_tracker dep; - // NSB reivew: is this correct? dep.insert(m_root->str_eqs().size() + m_root->str_mems().size()); euf::snode* history = m_sg.mk_empty_seq(str->get_sort()); unsigned id = next_mem_id(); @@ -1060,22 +1050,14 @@ namespace seq { // nielsen_node: simplify_and_init // ----------------------------------------------------------------------- - // NSB review: simplify the loop that checks all_eliminable and has_char to - // use any_of. - // NSB review: use is_power matcher defined in seq_util instead of ad-hoc checks for the t->is_power case. - bool nielsen_node::handle_empty_side(euf::sgraph& sg, euf::snode* non_empty_side, dep_tracker const& dep, bool& changed) { euf::snode_vector tokens; non_empty_side->collect_tokens(tokens); - bool all_eliminable = true; - bool has_char = false; - for (euf::snode* t : tokens) { - if (t->is_char()) has_char = true; - else if (!t->is_var() && !t->is_power() && t->kind() != euf::snode_kind::s_other) { - all_eliminable = false; break; - } - } + bool has_char = std::any_of(tokens.begin(), tokens.end(), [](euf::snode* t){ return t->is_char(); }); + bool all_eliminable = std::all_of(tokens.begin(), tokens.end(), [](euf::snode* t){ + return t->is_var() || t->is_power() || t->kind() == euf::snode_kind::s_other; + }); if (has_char || !all_eliminable) { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; @@ -1083,6 +1065,7 @@ namespace seq { } ast_manager& m = sg.get_manager(); arith_util arith(m); + seq_util& seq = sg.get_seq_util(); for (euf::snode* t : tokens) { if (t->is_var()) { nielsen_subst s(t, sg.mk_empty_seq(t->get_sort()), dep); @@ -1092,8 +1075,8 @@ namespace seq { else if (t->is_power()) { // Power equated to empty → exponent must be 0. expr* e = t->get_expr(); - expr* pow_exp = (e && is_app(e) && to_app(e)->get_num_args() >= 2) - ? to_app(e)->get_arg(1) : nullptr; + expr* pow_base = nullptr, *pow_exp = nullptr; + if (e) seq.str.is_power(e, pow_base, pow_exp); if (pow_exp) { expr* zero = arith.mk_numeral(rational(0), true); m_int_constraints.push_back( @@ -1116,8 +1099,6 @@ namespace seq { // Uses syntactic matching on Z3 expression structure: pointer equality // detects shared sub-expressions created during ConstNumUnwinding. // - // NSB review: use arith_util matchers for addition and subtraction, etc instead of ad-hoc checks - static bool get_const_power_diff(expr* b, expr* a, arith_util& arith, rational& diff) { if (a == b) { diff = rational(0); return true; } // b = (+ a k) ? @@ -1152,19 +1133,19 @@ namespace seq { } // Get the base expression of a power snode. - // NSB review: use seq_util.is_power matcher instead of ad-hoc checks for t->is_power and get_expr structure. - static expr* get_power_base_expr(euf::snode* power) { + static expr* get_power_base_expr(euf::snode* power, seq_util& seq) { if (!power || !power->is_power()) return nullptr; expr* e = power->get_expr(); - return (e && is_app(e) && to_app(e)->get_num_args() >= 2) ? to_app(e)->get_arg(0) : nullptr; + expr* base = nullptr, *exp = nullptr; + return (e && seq.str.is_power(e, base, exp)) ? base : nullptr; } // Get the exponent expression of a power snode. - // NSB review: use seq_util.is_power matcher instead of ad-hoc checks for t->is_power and get_expr structure. - static expr* get_power_exp_expr(euf::snode* power) { + static expr* get_power_exp_expr(euf::snode* power, seq_util& seq) { if (!power || !power->is_power()) return nullptr; expr* e = power->get_expr(); - return (e && is_app(e) && to_app(e)->get_num_args() >= 2) ? to_app(e)->get_arg(1) : nullptr; + expr* base = nullptr, *exp = nullptr; + return (e && seq.str.is_power(e, base, exp)) ? exp : nullptr; } // Merge adjacent tokens with the same power base on one side of an equation. @@ -1196,8 +1177,8 @@ namespace seq { // cross-side cancellation works better with unmerged leading powers // (e.g., w^k trivially ≤ 1+k, but w^(2k) vs 1+k requires k ≥ 1). if (tok->is_power() && i > 0) { - expr* base_e = get_power_base_expr(tok); - expr* exp_acc = get_power_exp_expr(tok); + expr* base_e = get_power_base_expr(tok, seq); + expr* exp_acc = get_power_exp_expr(tok, seq); if (!base_e || !exp_acc) { result.push_back(tok); ++i; continue; } bool local_merged = false; @@ -1205,9 +1186,9 @@ namespace seq { while (j < tokens.size()) { euf::snode* next = tokens[j]; if (next->is_power()) { - expr* nb = get_power_base_expr(next); + expr* nb = get_power_base_expr(next, seq); if (nb == base_e) { - exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(next)); + exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(next, seq)); local_merged = true; ++j; continue; } } @@ -1234,17 +1215,17 @@ namespace seq { // unwind produces u · u^(n-1); merging it back to u^n creates an infinite cycle. if (i > 0 && tok->is_char() && tok->get_expr() && i + 1 < tokens.size()) { euf::snode* next = tokens[i + 1]; - if (next->is_power() && get_power_base_expr(next) == tok->get_expr()) { + if (next->is_power() && get_power_base_expr(next, seq) == tok->get_expr()) { expr* base_e = tok->get_expr(); // Use same arg order as Case 1: add(exp, 1), not add(1, exp), // so that merging "c · c^e" and "c^e · c" both produce add(e, 1) // and the resulting power expression is hash-consed identically. - expr* exp_acc = arith.mk_add(get_power_exp_expr(next), arith.mk_int(1)); + expr* exp_acc = arith.mk_add(get_power_exp_expr(next, seq), arith.mk_int(1)); unsigned j = i + 2; while (j < tokens.size()) { euf::snode* further = tokens[j]; - if (further->is_power() && get_power_base_expr(further) == base_e) { - exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(further)); + if (further->is_power() && get_power_base_expr(further, seq) == base_e) { + exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(further, seq)); ++j; continue; } if (further->is_char() && further->get_expr() == base_e) { @@ -1268,12 +1249,10 @@ namespace seq { if (!merged) return nullptr; - // Rebuild snode from merged token list - // NSB review: set rebuilt to nullptr and only create the empty seq if it is still null after the loop. - euf::snode* rebuilt = sg.mk_empty_seq(side->get_sort()); - for (unsigned k = 0; k < result.size(); ++k) { + euf::snode* rebuilt = nullptr; + for (unsigned k = 0; k < result.size(); ++k) rebuilt = (k == 0) ? result[k] : sg.mk_concat(rebuilt, result[k]); - } + if (!rebuilt) rebuilt = sg.mk_empty_seq(side->get_sort()); return rebuilt; } @@ -1287,13 +1266,14 @@ namespace seq { ast_manager& m = sg.get_manager(); arith_util arith(m); + seq_util seq(m); bool simplified = false; euf::snode_vector result; for (euf::snode* tok : tokens) { if (tok->is_power()) { - expr* exp_e = get_power_exp_expr(tok); + expr* exp_e = get_power_exp_expr(tok, seq); rational val; if (exp_e && arith.is_numeral(exp_e, val)) { if (val.is_zero()) { @@ -1317,11 +1297,10 @@ namespace seq { if (!simplified) return nullptr; - // NSB review: set rebuilt to nullptr and only create the empty seq if it is still null after the loop. - if (result.empty()) return sg.mk_empty_seq(side->get_sort()); - euf::snode* rebuilt = result[0]; - for (unsigned k = 1; k < result.size(); ++k) - rebuilt = sg.mk_concat(rebuilt, result[k]); + euf::snode* rebuilt = nullptr; + for (euf::snode* tok : result) + rebuilt = rebuilt ? sg.mk_concat(rebuilt, tok) : tok; + if (!rebuilt) rebuilt = sg.mk_empty_seq(side->get_sort()); return rebuilt; } @@ -1333,6 +1312,7 @@ namespace seq { static std::pair comm_power( euf::snode* base_sn, euf::snode* side, ast_manager& m, bool fwd) { arith_util arith(m); + seq_util seq(m); euf::snode_vector base_tokens, side_tokens; collect_tokens_dir(base_sn, fwd, base_tokens); collect_tokens_dir(side, fwd, side_tokens); @@ -1372,7 +1352,7 @@ namespace seq { for (unsigned j = 0; j < pb_tokens.size() && match; j++) match = (pb_tokens[j] == base_tokens[j]); if (match) { - expr* pow_exp = get_power_exp_expr(t); + expr* pow_exp = get_power_exp_expr(t, seq); if (pow_exp) { sum = sum ? arith.mk_add(sum, pow_exp) : pow_exp; continue; @@ -1391,13 +1371,11 @@ namespace seq { return {expr_ref(last_stable_sum, m), last_stable_idx}; } - // NSB review: nielsen_node already has a reference to nielsen_graph so why not use that? - - simplify_result nielsen_node::simplify_and_init(nielsen_graph& g, svector const& cur_path) { + simplify_result nielsen_node::simplify_and_init(svector const& cur_path) { if (m_is_extended) return simplify_result::proceed; - euf::sgraph& sg = g.sg(); + euf::sgraph& sg = m_graph.sg(); ast_manager& m = sg.get_manager(); arith_util arith(m); seq_util seq(m); @@ -1445,15 +1423,13 @@ namespace seq { eq.m_rhs->collect_tokens(rhs_toks); // --- prefix --- - // NSB review: use m.are_distinct instead of ad-hoc checks for char clash, - // NSB review: use m.are_equal instead of pointer lt->id() == rt->id(). unsigned prefix = 0; while (prefix < lhs_toks.size() && prefix < rhs_toks.size()) { euf::snode* lt = lhs_toks[prefix]; euf::snode* rt = rhs_toks[prefix]; - if (lt->id() == rt->id() && (lt->is_char() || lt->is_var())) { + if ((lt->is_char() || lt->is_var()) && m.are_equal(lt->get_expr(), rt->get_expr())) { ++prefix; - } else if (lt->is_char() && rt->is_char()) { + } else if (lt->is_char() && rt->is_char() && m.are_distinct(lt->get_expr(), rt->get_expr())) { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; return simplify_result::conflict; @@ -1463,16 +1439,14 @@ namespace seq { } // --- suffix (only among the tokens not already consumed by prefix) --- - // NSB review: use m.are_distinct instead of ad-hoc checks for char clash, - // NSB review: use m.are_equal instead of pointer lt->id() == rt->id(). unsigned lsz = lhs_toks.size(), rsz = rhs_toks.size(); unsigned suffix = 0; while (suffix < lsz - prefix && suffix < rsz - prefix) { euf::snode* lt = lhs_toks[lsz - 1 - suffix]; euf::snode* rt = rhs_toks[rsz - 1 - suffix]; - if (lt->id() == rt->id() && (lt->is_char() || lt->is_var())) { + if ((lt->is_char() || lt->is_var()) && m.are_equal(lt->get_expr(), rt->get_expr())) { ++suffix; - } else if (lt->is_char() && rt->is_char()) { + } else if (lt->is_char() && rt->is_char() && m.are_distinct(lt->get_expr(), rt->get_expr())) { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; return simplify_result::conflict; @@ -1498,7 +1472,14 @@ namespace seq { // constraint as edge labels so they appear in the graph. // Guard: skip if we already created a child (re-entry via iterative deepening). - // NSB review: ask copilot to summarize this step using a formal looking but readable spec + // Spec: DirectionalInconsistency. + // For direction d ∈ {left, right}, let first_d(u) denote the first token of u + // in direction d. + // If pow_side[d] = u^p and other_side[d] = c (a concrete character), + // and first_d(base(u)) = c' with c' ≠ c, + // then p = 0 (the power is vacuous in direction d). + // Action: create a deterministic child with substitution u^p → ε and + // int constraint p = 0. if (!eq.is_trivial() && eq.m_lhs && eq.m_rhs) { for (unsigned od = 0; od < 2; ++od) { bool fwd = (od == 0); @@ -1513,17 +1494,17 @@ namespace seq { if (!base_sn) continue; euf::snode* base_head = dir_token(base_sn, fwd); if (!base_head || !base_head->is_char()) continue; - if (base_head->id() == other_head->id()) continue; + if (m.are_equal(base_head->get_expr(), other_head->get_expr())) continue; // Directional base/head mismatch -> force exponent 0 and power -> ε. - nielsen_node* child = g.mk_child(this); - nielsen_edge* e = g.mk_edge(this, child, true); + nielsen_node* child = m_graph.mk_child(this); + nielsen_edge* e = m_graph.mk_edge(this, child, true); nielsen_subst s(pow_head, sg.mk_empty_seq(pow_head->get_sort()), eq.m_dep); e->add_subst(s); child->apply_subst(sg, s); - expr* pow_exp = get_power_exp_expr(pow_head); + expr* pow_exp = get_power_exp_expr(pow_head, seq); if (pow_exp) { expr* zero = arith.mk_numeral(rational(0), true); - e->add_side_int(g.mk_int_constraint( + e->add_side_int(m_graph.mk_int_constraint( pow_exp, zero, int_constraint_kind::eq, eq.m_dep)); } set_extended(true); @@ -1556,8 +1537,13 @@ namespace seq { // other side's prefix. If we can determine the ordering between // p and c, cancel the matched portion. Mirrors ZIPT's // SimplifyPowerElim calling CommPower. - // NSB review: ask copilot to comment these rewrites with a spec that - // resembles something more formal and is readable. + // Spec: CommPower cancellation. + // Given: pow_side = w^p · rest_pow and other_side = w^c · rest_other + // where c is the number of times the base pattern w occurs in the + // directional prefix of other_side. + // - If p ≤ c: pow_side := rest_pow, other_side := w^(c-p) · rest_other + // - If c ≤ p: pow_side := w^(p-c) · rest_pow, other_side := rest_other + // - If p = c: both reduce completely (handled by both conditions above). if (!eq.m_lhs || !eq.m_rhs) continue; bool comm_changed = false; for (int side = 0; side < 2 && !comm_changed; ++side) { @@ -1569,7 +1555,7 @@ namespace seq { euf::snode* end_tok = dir_token(pow_side, fwd); if (!end_tok || !end_tok->is_power()) continue; euf::snode* base_sn = end_tok->arg(0); - expr* pow_exp = get_power_exp_expr(end_tok); + expr* pow_exp = get_power_exp_expr(end_tok, seq); if (!base_sn || !pow_exp) continue; auto [count, consumed] = comm_power(base_sn, other_side, m, fwd); @@ -1583,14 +1569,14 @@ namespace seq { pow_le_count = diff.is_nonneg(); } else if (!cur_path.empty()) { - pow_le_count = g.check_lp_le(pow_exp, norm_count, this, cur_path); - count_le_pow = g.check_lp_le(norm_count, pow_exp, this, cur_path); + pow_le_count = m_graph.check_lp_le(pow_exp, norm_count, this, cur_path); + count_le_pow = m_graph.check_lp_le(norm_count, pow_exp, this, cur_path); } if (!pow_le_count && !count_le_pow) continue; pow_side = dir_drop(sg, pow_side, 1, fwd); other_side = dir_drop(sg, other_side, consumed, fwd); - expr* base_e = get_power_base_expr(end_tok); + expr* base_e = get_power_base_expr(end_tok, seq); if (pow_le_count && count_le_pow) { // equal: both cancel completely } @@ -1631,12 +1617,12 @@ namespace seq { euf::snode* rh = dir_token(eq.m_rhs, fwd); if (!(lh && rh && lh->is_power() && rh->is_power())) continue; - expr* lb = get_power_base_expr(lh); - expr* rb = get_power_base_expr(rh); + expr* lb = get_power_base_expr(lh, seq); + expr* rb = get_power_base_expr(rh, seq); if (!(lb && rb && lb == rb)) continue; - expr* lp = get_power_exp_expr(lh); - expr* rp = get_power_exp_expr(rh); + expr* lp = get_power_exp_expr(lh, seq); + expr* rp = get_power_exp_expr(rh, seq); rational diff; if (lp && rp && get_const_power_diff(rp, lp, arith, diff)) { // rp = lp + diff (constant difference) @@ -1659,8 +1645,8 @@ namespace seq { } // 3e: LP-aware power directional elimination else if (lp && rp && !cur_path.empty()) { - bool lp_le_rp = g.check_lp_le(lp, rp, this, cur_path); - bool rp_le_lp = g.check_lp_le(rp, lp, this, cur_path); + bool lp_le_rp = m_graph.check_lp_le(lp, rp, this, cur_path); + bool rp_le_lp = m_graph.check_lp_le(rp, lp, this, cur_path); if (lp_le_rp || rp_le_lp) { expr* smaller_exp = lp_le_rp ? lp : rp; expr* larger_exp = lp_le_rp ? rp : lp; @@ -1668,15 +1654,15 @@ namespace seq { eq.m_rhs = dir_drop(sg, eq.m_rhs, 1, fwd); if (lp_le_rp && rp_le_lp) { // both ≤ -> equal -> both cancel completely - add_int_constraint(g.mk_int_constraint(lp, rp, int_constraint_kind::eq, eq.m_dep)); + add_int_constraint(m_graph.mk_int_constraint(lp, rp, int_constraint_kind::eq, eq.m_dep)); } else { // strictly less: create diff power d = larger - smaller >= 1 - expr_ref d(g.mk_fresh_int_var()); + expr_ref d(m_graph.mk_fresh_int_var()); expr_ref one_e(arith.mk_int(1), m); expr_ref d_plus_smaller(arith.mk_add(d, smaller_exp), m); - add_int_constraint(g.mk_int_constraint(d, one_e, int_constraint_kind::ge, eq.m_dep)); - add_int_constraint(g.mk_int_constraint(d_plus_smaller, larger_exp, int_constraint_kind::eq, eq.m_dep)); + add_int_constraint(m_graph.mk_int_constraint(d, one_e, int_constraint_kind::ge, eq.m_dep)); + add_int_constraint(m_graph.mk_int_constraint(d_plus_smaller, larger_exp, int_constraint_kind::eq, eq.m_dep)); expr_ref pw(seq.str.mk_power(lb, d), m); euf::snode*& larger_side = lp_le_rp ? eq.m_rhs : eq.m_lhs; larger_side = dir_concat(sg, sg.mk(pw), larger_side, fwd); @@ -1768,7 +1754,7 @@ namespace seq { // If the symbolic char has a char_range constraint and that // range is a subset of exactly one minterm's character class, // we can deterministically take that minterm's derivative. - if (m_char_ranges.contains(tok->id()) && g.m_parikh) { + if (m_char_ranges.contains(tok->id()) && m_graph.m_parikh) { char_set const& cs = m_char_ranges[tok->id()]; if (!cs.is_empty()) { euf::snode* matching_deriv = nullptr; @@ -1776,7 +1762,7 @@ namespace seq { for (euf::snode* mt : minterms) { if (!mt || mt->is_fail()) continue; if (!mt->get_expr()) continue; - char_set mt_cs = g.m_parikh->minterm_to_char_set(mt->get_expr()); + char_set mt_cs = m_graph.m_parikh->minterm_to_char_set(mt->get_expr()); if (cs.is_subset(mt_cs)) { euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt); if (!deriv) { found = false; break; } @@ -1829,11 +1815,11 @@ namespace seq { // and check if the result intersected with the target regex is empty. // Detects infeasible constraints that would otherwise require // expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening. - if (g.m_seq_regex) { + if (m_graph.m_seq_regex) { for (str_mem const& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) continue; - if (g.check_regex_widening(*this, mem.m_str, mem.m_regex)) { + if (m_graph.check_regex_widening(*this, mem.m_str, mem.m_regex)) { m_is_general_conflict = true; m_reason = backtrack_reason::regex; return simplify_result::conflict; @@ -2025,8 +2011,8 @@ namespace seq { prefix_sn = dir_concat(sg, prefix_sn, char_toks[j], fwd); euf::snode* replacement = dir_concat(sg, prefix_sn, var_node, fwd); nielsen_subst s(var_node, replacement, eq.m_dep); - nielsen_node* child = g.mk_child(this); - nielsen_edge* e = g.mk_edge(this, child, true); + nielsen_node* child = m_graph.mk_child(this); + nielsen_edge* e = m_graph.mk_edge(this, child, true); e->add_subst(s); child->apply_subst(sg, s); set_extended(true); @@ -2191,7 +2177,7 @@ namespace seq { node->set_eval_idx(m_run_idx); // simplify constraints (idempotent after first call) - simplify_result sr = node->simplify_and_init(*this, cur_path); + simplify_result sr = node->simplify_and_init(cur_path); if (sr == simplify_result::conflict) { ++m_stats.m_num_simplify_conflict; @@ -3175,6 +3161,7 @@ namespace seq { bool nielsen_graph::apply_split_power_elim(nielsen_node* node) { ast_manager& m = m_sg.get_manager(); arith_util arith(m); + seq_util seq(m); for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; @@ -3190,7 +3177,7 @@ namespace seq { euf::snode* end_tok = dir_token(pow_side, fwd); if (!end_tok || !end_tok->is_power()) continue; euf::snode* base_sn = end_tok->arg(0); - expr* pow_exp = get_power_exp_expr(end_tok); + expr* pow_exp = get_power_exp_expr(end_tok, seq); if (!base_sn || !pow_exp) continue; auto [count, consumed] = comm_power(base_sn, other_side, m, fwd); @@ -3410,7 +3397,6 @@ namespace seq { bool nielsen_graph::apply_gpower_intr(nielsen_node* node) { ast_manager& m = m_sg.get_manager(); arith_util arith(m); - seq_util& seq = m_sg.get_seq_util(); for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; @@ -3491,7 +3477,7 @@ namespace seq { // E.g., [(ab)^3] → [a, b] so we get (ab)^n instead of ((ab)^3)^n. // (mirrors ZIPT: if b.Length == 1 && b is PowerToken pt => b = pt.Base) if (ground_prefix.size() == 1 && ground_prefix[0]->is_power()) { - expr* base_e = get_power_base_expr(ground_prefix[0]); + expr* base_e = get_power_base_expr(ground_prefix[0], seq); if (base_e) { euf::snode* base_sn = m_sg.mk(base_e); if (base_sn) { @@ -3554,7 +3540,7 @@ namespace seq { if (tok->is_power()) { // Token is a power u^exp: use fresh m' with 0 ≤ m' ≤ exp expr* inner_exp = get_power_exponent(tok); - expr* inner_base = get_power_base_expr(tok); + expr* inner_base = get_power_base_expr(tok, seq); if (inner_exp && inner_base) { fresh_m = mk_fresh_int_var(); expr_ref partial_pow(seq.str.mk_power(inner_base, fresh_m), m); @@ -3694,7 +3680,7 @@ namespace seq { euf::snode_vector base_toks; collect_tokens_dir(base, fwd, base_toks); unsigned base_len = base_toks.size(); - expr* base_expr = get_power_base_expr(power); + expr* base_expr = get_power_base_expr(power, seq); if (!base_expr || base_len == 0) return false; @@ -3723,7 +3709,7 @@ namespace seq { if (tok->is_power()) { // Token is a power u^exp: decompose with fresh m', 0 <= m' <= exp expr* inner_exp = get_power_exponent(tok); - expr* inner_base_e = get_power_base_expr(tok); + expr* inner_base_e = get_power_base_expr(tok, seq); if (inner_exp && inner_base_e) { fresh_inner_m = mk_fresh_int_var(); expr_ref partial_pow(seq.str.mk_power(inner_base_e, fresh_inner_m), m); diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 8a2fe520a..922a91a97 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -500,7 +500,7 @@ namespace seq { friend class nielsen_graph; unsigned m_id; - nielsen_graph* m_graph; + nielsen_graph& m_graph; // constraints at this node vector m_str_eq; // string equalities @@ -549,10 +549,10 @@ namespace seq { std::map, unsigned> m_regex_occurrence; public: - nielsen_node(nielsen_graph* graph, unsigned id); + nielsen_node(nielsen_graph& graph, unsigned id); unsigned id() const { return m_id; } - nielsen_graph* graph() const { return m_graph; } + nielsen_graph& graph() const { return m_graph; } // constraint access vector const& str_eqs() const { return m_str_eq; } @@ -654,7 +654,7 @@ namespace seq { // cur_path provides the path from root to this node so that the // LP solver can be queried for deterministic power cancellation. // Returns proceed, conflict, satisfied, or restart. - simplify_result simplify_and_init(nielsen_graph& g, svector const& cur_path = svector()); + simplify_result simplify_and_init(svector const& cur_path = svector()); // true if all str_eqs are trivial and there are no str_mems bool is_satisfied() const; @@ -724,6 +724,8 @@ namespace seq { // mirrors ZIPT's NielsenGraph class nielsen_graph { friend class nielsen_node; + ast_manager& m_m; + seq_util& m_seq; euf::sgraph& m_sg; region m_region; ptr_vector m_nodes; @@ -791,6 +793,10 @@ namespace seq { ~nielsen_graph(); euf::sgraph& sg() { return m_sg; } + ast_manager& m() { return m_m; } + ast_manager const& m() const { return m_m; } + seq_util& seq() { return m_seq; } + seq_util const& seq() const { return m_seq; } // node management nielsen_node* mk_node(); diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp index c896be9f4..8474c1d27 100644 --- a/src/test/nseq_basic.cpp +++ b/src/test/nseq_basic.cpp @@ -132,7 +132,7 @@ static void test_nseq_node_satisfied() { SASSERT(node->str_eqs().size() == 1); SASSERT(!node->str_eqs()[0].is_trivial() || node->str_eqs()[0].m_lhs == node->str_eqs()[0].m_rhs); // After simplification, trivial equalities should be removed - seq::simplify_result sr = node->simplify_and_init(ng); + seq::simplify_result sr = node->simplify_and_init(); VERIFY(sr == seq::simplify_result::satisfied || sr == seq::simplify_result::proceed); std::cout << " ok\n"; } diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp index e33b03bd1..067f6fd61 100644 --- a/src/test/seq_nielsen.cpp +++ b/src/test/seq_nielsen.cpp @@ -864,7 +864,7 @@ static void test_regex_char_split_basic() { ng.add_str_mem(x, regex); SASSERT(ng.root() != nullptr); - auto sr = ng.root()->simplify_and_init(ng); + auto sr = ng.root()->simplify_and_init(); SASSERT(sr != seq::simplify_result::conflict); bool extended = ng.generate_extensions(ng.root()); @@ -1304,7 +1304,7 @@ static void test_generate_extensions_regex_only() { ng.add_str_mem(x, re_node); seq::nielsen_node* root = ng.root(); - root->simplify_and_init(ng); + root->simplify_and_init(); bool extended = ng.generate_extensions(root); SASSERT(extended); @@ -1651,7 +1651,7 @@ static void test_simplify_prefix_cancel() { seq::dep_tracker dep; dep.insert(0); node->add_str_eq(seq::str_eq(abx, aby, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::proceed); SASSERT(node->str_eqs().size() == 1); // after prefix cancel: remaining eq has variable-only sides @@ -1681,7 +1681,7 @@ static void test_simplify_suffix_cancel_rtl() { seq::dep_tracker dep; dep.insert(0); node->add_str_eq(seq::str_eq(xa, ya, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::proceed); SASSERT(node->str_eqs().size() == 1); SASSERT(node->str_eqs()[0].m_lhs->is_var()); @@ -1710,7 +1710,7 @@ static void test_simplify_symbol_clash() { seq::dep_tracker dep; dep.insert(0); node->add_str_eq(seq::str_eq(ax, by, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::conflict); SASSERT(node->is_general_conflict()); SASSERT(node->reason() == seq::backtrack_reason::symbol_clash); @@ -1736,7 +1736,7 @@ static void test_simplify_empty_propagation() { seq::dep_tracker dep; dep.insert(0); node->add_str_eq(seq::str_eq(e, xy, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::satisfied); } @@ -1758,7 +1758,7 @@ static void test_simplify_empty_vs_char() { seq::dep_tracker dep; dep.insert(0); node->add_str_eq(seq::str_eq(e, a, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::conflict); SASSERT(node->reason() == seq::backtrack_reason::symbol_clash); } @@ -1784,7 +1784,7 @@ static void test_simplify_multi_pass_clash() { seq::dep_tracker dep; dep.insert(0); node->add_str_eq(seq::str_eq(ab, ac, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::conflict); SASSERT(node->reason() == seq::backtrack_reason::symbol_clash); } @@ -1808,7 +1808,7 @@ static void test_simplify_trivial_removal() { node->add_str_eq(seq::str_eq(e, e, dep)); // trivial node->add_str_eq(seq::str_eq(x, y, dep)); // non-trivial - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::proceed); SASSERT(node->str_eqs().size() == 1); } @@ -1831,7 +1831,7 @@ static void test_simplify_all_trivial_satisfied() { node->add_str_eq(seq::str_eq(x, x, dep)); // trivial: same pointer node->add_str_eq(seq::str_eq(e, e, dep)); // trivial: both empty - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::satisfied); } @@ -1857,7 +1857,7 @@ static void test_simplify_regex_infeasible() { seq::dep_tracker dep; dep.insert(0); node->add_str_mem(seq::str_mem(e, regex, e, 0, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::conflict); SASSERT(node->reason() == seq::backtrack_reason::regex); } @@ -1885,7 +1885,7 @@ static void test_simplify_nullable_removal() { seq::dep_tracker dep; dep.insert(0); node->add_str_mem(seq::str_mem(e, regex, e, 0, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::satisfied); SASSERT(node->str_mems().empty()); } @@ -1913,7 +1913,7 @@ static void test_simplify_brzozowski_sat() { seq::dep_tracker dep; dep.insert(0); node->add_str_mem(seq::str_mem(a, regex, e, 0, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::satisfied); } @@ -1945,7 +1945,7 @@ static void test_simplify_brzozowski_rtl_suffix() { seq::dep_tracker dep; dep.insert(0); node->add_str_mem(seq::str_mem(xa, regex, e, 0, dep)); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::proceed); SASSERT(node->str_mems().size() == 1); SASSERT(node->str_mems()[0].m_str->is_var()); @@ -1984,7 +1984,7 @@ static void test_simplify_multiple_eqs() { node->add_str_eq(seq::str_eq(x, z, dep)); SASSERT(node->str_eqs().size() == 3); - auto sr = node->simplify_and_init(ng); + auto sr = node->simplify_and_init(); SASSERT(sr == seq::simplify_result::proceed); // eq1 removed, eq2 simplified to x=y, eq3 kept → 2 eqs remain SASSERT(node->str_eqs().size() == 2); @@ -2587,7 +2587,7 @@ static void test_star_intr_no_backedge() { seq::nielsen_node* root = ng.root(); SASSERT(root->backedge() == nullptr); - auto sr = root->simplify_and_init(ng); + auto sr = root->simplify_and_init(); SASSERT(sr != seq::simplify_result::conflict); bool extended = ng.generate_extensions(root); @@ -2621,7 +2621,7 @@ static void test_star_intr_with_backedge() { seq::nielsen_node* root = ng.root(); root->set_backedge(root); // simulate backedge - auto sr = root->simplify_and_init(ng); + auto sr = root->simplify_and_init(); // star(to_re("A")) is nullable, so empty string satisfies it // simplify may remove the membership or proceed if (sr == seq::simplify_result::satisfied) { @@ -2720,7 +2720,7 @@ static void test_regex_var_split_basic() { ng.add_str_mem(x, regex); seq::nielsen_node* root = ng.root(); - auto sr = root->simplify_and_init(ng); + auto sr = root->simplify_and_init(); SASSERT(sr != seq::simplify_result::conflict); bool extended = ng.generate_extensions(root); @@ -2797,7 +2797,7 @@ static void test_const_num_unwinding_no_power() { seq::nielsen_node* root = ng.root(); // Should detect clash during simplify - auto sr = root->simplify_and_init(ng); + auto sr = root->simplify_and_init(); SASSERT(sr == seq::simplify_result::conflict); } @@ -3535,7 +3535,7 @@ static void test_simplify_adds_parikh_bounds() { seq::nielsen_node* node = ng.root(); // simplify_and_init should call init_var_bounds_from_mems - seq::simplify_result sr = node->simplify_and_init(ng); + seq::simplify_result sr = node->simplify_and_init(); (void)sr; // x ∈ to_re("AB") has min_len=2, max_len=2