From 2f46c8893e82dc42cad13a57a1be2a4d710b9552 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Wed, 11 Mar 2026 11:29:25 +0100 Subject: [PATCH 1/6] Another attempt to fix powers --- src/smt/seq/seq_nielsen.cpp | 742 ++++++++++++++++++++++++++++++++---- src/smt/seq/seq_nielsen.h | 10 +- src/test/nseq_zipt.cpp | 112 +++--- 3 files changed, 742 insertions(+), 122 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 6a2c730ca..7fdee401a 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -599,10 +599,11 @@ namespace seq { return result; } - // Helper: render an snode as a human-readable label. + // Helper: render an snode as an HTML label for DOT output. // Groups consecutive s_char tokens into quoted strings, renders s_var by name, - // and falls back to mk_pp for other token kinds. - static std::string snode_label(euf::snode const* n, ast_manager& m) { + // shows s_power with superscripts, s_unit by its inner expression, + // and falls back to mk_pp (HTML-escaped) for other token kinds. + static std::string snode_label_html(euf::snode const* n, ast_manager& m) { if (!n) return "null"; seq_util seq(m); @@ -621,7 +622,7 @@ namespace seq { auto flush_chars = [&]() { if (char_acc.empty()) return; if (!first) result += " + "; - result += "\"" + char_acc + "\""; + result += "\"" + dot_html_escape(char_acc) + "\""; first = false; char_acc.clear(); }; @@ -655,11 +656,33 @@ namespace seq { if (!e) { result += "#" + std::to_string(tok->id()); } else if (tok->is_var()) { - result += to_app(e)->get_decl()->get_name().str(); + result += dot_html_escape(to_app(e)->get_decl()->get_name().str()); + } else if (tok->is_unit()) { + // seq.unit with non-literal character: show the character expression + expr* ch = to_app(e)->get_arg(0); + if (is_app(ch)) { + result += dot_html_escape(to_app(ch)->get_decl()->get_name().str()); + } else { + std::ostringstream os; + os << mk_pp(ch, m); + result += dot_html_escape(os.str()); + } + } else if (tok->is_power()) { + // seq.power(base, n): render as basen + std::string base_html = snode_label_html(tok->arg(0), m); + if (tok->arg(0)->length() > 1) + base_html = "(" + base_html + ")"; + result += base_html; + result += ""; + expr* exp_expr = to_app(e)->get_arg(1); + std::ostringstream os; + os << mk_pp(exp_expr, m); + result += dot_html_escape(os.str()); + result += ""; } else { std::ostringstream os; os << mk_pp(e, m); - result += os.str(); + result += dot_html_escape(os.str()); } } flush_chars(); @@ -672,17 +695,17 @@ namespace seq { // string equalities for (auto const& eq : m_str_eq) { if (!any) { out << "Cnstr:
"; any = true; } - out << dot_html_escape(snode_label(eq.m_lhs, m)) + out << snode_label_html(eq.m_lhs, m) << " = " - << dot_html_escape(snode_label(eq.m_rhs, m)) + << snode_label_html(eq.m_rhs, m) << "
"; } // regex memberships for (auto const& mem : m_str_mem) { if (!any) { out << "Cnstr:
"; any = true; } - out << dot_html_escape(snode_label(mem.m_str, m)) + out << snode_label_html(mem.m_str, m) << " ∈ " - << dot_html_escape(snode_label(mem.m_regex, m)) + << snode_label_html(mem.m_regex, m) << "
"; } // character ranges @@ -790,9 +813,9 @@ namespace seq { for (auto const& s : e->subst()) { if (!first) out << "
"; first = false; - out << dot_html_escape(snode_label(s.m_var, m)) + out << snode_label_html(s.m_var, m) << " → " // mapping arrow - << dot_html_escape(snode_label(s.m_replacement, m)); + << snode_label_html(s.m_replacement, m); } for (auto const& cs : e->char_substs()) { if (!first) out << "
"; @@ -861,8 +884,215 @@ namespace seq { return false; } - simplify_result nielsen_node::simplify_and_init(nielsen_graph& g) { + // ----------------------------------------------------------------------- + // Power simplification helpers (mirrors ZIPT's MergePowers, + // SimplifyPowerElim/CommPower, SimplifyPowerSingle) + // ----------------------------------------------------------------------- + + // Check if exponent b equals exponent a + diff for some rational constant diff. + // Uses syntactic matching on Z3 expression structure: pointer equality + // detects shared sub-expressions created during ConstNumUnwinding. + static bool get_const_power_diff(expr* b, expr* a, arith_util& arith, rational& diff) { + if (a == b) { diff = rational(0); return true; } + // b = (+ a k) ? + if (arith.is_add(b) && to_app(b)->get_num_args() == 2) { + rational val; + if (to_app(b)->get_arg(0) == a && arith.is_numeral(to_app(b)->get_arg(1), val)) + { diff = val; return true; } + if (to_app(b)->get_arg(1) == a && arith.is_numeral(to_app(b)->get_arg(0), val)) + { diff = val; return true; } + } + // a = (+ b k) → diff = -k + if (arith.is_add(a) && to_app(a)->get_num_args() == 2) { + rational val; + if (to_app(a)->get_arg(0) == b && arith.is_numeral(to_app(a)->get_arg(1), val)) + { diff = -val; return true; } + if (to_app(a)->get_arg(1) == b && arith.is_numeral(to_app(a)->get_arg(0), val)) + { diff = -val; return true; } + } + // b = (- a k) → diff = -k + if (arith.is_sub(b) && to_app(b)->get_num_args() == 2) { + rational val; + if (to_app(b)->get_arg(0) == a && arith.is_numeral(to_app(b)->get_arg(1), val)) + { diff = -val; return true; } + } + // a = (- b k) → diff = k + if (arith.is_sub(a) && to_app(a)->get_num_args() == 2) { + rational val; + if (to_app(a)->get_arg(0) == b && arith.is_numeral(to_app(a)->get_arg(1), val)) + { diff = val; return true; } + } + return false; + } + + // Get the base expression of a power snode. + static expr* get_power_base_expr(euf::snode* power) { + if (!power || !power->is_power()) return nullptr; + expr* e = power->get_expr(); + return (e && is_app(e) && to_app(e)->get_num_args() >= 2) ? to_app(e)->get_arg(0) : nullptr; + } + + // Get the exponent expression of a power snode. + static expr* get_power_exp_expr(euf::snode* power) { + if (!power || !power->is_power()) return nullptr; + expr* e = power->get_expr(); + return (e && is_app(e) && to_app(e)->get_num_args() >= 2) ? to_app(e)->get_arg(1) : nullptr; + } + + // Merge adjacent tokens with the same power base on one side of an equation. + // Handles: char(c) · power(c^e) → power(c^(e+1)), + // power(c^e) · char(c) → power(c^(e+1)), + // power(c^e1) · power(c^e2) → power(c^(e1+e2)). + // Returns new snode if merging happened, nullptr otherwise. + static euf::snode* merge_adjacent_powers(euf::sgraph& sg, euf::snode* side) { + if (!side || side->is_empty() || side->is_token()) + return nullptr; + + euf::snode_vector tokens; + side->collect_tokens(tokens); + if (tokens.size() < 2) return nullptr; + + ast_manager& m = sg.get_manager(); + arith_util arith(m); + seq_util seq(m); + + bool merged = false; + euf::snode_vector result; + + unsigned i = 0; + while (i < tokens.size()) { + euf::snode* tok = tokens[i]; + + // Case 1: current is a power token — absorb following same-base tokens + if (tok->is_power()) { + expr* base_e = get_power_base_expr(tok); + expr* exp_acc = get_power_exp_expr(tok); + if (!base_e || !exp_acc) { result.push_back(tok); ++i; continue; } + + bool local_merged = false; + unsigned j = i + 1; + while (j < tokens.size()) { + euf::snode* next = tokens[j]; + if (next->is_power()) { + expr* nb = get_power_base_expr(next); + if (nb == base_e) { + exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(next)); + local_merged = true; ++j; continue; + } + } + if (next->is_char() && next->get_expr() == base_e) { + exp_acc = arith.mk_add(exp_acc, arith.mk_int(1)); + local_merged = true; ++j; continue; + } + break; + } + if (local_merged) { + merged = true; + expr_ref new_pow(seq.str.mk_power(base_e, exp_acc), m); + result.push_back(sg.mk(new_pow)); + } else { + result.push_back(tok); + } + i = j; + continue; + } + + // Case 2: current is a char — check if next is a same-base power + if (tok->is_char() && tok->get_expr() && i + 1 < tokens.size()) { + euf::snode* next = tokens[i + 1]; + if (next->is_power() && get_power_base_expr(next) == tok->get_expr()) { + expr* base_e = tok->get_expr(); + // Use same arg order as Case 1: add(exp, 1), not add(1, exp), + // so that merging "c · c^e" and "c^e · c" both produce add(e, 1) + // and the resulting power expression is hash-consed identically. + expr* exp_acc = arith.mk_add(get_power_exp_expr(next), arith.mk_int(1)); + unsigned j = i + 2; + while (j < tokens.size()) { + euf::snode* further = tokens[j]; + if (further->is_power() && get_power_base_expr(further) == base_e) { + exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(further)); + ++j; continue; + } + if (further->is_char() && further->get_expr() == base_e) { + exp_acc = arith.mk_add(exp_acc, arith.mk_int(1)); + ++j; continue; + } + break; + } + merged = true; + expr_ref new_pow(seq.str.mk_power(base_e, exp_acc), m); + result.push_back(sg.mk(new_pow)); + i = j; + continue; + } + } + + result.push_back(tok); + ++i; + } + + if (!merged) return nullptr; + + // Rebuild snode from merged token list + euf::snode* rebuilt = sg.mk_empty(); + for (unsigned k = 0; k < result.size(); ++k) { + rebuilt = (k == 0) ? result[k] : sg.mk_concat(rebuilt, result[k]); + } + return rebuilt; + } + + // Simplify constant-exponent powers: base^0 → ε, base^1 → base. + // Returns new snode if any simplification happened, nullptr otherwise. + static euf::snode* simplify_const_powers(euf::sgraph& sg, euf::snode* side) { + if (!side || side->is_empty()) return nullptr; + + euf::snode_vector tokens; + side->collect_tokens(tokens); + + ast_manager& m = sg.get_manager(); + arith_util arith(m); + + bool simplified = false; + euf::snode_vector result; + + for (euf::snode* tok : tokens) { + if (tok->is_power()) { + expr* exp_e = get_power_exp_expr(tok); + rational val; + if (exp_e && arith.is_numeral(exp_e, val)) { + if (val.is_zero()) { + // base^0 → ε (skip this token entirely) + simplified = true; + continue; + } + if (val.is_one()) { + // base^1 → base + euf::snode* base_sn = tok->arg(0); + if (base_sn) { + result.push_back(base_sn); + simplified = true; + continue; + } + } + } + } + result.push_back(tok); + } + + if (!simplified) return nullptr; + + if (result.empty()) return sg.mk_empty(); + euf::snode* rebuilt = result[0]; + for (unsigned k = 1; k < result.size(); ++k) + rebuilt = sg.mk_concat(rebuilt, result[k]); + return rebuilt; + } + + simplify_result nielsen_node::simplify_and_init(nielsen_graph& g, svector const& cur_path) { euf::sgraph& sg = g.sg(); + ast_manager& m = sg.get_manager(); + arith_util arith(m); + seq_util seq(m); bool changed = true; while (changed) { @@ -946,6 +1176,92 @@ namespace seq { } } } + + // pass 3: power simplification (mirrors ZIPT's LcpCompression + + // SimplifyPowerElim + SimplifyPowerSingle) + for (str_eq& eq : m_str_eq) { + if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs) + continue; + + // 3a: simplify constant-exponent powers (base^0 → ε, base^1 → base) + if (euf::snode* s = simplify_const_powers(sg, eq.m_lhs)) + { eq.m_lhs = s; changed = true; } + if (euf::snode* s = simplify_const_powers(sg, eq.m_rhs)) + { eq.m_rhs = s; changed = true; } + + // 3b: merge adjacent same-base tokens into combined powers + if (euf::snode* s = merge_adjacent_powers(sg, eq.m_lhs)) + { eq.m_lhs = s; changed = true; } + if (euf::snode* s = merge_adjacent_powers(sg, eq.m_rhs)) + { eq.m_rhs = s; changed = true; } + + // 3c: power prefix elimination — when both sides start with a + // power of the same base, cancel the common power prefix. + // Mirrors ZIPT's SimplifyPowerElim + CommPower. + if (!eq.m_lhs || !eq.m_rhs) continue; + euf::snode* lh = eq.m_lhs->first(); + euf::snode* rh = eq.m_rhs->first(); + if (lh && rh && lh->is_power() && rh->is_power()) { + expr* lb = get_power_base_expr(lh); + expr* rb = get_power_base_expr(rh); + if (lb && rb && lb == rb) { + expr* lp = get_power_exp_expr(lh); + expr* rp = get_power_exp_expr(rh); + rational diff; + if (lp && rp && get_const_power_diff(rp, lp, arith, diff)) { + // rp = lp + diff (constant difference) + eq.m_lhs = sg.drop_first(eq.m_lhs); + eq.m_rhs = sg.drop_first(eq.m_rhs); + if (diff.is_pos()) { + // rp > lp: prepend base^diff to RHS + expr_ref de(arith.mk_int(diff), m); + expr_ref pw(seq.str.mk_power(lb, de), m); + eq.m_rhs = sg.mk_concat(sg.mk(pw), eq.m_rhs); + } else if (diff.is_neg()) { + // lp > rp: prepend base^(-diff) to LHS + expr_ref de(arith.mk_int(-diff), m); + expr_ref pw(seq.str.mk_power(lb, de), m); + eq.m_lhs = sg.mk_concat(sg.mk(pw), eq.m_lhs); + } + // diff == 0: both powers cancel completely + changed = true; + } + // 3d: LP-aware power prefix elimination + // When the exponent difference is non-constant, query the + // LP solver to determine which exponent is larger, and + // cancel deterministically. This avoids the branching + // that NumCmp would otherwise create, matching ZIPT's + // SimplifyPowerElim that calls node.IsLe()/node.IsLt(). + else if (lp && rp && !cur_path.empty()) { + bool lp_le_rp = g.check_lp_le(lp, rp, this, cur_path); + bool rp_le_lp = g.check_lp_le(rp, lp, this, cur_path); + if (lp_le_rp || rp_le_lp) { + expr* smaller_exp = lp_le_rp ? lp : rp; + expr* larger_exp = lp_le_rp ? rp : lp; + eq.m_lhs = sg.drop_first(eq.m_lhs); + eq.m_rhs = sg.drop_first(eq.m_rhs); + if (lp_le_rp && rp_le_lp) { + // both ≤ → equal → both cancel completely + } else { + // strictly less: create diff power d = larger - smaller ≥ 1 + expr_ref d(g.mk_fresh_int_var()); + expr_ref one_e(arith.mk_int(1), m); + expr_ref zero_e(arith.mk_int(0), m); + expr_ref d_plus_smaller(arith.mk_add(d, smaller_exp), m); + // d ≥ 1 (strict inequality) + add_int_constraint(g.mk_int_constraint(d, one_e, int_constraint_kind::ge, eq.m_dep)); + // d + smaller = larger + add_int_constraint(g.mk_int_constraint(d_plus_smaller, larger_exp, int_constraint_kind::eq, eq.m_dep)); + expr_ref pw(seq.str.mk_power(lb, d), m); + euf::snode*& larger_side = lp_le_rp ? eq.m_rhs : eq.m_lhs; + larger_side = sg.mk_concat(sg.mk(pw), larger_side); + } + changed = true; + } + } + } + } + } } // consume leading concrete characters from str_mem via Brzozowski derivatives @@ -1062,7 +1378,7 @@ namespace seq { // Iterative deepening: increment by 1 on each failure. // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. - m_depth_bound = 3; + m_depth_bound = 10; while (true) { if (m_cancel_fn && m_cancel_fn()) { #ifdef Z3DEBUG @@ -1076,6 +1392,13 @@ namespace seq { inc_run_idx(); svector cur_path; search_result r = search_dfs(m_root, 0, cur_path); + IF_VERBOSE(1, verbose_stream() << "nseq iter=" << iter << " depth_bound=" << m_depth_bound + << " dfs_nodes=" << m_stats.m_num_dfs_nodes + << " max_depth=" << m_stats.m_max_depth + << " extensions=" << m_stats.m_num_extensions + << " arith_prune=" << m_stats.m_num_arith_infeasible + << " result=" << (r == search_result::sat ? "SAT" : r == search_result::unsat ? "UNSAT" : "UNKNOWN") + << "\n";); if (r == search_result::sat) { ++m_stats.m_num_sat; return r; @@ -1084,10 +1407,8 @@ namespace seq { ++m_stats.m_num_unsat; return r; } - // depth limit hit – increment bound by 1 and retry - if (m_max_search_depth > 0 && m_depth_bound >= m_max_search_depth) - break; - ++m_depth_bound; + // depth limit hit – double the bound and retry + m_depth_bound *= 2; } ++m_stats.m_num_unknown; return search_result::unknown; @@ -1116,7 +1437,7 @@ namespace seq { node->set_eval_idx(m_run_idx); // simplify constraints (idempotent after first call) - simplify_result sr = node->simplify_and_init(*this); + simplify_result sr = node->simplify_and_init(*this, cur_path); if (sr == simplify_result::conflict) { ++m_stats.m_num_simplify_conflict; @@ -1133,6 +1454,7 @@ namespace seq { // and verify they are jointly satisfiable using the LP solver if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) { node->set_reason(backtrack_reason::arithmetic); + ++m_stats.m_num_arith_infeasible; return search_result::unsat; } @@ -1851,10 +2173,15 @@ namespace seq { if (!eq.m_lhs || !eq.m_rhs) continue; euf::snode* lhead = eq.m_lhs->first(); euf::snode* rhead = eq.m_rhs->first(); - if (lhead && lhead->is_power() && rhead && rhead->is_char()) { + // Match power vs any non-variable, non-empty token (char, unit, + // power with different base, etc.). + // Same-base power vs power is handled by NumCmp (priority 3). + // Power vs variable is handled by PowerSplit (priority 11). + // Power vs empty is handled by PowerEpsilon (priority 2). + if (lhead && lhead->is_power() && rhead && !rhead->is_var() && !rhead->is_empty()) { power = lhead; other_head = rhead; eq_out = &eq; return true; } - if (rhead && rhead->is_power() && lhead && lhead->is_char()) { + if (rhead && rhead->is_power() && lhead && !lhead->is_var() && !lhead->is_empty()) { power = rhead; other_head = lhead; eq_out = &eq; return true; } } @@ -1886,26 +2213,41 @@ namespace seq { // ----------------------------------------------------------------------- // Modifier: apply_power_epsilon - // For a power token u^n in an equation, branch: + // Fires only when one side of an equation is empty and the other side + // starts with a power token u^n. In that case, branch: // (1) base u → ε (base is empty, so u^n = ε) // (2) u^n → ε (the power is zero, replace power with empty) - // mirrors ZIPT's PowerEpsilonModifier + // mirrors ZIPT's PowerEpsilonModifier (which requires LHS.IsEmpty()) // ----------------------------------------------------------------------- bool nielsen_graph::apply_power_epsilon(nielsen_node* node) { - euf::snode *power = find_power_token(node); + // Match only when one equation side is empty and the other starts + // with a power. This mirrors ZIPT where PowerEpsilonModifier is + // constructed only inside the "if (LHS.IsEmpty())" branch of + // ExtendDir. When both sides are non-empty and a power faces a + // constant, ConstNumUnwinding (priority 4) handles it with both + // n=0 and n≥1 branches. + euf::snode* power = nullptr; + dep_tracker dep; + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.m_lhs->is_empty() && eq.m_rhs->first() && eq.m_rhs->first()->is_power()) { + power = eq.m_rhs->first(); + dep = eq.m_dep; + break; + } + if (eq.m_rhs->is_empty() && eq.m_lhs->first() && eq.m_lhs->first()->is_power()) { + power = eq.m_lhs->first(); + dep = eq.m_dep; + break; + } + } if (!power) return false; SASSERT(power->is_power() && power->num_args() >= 1); euf::snode *base = power->arg(0); - dep_tracker dep; - for (str_eq const &eq : node->str_eqs()) { - if (!eq.is_trivial()) { - dep = eq.m_dep; - break; - } - } nielsen_node* child; nielsen_edge* e; @@ -1941,6 +2283,7 @@ namespace seq { bool nielsen_graph::apply_num_cmp(nielsen_node* node) { ast_manager& m = m_sg.get_manager(); arith_util arith(m); + seq_util& seq = m_sg.get_seq_util(); // Look for two power tokens with the same base on opposite sides for (str_eq const& eq : node->str_eqs()) { @@ -1954,35 +2297,68 @@ namespace seq { // same base: compare the two powers if (lhead->arg(0)->id() != rhead->arg(0)->id()) continue; - // Get exponent expressions (m and n from u^m and u^n) + // Skip if the exponents differ by a constant — simplify_and_init's + // power prefix elimination already handles that case. expr* exp_m = get_power_exponent(lhead); expr* exp_n = get_power_exponent(rhead); - - // Branch 1: m <= n → cancel u^m from both sides - // Side constraint: m <= n - // After cancellation: ε·A = u^(n-m)·B + if (!exp_m || !exp_n) continue; { - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_m && exp_n) - e->add_side_int(mk_int_constraint(exp_m, exp_n, int_constraint_kind::le, eq.m_dep)); + rational diff; + if (get_const_power_diff(exp_n, exp_m, arith, diff)) + continue; // handled by simplification } - // Branch 2: n < m → cancel u^n from both sides - // Side constraint: n < m, i.e., n + 1 <= m, i.e., m >= n + 1 - // After cancellation: u^(m-n)·A = ε·B + + expr* base_expr = get_power_base_expr(lhead); + if (!base_expr) continue; + expr* zero = arith.mk_int(0); + expr* one = arith.mk_int(1); + + // Use fresh integer variables for the difference exponents to avoid + // LP solver issues when arith.mk_sub produces terms that simplify + // to constants after branch constraint propagation. + + // Branch 1 (explored first): n < m → cancel u^n, leave u^d on LHS + // where d = m - n >= 1. Explored first because d≥1 means the + // difference power is non-empty, making progress in the equation. { + expr_ref d(mk_fresh_int_var()); + expr_ref diff_pow(seq.str.mk_power(base_expr, d), m); nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_m && exp_n) { - expr_ref n_plus_1(arith.mk_add(exp_n, arith.mk_int(1)), m); - e->add_side_int(mk_int_constraint(exp_m, n_plus_1, int_constraint_kind::ge, eq.m_dep)); - } + // u^n → ε + nielsen_subst s1(rhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s1); + child->apply_subst(m_sg, s1); + // u^m → u^d + nielsen_subst s2(lhead, m_sg.mk(diff_pow), eq.m_dep); + e->add_subst(s2); + child->apply_subst(m_sg, s2); + // d >= 1 + e->add_side_int(mk_int_constraint(d, one, int_constraint_kind::ge, eq.m_dep)); + // d + n = m (defines d = m - n) + expr_ref d_plus_n(arith.mk_add(d, exp_n), m); + e->add_side_int(mk_int_constraint(d_plus_n, exp_m, int_constraint_kind::eq, eq.m_dep)); + } + // Branch 2 (explored second): m <= n → cancel u^m, leave u^d on RHS + // where d = n - m >= 0 + { + expr_ref d(mk_fresh_int_var()); + expr_ref diff_pow(seq.str.mk_power(base_expr, d), m); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + // u^m → ε + nielsen_subst s1(lhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s1); + child->apply_subst(m_sg, s1); + // u^n → u^d + nielsen_subst s2(rhead, m_sg.mk(diff_pow), eq.m_dep); + e->add_subst(s2); + child->apply_subst(m_sg, s2); + // d >= 0 + e->add_side_int(mk_int_constraint(d, zero, int_constraint_kind::ge, eq.m_dep)); + // d + m = n (defines d = n - m) + expr_ref d_plus_m(arith.mk_add(d, exp_m), m); + e->add_side_int(mk_int_constraint(d_plus_m, exp_n, int_constraint_kind::eq, eq.m_dep)); } return true; } @@ -2013,7 +2389,33 @@ namespace seq { expr* zero = arith.mk_int(0); expr* one = arith.mk_int(1); - // Branch 1: n = 0 → replace power with ε (progress) + // Branch 1 (explored first): n >= 1 → peel one u: replace u^n with u · u^(n-1) + // Side constraint: n >= 1 + // Create a proper nested power base^(n-1) instead of a fresh string variable. + // This preserves power structure so that simplify_and_init can merge and + // cancel adjacent same-base powers (mirroring ZIPT's SimplifyPowerUnwind). + // Explored first because the n≥1 branch is typically more productive + // for SAT instances (preserves power structure). + { + seq_util& seq = m_sg.get_seq_util(); + expr* power_e = power->get_expr(); + SASSERT(power_e); + expr* base_expr = to_app(power_e)->get_arg(0); + expr_ref n_minus_1(arith.mk_sub(exp_n, one), m); + expr_ref nested_pow(seq.str.mk_power(base_expr, n_minus_1), m); + euf::snode* nested_power_snode = m_sg.mk(nested_pow); + + euf::snode* replacement = m_sg.mk_concat(base, nested_power_snode); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(power, replacement, eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); + } + + // Branch 2 (explored second): n = 0 → replace power with ε (progress) // Side constraint: n = 0 { nielsen_node* child = mk_child(node); @@ -2025,21 +2427,6 @@ namespace seq { e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep)); } - // Branch 2: n >= 1 → peel one u: replace u^n with u · u^(n-1) - // Side constraint: n >= 1 - // Use fresh power variable for u^(n-1) - { - euf::snode* fresh_power = mk_fresh_var(); - euf::snode* replacement = m_sg.mk_concat(base, fresh_power); - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, false); - nielsen_subst s(power, replacement, eq->m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_n) - e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); - } - return true; } @@ -2564,6 +2951,168 @@ namespace seq { expr_ref nielsen_graph::int_constraint_to_expr(int_constraint const& ic) { ast_manager& m = m_sg.get_manager(); arith_util arith(m); +<<<<<<< Updated upstream +======= + + // If already mapped, return cached + unsigned eid = e->get_id(); + lp::lpvar j; + if (m_expr2lpvar.find(eid, j)) + return j; + + // Check if this is a numeral — shouldn't be called with a numeral directly, + // but handle it gracefully by creating a fixed variable + rational val; + if (arith.is_numeral(e, val)) { + j = m_lp_solver->add_var(m_lp_ext_cnt++, true); + m_lp_solver->add_var_bound(j, lp::EQ, lp::mpq(val)); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Decompose linear arithmetic: a + b → create LP term (1*a + 1*b) + expr* e1 = nullptr; + expr* e2 = nullptr; + if (arith.is_add(e, e1, e2)) { + lp::lpvar j1 = lp_ensure_var(e1); + lp::lpvar j2 = lp_ensure_var(e2); + if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(1), j1}); + coeffs.push_back({lp::mpq(1), j2}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Decompose: a - b → create LP term (1*a + (-1)*b) + if (arith.is_sub(e, e1, e2)) { + lp::lpvar j1 = lp_ensure_var(e1); + lp::lpvar j2 = lp_ensure_var(e2); + if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(1), j1}); + coeffs.push_back({lp::mpq(-1), j2}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Decompose: c * x where c is numeral → create LP term (c*x) + if (arith.is_mul(e, e1, e2)) { + rational coeff; + if (arith.is_numeral(e1, coeff)) { + lp::lpvar jx = lp_ensure_var(e2); + if (jx == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(coeff), jx}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + if (arith.is_numeral(e2, coeff)) { + lp::lpvar jx = lp_ensure_var(e1); + if (jx == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(coeff), jx}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + } + + // Decompose: -x → create LP term (-1*x) + if (arith.is_uminus(e, e1)) { + lp::lpvar jx = lp_ensure_var(e1); + if (jx == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(-1), jx}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Leaf expression (variable, length, or other opaque term): map to fresh LP var + j = m_lp_solver->add_var(m_lp_ext_cnt++, true /* is_integer */); + m_expr2lpvar.insert(eid, j); + return j; + } + + void nielsen_graph::lp_add_constraint(int_constraint const& ic) { + if (!m_lp_solver || !ic.m_lhs || !ic.m_rhs) + return; + + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + // We handle simple patterns: + // expr numeral → add_var_bound + // expr expr → create term (lhs - rhs) and bound to 0 + rational val; + bool is_num_rhs = arith.is_numeral(ic.m_rhs, val); + + if (is_num_rhs && (is_app(ic.m_lhs))) { + // Simple case: single variable or len(x) vs numeral + lp::lpvar j = lp_ensure_var(ic.m_lhs); + if (j == lp::null_lpvar) return; + lp::mpq rhs_val(val); + switch (ic.m_kind) { + case int_constraint_kind::eq: + m_lp_solver->add_var_bound(j, lp::EQ, rhs_val); + break; + case int_constraint_kind::le: + m_lp_solver->add_var_bound(j, lp::LE, rhs_val); + break; + case int_constraint_kind::ge: + m_lp_solver->add_var_bound(j, lp::GE, rhs_val); + break; + } + return; + } + + // General case: create term lhs - rhs and bound it + // For now, handle single variable on LHS vs single variable on RHS + // (covers the common case of len(x) = len(y), n >= 0, etc.) + bool is_num_lhs = arith.is_numeral(ic.m_lhs, val); + if (is_num_lhs) { + // numeral expr → reverse and flip kind + lp::lpvar j = lp_ensure_var(ic.m_rhs); + if (j == lp::null_lpvar) return; + lp::mpq lhs_val(val); + switch (ic.m_kind) { + case int_constraint_kind::eq: + m_lp_solver->add_var_bound(j, lp::EQ, lhs_val); + break; + case int_constraint_kind::le: + // val <= rhs ⇔ rhs >= val + m_lp_solver->add_var_bound(j, lp::GE, lhs_val); + break; + case int_constraint_kind::ge: + // val >= rhs ⇔ rhs <= val + m_lp_solver->add_var_bound(j, lp::LE, lhs_val); + break; + } + return; + } + + // Both sides are expressions: create term (lhs - rhs) with bound 0 + lp::lpvar j_lhs = lp_ensure_var(ic.m_lhs); + lp::lpvar j_rhs = lp_ensure_var(ic.m_rhs); + if (j_lhs == lp::null_lpvar || j_rhs == lp::null_lpvar) return; + + // If both sides map to the same LP variable, the constraint is + // trivially satisfied (lhs - rhs = 0). Skip to avoid the empty-term + // assertion in add_term. + if (j_lhs == j_rhs) return; + + // Create a term: 1*lhs + (-1)*rhs + vector> coeffs; + coeffs.push_back({lp::mpq(1), j_lhs}); + coeffs.push_back({lp::mpq(-1), j_rhs}); + lp::lpvar term_j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + + lp::mpq zero(0); +>>>>>>> Stashed changes switch (ic.m_kind) { case int_constraint_kind::eq: return expr_ref(m.mk_eq(ic.m_lhs, ic.m_rhs), m); @@ -2626,6 +3175,61 @@ namespace seq { return exp_snode ? exp_snode->get_expr() : nullptr; } + bool nielsen_graph::check_lp_le(expr* a, expr* b, nielsen_node* node, svector const& cur_path) { + // Check if the path constraints entail a ≤ b. + // Strategy: add all constraints, then assert a > b (i.e. a - b ≥ 1). + // If infeasible, then a ≤ b is entailed. + // + // Uses fresh LP variables to avoid the add_term empty-term assertion + // that occurs when compound expressions share LP variables. + vector constraints; + collect_path_int_constraints(node, cur_path, constraints); + if (constraints.empty()) + return false; // no constraints → can't determine + + lp_solver_reset(); + for (auto const& ic : constraints) + lp_add_constraint(ic); + + // Create fresh LP variables to safely represent a - b ≥ 1 + lp::lpvar fa = m_lp_solver->add_var(m_lp_ext_cnt++, true); + lp::lpvar fb = m_lp_solver->add_var(m_lp_ext_cnt++, true); + + // fa = a (via fa - lp(a) = 0) + lp::lpvar ja = lp_ensure_var(a); + if (ja == lp::null_lpvar) return false; + if (fa != ja) { + vector> c1; + c1.push_back({lp::mpq(1), fa}); + c1.push_back({lp::mpq(-1), ja}); + lp::lpvar t1 = m_lp_solver->add_term(c1, m_lp_ext_cnt++); + m_lp_solver->add_var_bound(t1, lp::EQ, lp::mpq(0)); + } + + // fb = b (via fb - lp(b) = 0) + lp::lpvar jb = lp_ensure_var(b); + if (jb == lp::null_lpvar) return false; + if (fb != jb) { + vector> c2; + c2.push_back({lp::mpq(1), fb}); + c2.push_back({lp::mpq(-1), jb}); + lp::lpvar t2 = m_lp_solver->add_term(c2, m_lp_ext_cnt++); + m_lp_solver->add_var_bound(t2, lp::EQ, lp::mpq(0)); + } + + // Assert fa - fb ≥ 1 (i.e. a > b) using only fresh vars + { + vector> c3; + c3.push_back({lp::mpq(1), fa}); + c3.push_back({lp::mpq(-1), fb}); + lp::lpvar t3 = m_lp_solver->add_term(c3, m_lp_ext_cnt++); + m_lp_solver->add_var_bound(t3, lp::GE, lp::mpq(1)); + } + + lp::lp_status status = m_lp_solver->find_feasible_solution(); + return status == lp::lp_status::INFEASIBLE; // a > b infeasible ⟹ a ≤ b + } + expr_ref nielsen_graph::mk_fresh_int_var() { ast_manager& m = m_sg.get_manager(); arith_util arith(m); diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 1660be312..7beb16b38 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -613,8 +613,10 @@ namespace seq { void apply_subst(euf::sgraph& sg, nielsen_subst const& s); // simplify all constraints at this node and initialize status. + // cur_path provides the path from root to this node so that the + // LP solver can be queried for deterministic power cancellation. // Returns proceed, conflict, satisfied, or restart. - simplify_result simplify_and_init(nielsen_graph& g); + simplify_result simplify_and_init(nielsen_graph& g, svector const& cur_path); // true if all str_eqs are trivial and there are no str_mems bool is_satisfied() const; @@ -660,6 +662,7 @@ namespace seq { unsigned m_num_simplify_conflict = 0; unsigned m_num_extensions = 0; unsigned m_num_fresh_vars = 0; + unsigned m_num_arith_infeasible = 0; unsigned m_max_depth = 0; // modifier application counts unsigned m_mod_det = 0; @@ -681,6 +684,7 @@ namespace seq { // the overall Nielsen transformation graph // mirrors ZIPT's NielsenGraph class nielsen_graph { + friend class nielsen_node; euf::sgraph& m_sg; region m_region; ptr_vector m_nodes; @@ -940,6 +944,10 @@ namespace seq { // returns true if feasible, false if infeasible. bool check_int_feasibility(nielsen_node* node, svector const& cur_path); + // check if path constraints entail a <= b. + // Strategy: add all path constraints + (a > b) and check for infeasibility. + bool check_lp_le(expr* a, expr* b, nielsen_node* node, svector const& cur_path); + // create an integer constraint: lhs rhs int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep); diff --git a/src/test/nseq_zipt.cpp b/src/test/nseq_zipt.cpp index c70afaf13..930749de3 100644 --- a/src/test/nseq_zipt.cpp +++ b/src/test/nseq_zipt.cpp @@ -598,75 +598,83 @@ static void test_zipt_parikh() { static void test_tricky_str_equations() { std::cout << "test_tricky_str_equations\n"; - // --- SAT: commutativity / rotation / symmetry --- + // --------------------------------------------------------------- + // SAT — Conjugacy equations: Xw₂ = w₁X + // SAT iff w₂ is a rotation of w₁. In that case w₁ = qp, w₂ = pq + // and X = (qp)^n · q is a family of solutions (n ≥ 0). + // All of these are UNSAT for X = ε (the ground parts don't match), + // so the solver must find a non-trivial witness. + // --------------------------------------------------------------- - // XY = YX (classic commutativity; witness: X="ab", Y="abab") - VERIFY(eq_sat("XY", "YX")); + // "ba" is a rotation of "ab" (p="b", q="a"). X = "a". + VERIFY(eq_sat("Xba", "abX")); - // Xab = abX (X commutes with the word "ab"; witness: X="ab") - VERIFY(eq_sat("Xab", "abX")); + // "cb" is a rotation of "bc" (p="c", q="b"). X = "b". + VERIFY(eq_sat("Xcb", "bcX")); - // XaY = YaX (swap-symmetric; witness: X=Y=any, e.g. X=Y="b") - VERIFY(eq_sat("XaY", "YaX")); + // "bca" is a rotation of "abc" (p="bc", q="a"). X = "a". + VERIFY(eq_sat("Xbca", "abcX")); - // XYX = YXY (Markov-type; witness: X=Y) - VERIFY(eq_sat("XYX", "YXY")); + // "cab" is a rotation of "abc" (p="c", q="ab"). X = "ab". + VERIFY(eq_sat("Xcab", "abcX")); - // XYZ = ZYX (reverse-palindrome; witness: X="a",Y="b",Z="a") - VERIFY(eq_sat("XYZ", "ZYX")); + // --------------------------------------------------------------- + // SAT — Power decomposition (ε NOT a solution) + // --------------------------------------------------------------- - // XabY = YabX (rotation-like; witness: X="",Y="ab") - VERIFY(eq_sat("XabY", "YabX")); + // XX = aa ⇒ X = "a". (ε gives "" ≠ "aa") + VERIFY(eq_sat("XX", "aa")); - // aXYa = aYXa (cancel outer 'a'; reduces to XY=YX; witness: X=Y="") - VERIFY(eq_sat("aXYa", "aYXa")); + // XbX = aba ⇒ 2|X| + 1 = 3 forces |X| = 1; X = "a". + // (ε gives "b" ≠ "aba") + VERIFY(eq_sat("XbX", "aba")); - // XaXb = YaYb (both halves share variable; witness: X=Y) - VERIFY(eq_sat("XaXb", "YaYb")); + // --------------------------------------------------------------- + // SAT — Multi-variable (ε NOT a solution) + // --------------------------------------------------------------- - // abXba = Xabba (witness: X="" gives "abba"="abba") - VERIFY(eq_sat("abXba", "Xabba")); + // X = "b", Y = "a" gives "baab" = "baab". (ε gives "ab" ≠ "ba") + VERIFY(eq_sat("XaYb", "bYaX")); - // --- UNSAT: first-character mismatch --- + // X = "b", Y = "a" gives "abba" = "abba". (ε gives "ab" ≠ "ba") + VERIFY(eq_sat("abXY", "YXba")); - // abXba = baXab (LHS starts 'a', RHS starts 'b') - VERIFY(eq_unsat("abXba", "baXab")); + // --------------------------------------------------------------- + // UNSAT — Non-conjugate rotation + // Xw₂ = w₁X where w₂ is NOT a rotation of w₁. + // Heads are variable vs char, so never a trivial first-char clash. + // GPowerIntr introduces periodicity, then the period boundaries + // give a character mismatch. + // --------------------------------------------------------------- - // XabX = XbaX (cancel X prefix/suffix → "ab"="ba"; 'a'≠'b') - VERIFY(eq_unsat("XabX", "XbaX")); + // "cba" is the reverse of "abc", NOT a rotation. + // Rotations of "abc" are: abc, bca, cab. + VERIFY(eq_unsat("Xcba", "abcX")); - // --- UNSAT: mismatch exposed after cancellation --- + // "acb" is a transposition of "abc", NOT a rotation. + VERIFY(eq_unsat("Xacb", "abcX")); - // XaYb = XbYa (cancel X prefix → aYb=bYa; first char 'a'≠'b') - VERIFY(eq_unsat("XaYb", "XbYa")); + // --------------------------------------------------------------- + // UNSAT — Induction via GPowerIntr + // One side starts with a variable that also appears on the other + // side behind a ground prefix → self-cycle. GPowerIntr forces + // periodicity; all period branches yield character mismatches. + // None of these has a trivial first-char or last-char clash. + // --------------------------------------------------------------- - // XaYbX = XbYaX (cancel X prefix+suffix → aYb=bYa; first char 'a'≠'b') - VERIFY(eq_unsat("XaYbX", "XbYaX")); + // Xa = bX: LHS head = X. Scan RHS: [b], X → self-cycle, base "b". + // X = b^n ⇒ b^n·a = b^{n+1} ⇒ last chars a ≠ b ⊥. + VERIFY(eq_unsat("Xa", "bX")); - // XaXbX = XbXaX (cancel X prefix+suffix → aXb=bXa; first char 'a'≠'b') - VERIFY(eq_unsat("XaXbX", "XbXaX")); + // acX = Xbc: RHS head = X. Scan LHS: [a,c], X → self-cycle, base "ac". + // X = (ac)^n ⇒ ac = bc ⊥ (a ≠ b). + // X = (ac)^n·a ⇒ aca = abc ⊥ (c ≠ b). + VERIFY(eq_unsat("acX", "Xbc")); - // --- UNSAT: induction --- - - // aXb = Xba (forces X=a^n; final step requires a=b) - VERIFY(eq_unsat("aXb", "Xba")); - - // XaY = YbX (a≠b; recursive unrolling forces a=b) - VERIFY(eq_unsat("XaY", "YbX")); - - // --- UNSAT: length parity --- - - // XaX = YY (|XaX|=2|X|+1 is odd; |YY|=2|Y| is even) - VERIFY(eq_unsat("XaX", "YY")); - - // XaaX = YbY (|XaaX|=2|X|+2 is even; |YbY|=2|Y|+1 is odd) - VERIFY(eq_unsat("XaaX", "YbY")); - - // --- UNSAT: midpoint argument --- - - // XaX = YbY (equal length forces |X|=|Y|; midpoint position |X| - // holds 'a' in LHS and 'b' in RHS, but 'a'≠'b') - VERIFY(eq_unsat("XaX", "YbY")); + // bcX = Xab: RHS head = X. Scan LHS: [b,c], X → self-cycle, base "bc". + // X = (bc)^n ⇒ bc = ab ⊥ (b ≠ a). + // X = (bc)^n·b ⇒ bcb = bab ⊥ (c ≠ a). + VERIFY(eq_unsat("bcX", "Xab")); std::cout << " ok\n"; } From 6d0468861db52a7f601fda2a59e3977b1439c663 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Wed, 11 Mar 2026 13:05:27 +0100 Subject: [PATCH 2/6] Fixed git merge problems --- src/smt/seq/seq_nielsen.cpp | 243 ++++-------------------------------- src/smt/seq/seq_nielsen.h | 8 +- 2 files changed, 30 insertions(+), 221 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 7fdee401a..e0d9bcf97 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -1392,7 +1392,7 @@ namespace seq { inc_run_idx(); svector cur_path; search_result r = search_dfs(m_root, 0, cur_path); - IF_VERBOSE(1, verbose_stream() << "nseq iter=" << iter << " depth_bound=" << m_depth_bound + IF_VERBOSE(1, verbose_stream() << " depth_bound=" << m_depth_bound << " dfs_nodes=" << m_stats.m_num_dfs_nodes << " max_depth=" << m_stats.m_max_depth << " extensions=" << m_stats.m_num_extensions @@ -2951,168 +2951,6 @@ namespace seq { expr_ref nielsen_graph::int_constraint_to_expr(int_constraint const& ic) { ast_manager& m = m_sg.get_manager(); arith_util arith(m); -<<<<<<< Updated upstream -======= - - // If already mapped, return cached - unsigned eid = e->get_id(); - lp::lpvar j; - if (m_expr2lpvar.find(eid, j)) - return j; - - // Check if this is a numeral — shouldn't be called with a numeral directly, - // but handle it gracefully by creating a fixed variable - rational val; - if (arith.is_numeral(e, val)) { - j = m_lp_solver->add_var(m_lp_ext_cnt++, true); - m_lp_solver->add_var_bound(j, lp::EQ, lp::mpq(val)); - m_expr2lpvar.insert(eid, j); - return j; - } - - // Decompose linear arithmetic: a + b → create LP term (1*a + 1*b) - expr* e1 = nullptr; - expr* e2 = nullptr; - if (arith.is_add(e, e1, e2)) { - lp::lpvar j1 = lp_ensure_var(e1); - lp::lpvar j2 = lp_ensure_var(e2); - if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(1), j1}); - coeffs.push_back({lp::mpq(1), j2}); - j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - - // Decompose: a - b → create LP term (1*a + (-1)*b) - if (arith.is_sub(e, e1, e2)) { - lp::lpvar j1 = lp_ensure_var(e1); - lp::lpvar j2 = lp_ensure_var(e2); - if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(1), j1}); - coeffs.push_back({lp::mpq(-1), j2}); - j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - - // Decompose: c * x where c is numeral → create LP term (c*x) - if (arith.is_mul(e, e1, e2)) { - rational coeff; - if (arith.is_numeral(e1, coeff)) { - lp::lpvar jx = lp_ensure_var(e2); - if (jx == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(coeff), jx}); - j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - if (arith.is_numeral(e2, coeff)) { - lp::lpvar jx = lp_ensure_var(e1); - if (jx == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(coeff), jx}); - j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - } - - // Decompose: -x → create LP term (-1*x) - if (arith.is_uminus(e, e1)) { - lp::lpvar jx = lp_ensure_var(e1); - if (jx == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(-1), jx}); - j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - - // Leaf expression (variable, length, or other opaque term): map to fresh LP var - j = m_lp_solver->add_var(m_lp_ext_cnt++, true /* is_integer */); - m_expr2lpvar.insert(eid, j); - return j; - } - - void nielsen_graph::lp_add_constraint(int_constraint const& ic) { - if (!m_lp_solver || !ic.m_lhs || !ic.m_rhs) - return; - - ast_manager& m = m_sg.get_manager(); - arith_util arith(m); - - // We handle simple patterns: - // expr numeral → add_var_bound - // expr expr → create term (lhs - rhs) and bound to 0 - rational val; - bool is_num_rhs = arith.is_numeral(ic.m_rhs, val); - - if (is_num_rhs && (is_app(ic.m_lhs))) { - // Simple case: single variable or len(x) vs numeral - lp::lpvar j = lp_ensure_var(ic.m_lhs); - if (j == lp::null_lpvar) return; - lp::mpq rhs_val(val); - switch (ic.m_kind) { - case int_constraint_kind::eq: - m_lp_solver->add_var_bound(j, lp::EQ, rhs_val); - break; - case int_constraint_kind::le: - m_lp_solver->add_var_bound(j, lp::LE, rhs_val); - break; - case int_constraint_kind::ge: - m_lp_solver->add_var_bound(j, lp::GE, rhs_val); - break; - } - return; - } - - // General case: create term lhs - rhs and bound it - // For now, handle single variable on LHS vs single variable on RHS - // (covers the common case of len(x) = len(y), n >= 0, etc.) - bool is_num_lhs = arith.is_numeral(ic.m_lhs, val); - if (is_num_lhs) { - // numeral expr → reverse and flip kind - lp::lpvar j = lp_ensure_var(ic.m_rhs); - if (j == lp::null_lpvar) return; - lp::mpq lhs_val(val); - switch (ic.m_kind) { - case int_constraint_kind::eq: - m_lp_solver->add_var_bound(j, lp::EQ, lhs_val); - break; - case int_constraint_kind::le: - // val <= rhs ⇔ rhs >= val - m_lp_solver->add_var_bound(j, lp::GE, lhs_val); - break; - case int_constraint_kind::ge: - // val >= rhs ⇔ rhs <= val - m_lp_solver->add_var_bound(j, lp::LE, lhs_val); - break; - } - return; - } - - // Both sides are expressions: create term (lhs - rhs) with bound 0 - lp::lpvar j_lhs = lp_ensure_var(ic.m_lhs); - lp::lpvar j_rhs = lp_ensure_var(ic.m_rhs); - if (j_lhs == lp::null_lpvar || j_rhs == lp::null_lpvar) return; - - // If both sides map to the same LP variable, the constraint is - // trivially satisfied (lhs - rhs = 0). Skip to avoid the empty-term - // assertion in add_term. - if (j_lhs == j_rhs) return; - - // Create a term: 1*lhs + (-1)*rhs - vector> coeffs; - coeffs.push_back({lp::mpq(1), j_lhs}); - coeffs.push_back({lp::mpq(-1), j_rhs}); - lp::lpvar term_j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); - - lp::mpq zero(0); ->>>>>>> Stashed changes switch (ic.m_kind) { case int_constraint_kind::eq: return expr_ref(m.mk_eq(ic.m_lhs, ic.m_rhs), m); @@ -3162,6 +3000,30 @@ namespace seq { return result != l_false; } + bool nielsen_graph::check_lp_le(expr* lhs, expr* rhs, + nielsen_node* node, + svector const& cur_path) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + vector constraints; + collect_path_int_constraints(node, cur_path, constraints); + + m_solver.push(); + for (auto const& ic : constraints) + m_solver.assert_expr(int_constraint_to_expr(ic)); + + // Assert NOT(lhs <= rhs), i.e., lhs > rhs, i.e., lhs >= rhs + 1 + // If this is unsat, then lhs <= rhs is entailed by the path constraints. + expr_ref one(arith.mk_int(1), m); + expr_ref rhs_plus_one(arith.mk_add(rhs, one), m); + m_solver.assert_expr(arith.mk_ge(lhs, rhs_plus_one)); + + lbool result = m_solver.check(); + m_solver.pop(1); + return result == l_false; + } + int_constraint nielsen_graph::mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep) { @@ -3175,61 +3037,6 @@ namespace seq { return exp_snode ? exp_snode->get_expr() : nullptr; } - bool nielsen_graph::check_lp_le(expr* a, expr* b, nielsen_node* node, svector const& cur_path) { - // Check if the path constraints entail a ≤ b. - // Strategy: add all constraints, then assert a > b (i.e. a - b ≥ 1). - // If infeasible, then a ≤ b is entailed. - // - // Uses fresh LP variables to avoid the add_term empty-term assertion - // that occurs when compound expressions share LP variables. - vector constraints; - collect_path_int_constraints(node, cur_path, constraints); - if (constraints.empty()) - return false; // no constraints → can't determine - - lp_solver_reset(); - for (auto const& ic : constraints) - lp_add_constraint(ic); - - // Create fresh LP variables to safely represent a - b ≥ 1 - lp::lpvar fa = m_lp_solver->add_var(m_lp_ext_cnt++, true); - lp::lpvar fb = m_lp_solver->add_var(m_lp_ext_cnt++, true); - - // fa = a (via fa - lp(a) = 0) - lp::lpvar ja = lp_ensure_var(a); - if (ja == lp::null_lpvar) return false; - if (fa != ja) { - vector> c1; - c1.push_back({lp::mpq(1), fa}); - c1.push_back({lp::mpq(-1), ja}); - lp::lpvar t1 = m_lp_solver->add_term(c1, m_lp_ext_cnt++); - m_lp_solver->add_var_bound(t1, lp::EQ, lp::mpq(0)); - } - - // fb = b (via fb - lp(b) = 0) - lp::lpvar jb = lp_ensure_var(b); - if (jb == lp::null_lpvar) return false; - if (fb != jb) { - vector> c2; - c2.push_back({lp::mpq(1), fb}); - c2.push_back({lp::mpq(-1), jb}); - lp::lpvar t2 = m_lp_solver->add_term(c2, m_lp_ext_cnt++); - m_lp_solver->add_var_bound(t2, lp::EQ, lp::mpq(0)); - } - - // Assert fa - fb ≥ 1 (i.e. a > b) using only fresh vars - { - vector> c3; - c3.push_back({lp::mpq(1), fa}); - c3.push_back({lp::mpq(-1), fb}); - lp::lpvar t3 = m_lp_solver->add_term(c3, m_lp_ext_cnt++); - m_lp_solver->add_var_bound(t3, lp::GE, lp::mpq(1)); - } - - lp::lp_status status = m_lp_solver->find_feasible_solution(); - return status == lp::lp_status::INFEASIBLE; // a > b infeasible ⟹ a ≤ b - } - expr_ref nielsen_graph::mk_fresh_int_var() { ast_manager& m = m_sg.get_manager(); arith_util arith(m); diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 7beb16b38..4f5e24572 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -944,9 +944,11 @@ namespace seq { // returns true if feasible, false if infeasible. bool check_int_feasibility(nielsen_node* node, svector const& cur_path); - // check if path constraints entail a <= b. - // Strategy: add all path constraints + (a > b) and check for infeasibility. - bool check_lp_le(expr* a, expr* b, nielsen_node* node, svector const& cur_path); + // check whether lhs <= rhs is implied by the path constraints. + // mirrors ZIPT's NielsenNode.IsLe(): collects path constraints, + // asserts their conjunction plus NOT(lhs <= rhs), and returns true + // iff the result is unsatisfiable (i.e., lhs <= rhs is entailed). + bool check_lp_le(expr* lhs, expr* rhs, nielsen_node* node, svector const& cur_path); // create an integer constraint: lhs rhs int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep); From d23f376b39312825582447d09bc264b695ca9ad3 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Wed, 11 Mar 2026 16:44:14 +0100 Subject: [PATCH 3/6] Fixed a lot regarding powers, but there seems to be a model reconstruction bug left --- src/smt/seq/seq_nielsen.cpp | 659 +++++++++++++++++++++++++++--------- src/smt/seq/seq_nielsen.h | 15 +- 2 files changed, 512 insertions(+), 162 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index e0d9bcf97..2ded22793 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -22,6 +22,7 @@ Author: #include "smt/seq/seq_nielsen.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" +#include "ast/rewriter/th_rewriter.h" #include "util/hashtable.h" #include #include @@ -29,6 +30,16 @@ Author: namespace seq { + // Normalize an arithmetic expression using th_rewriter. + // Simplifies e.g. (n - 1 + 1) to n, preventing unbounded growth + // of power exponents during unwind/merge cycles. + static expr_ref normalize_arith(ast_manager& m, expr* e) { + expr_ref result(e, m); + th_rewriter rw(m); + rw(result); + return result; + } + // ----------------------------------------------- // str_eq // ----------------------------------------------- @@ -599,6 +610,79 @@ namespace seq { return result; } + // Helper: render an arithmetic/integer expression in infix HTML notation. + // Recognises +, -, *, unary minus, numerals, str.len, and named constants; + // falls back to HTML-escaped mk_pp for anything else. + static std::string arith_expr_html(expr* e, ast_manager& m) { + if (!e) return "null"; + arith_util arith(m); + seq_util seq(m); + rational val; + if (arith.is_numeral(e, val)) + return val.to_string(); + if (!is_app(e)) { + std::ostringstream os; + os << mk_pp(e, m); + return dot_html_escape(os.str()); + } + app* a = to_app(e); + expr* x, * y; + if (arith.is_add(e)) { + std::string r = arith_expr_html(a->get_arg(0), m); + for (unsigned i = 1; i < a->get_num_args(); ++i) { + expr* arg = a->get_arg(i); + // render (+ x (- y)) as "x - y" and (+ x (- n)) as "x - n" + expr* neg_inner; + rational neg_val; + if (arith.is_uminus(arg, neg_inner)) { + r += " − "; // minus sign + r += arith_expr_html(neg_inner, m); + } else if (arith.is_numeral(arg, neg_val) && neg_val.is_neg()) { + r += " − "; // minus sign + r += (-neg_val).to_string(); + } else { + r += " + "; + r += arith_expr_html(arg, m); + } + } + return r; + } + if (arith.is_sub(e, x, y)) + return arith_expr_html(x, m) + " − " + arith_expr_html(y, m); + if (arith.is_uminus(e, x)) + return "−" + arith_expr_html(x, m); + if (arith.is_mul(e)) { + std::string r; + for (unsigned i = 0; i < a->get_num_args(); ++i) { + if (i > 0) r += " · "; // middle dot + r += arith_expr_html(a->get_arg(i), m); + } + return r; + } + if (seq.str.is_length(e, x)) { + return "|" + dot_html_escape(to_app(x)->get_decl()->get_name().str()) + "|"; + } + // named constant (fresh variable like n!0) + if (a->get_num_args() == 0) + return dot_html_escape(a->get_decl()->get_name().str()); + // fallback + std::ostringstream os; + os << mk_pp(e, m); + return dot_html_escape(os.str()); + } + + // Helper: render an int_constraint as an HTML string for DOT edge labels. + static std::string int_constraint_html(int_constraint const& ic, ast_manager& m) { + std::string r = arith_expr_html(ic.m_lhs, m); + switch (ic.m_kind) { + case int_constraint_kind::eq: r += " = "; break; + case int_constraint_kind::le: r += " ≤ "; break; // ≤ + case int_constraint_kind::ge: r += " ≥ "; break; // ≥ + } + r += arith_expr_html(ic.m_rhs, m); + return r; + } + // Helper: render an snode as an HTML label for DOT output. // Groups consecutive s_char tokens into quoted strings, renders s_var by name, // shows s_power with superscripts, s_unit by its inner expression, @@ -675,9 +759,7 @@ namespace seq { result += base_html; result += ""; expr* exp_expr = to_app(e)->get_arg(1); - std::ostringstream os; - os << mk_pp(exp_expr, m); - result += dot_html_escape(os.str()); + result += arith_expr_html(exp_expr, m); result += ""; } else { std::ostringstream os; @@ -722,6 +804,11 @@ namespace seq { out << "?" << kv.m_key << " ≠ ?" << d->id() << "
"; } } + // integer constraints + for (auto const& ic : m_int_constraints) { + if (!any) { out << "Cnstr:
"; any = true; } + out << int_constraint_html(ic, m) << "
"; + } if (!any) out << "⊤"; // ⊤ (trivially satisfied) @@ -823,6 +910,34 @@ namespace seq { out << "?" << cs.m_var->id() << " → ?" << cs.m_val->id(); } + // side constraints: string equalities + for (auto const* eq : e->side_str_eq()) { + if (!first) out << "
"; + first = false; + out << "" + << snode_label_html(eq->m_lhs, m) + << " = " + << snode_label_html(eq->m_rhs, m) + << ""; + } + // side constraints: regex memberships + for (auto const* mem : e->side_str_mem()) { + if (!first) out << "
"; + first = false; + out << "" + << snode_label_html(mem->m_str, m) + << " ∈ " + << snode_label_html(mem->m_regex, m) + << ""; + } + // side constraints: integer equalities/inequalities + for (auto const& ic : e->side_int()) { + if (!first) out << "
"; + first = false; + out << "" + << int_constraint_html(ic, m) + << ""; + } out << ">"; // colour @@ -963,8 +1078,11 @@ namespace seq { while (i < tokens.size()) { euf::snode* tok = tokens[i]; - // Case 1: current is a power token — absorb following same-base tokens - if (tok->is_power()) { + // Case 1: current is a power token — absorb following same-base tokens. + // Skip at leading position (i == 0) to keep exponents small: CommPower + // cross-side cancellation works better with unmerged leading powers + // (e.g., w^k trivially ≤ 1+k, but w^(2k) vs 1+k requires k ≥ 1). + if (tok->is_power() && i > 0) { expr* base_e = get_power_base_expr(tok); expr* exp_acc = get_power_exp_expr(tok); if (!base_e || !exp_acc) { result.push_back(tok); ++i; continue; } @@ -988,7 +1106,8 @@ namespace seq { } if (local_merged) { merged = true; - expr_ref new_pow(seq.str.mk_power(base_e, exp_acc), m); + expr_ref norm_exp = normalize_arith(m, exp_acc); + expr_ref new_pow(seq.str.mk_power(base_e, norm_exp), m); result.push_back(sg.mk(new_pow)); } else { result.push_back(tok); @@ -997,8 +1116,10 @@ namespace seq { continue; } - // Case 2: current is a char — check if next is a same-base power - if (tok->is_char() && tok->get_expr() && i + 1 < tokens.size()) { + // Case 2: current is a char — check if next is a same-base power. + // Skip at leading position (i == 0) to avoid undoing power unwinding: + // unwind produces u · u^(n-1); merging it back to u^n creates an infinite cycle. + if (i > 0 && tok->is_char() && tok->get_expr() && i + 1 < tokens.size()) { euf::snode* next = tokens[i + 1]; if (next->is_power() && get_power_base_expr(next) == tok->get_expr()) { expr* base_e = tok->get_expr(); @@ -1020,7 +1141,8 @@ namespace seq { break; } merged = true; - expr_ref new_pow(seq.str.mk_power(base_e, exp_acc), m); + expr_ref norm_exp = normalize_arith(m, exp_acc); + expr_ref new_pow(seq.str.mk_power(base_e, norm_exp), m); result.push_back(sg.mk(new_pow)); i = j; continue; @@ -1088,6 +1210,71 @@ namespace seq { return rebuilt; } + // CommPower: count how many times a power's base pattern appears in + // the prefix of the other side. Mirrors ZIPT's CommPower helper. + // Returns (count_expr, num_tokens_consumed). count_expr is nullptr + // when no complete base-pattern match is found. + static std::pair comm_power( + euf::snode* base_sn, euf::snode* side, ast_manager& m) { + arith_util arith(m); + euf::snode_vector base_tokens, side_tokens; + base_sn->collect_tokens(base_tokens); + side->collect_tokens(side_tokens); + if (base_tokens.empty() || side_tokens.empty()) + return {expr_ref(nullptr, m), 0}; + + expr* sum = nullptr; + unsigned pos = 0; + expr* last_stable_sum = nullptr; + unsigned last_stable_idx = 0; + + unsigned i = 0; + for (; i < side_tokens.size(); i++) { + euf::snode* t = side_tokens[i]; + if (pos == 0) { + last_stable_idx = i; + last_stable_sum = sum; + } + // Case 1: direct token match with base pattern + if (pos < base_tokens.size() && t == base_tokens[pos]) { + pos++; + if (pos >= base_tokens.size()) { + pos = 0; + sum = sum ? arith.mk_add(sum, arith.mk_int(1)) + : arith.mk_int(1); + } + continue; + } + // Case 2: power token whose base matches our base pattern (at pos == 0) + if (pos == 0 && t->is_power()) { + euf::snode* pow_base = t->arg(0); + if (pow_base) { + euf::snode_vector pb_tokens; + pow_base->collect_tokens(pb_tokens); + if (pb_tokens.size() == base_tokens.size()) { + bool match = true; + for (unsigned j = 0; j < pb_tokens.size() && match; j++) + match = (pb_tokens[j] == base_tokens[j]); + if (match) { + expr* pow_exp = get_power_exp_expr(t); + if (pow_exp) { + sum = sum ? arith.mk_add(sum, pow_exp) : pow_exp; + continue; + } + } + } + } + } + break; + } + // After loop: i = break index or side_tokens.size() + if (pos == 0) { + last_stable_idx = i; + last_stable_sum = sum; + } + return {expr_ref(last_stable_sum, m), last_stable_idx}; + } + simplify_result nielsen_node::simplify_and_init(nielsen_graph& g, svector const& cur_path) { euf::sgraph& sg = g.sg(); ast_manager& m = sg.get_manager(); @@ -1195,9 +1382,68 @@ namespace seq { if (euf::snode* s = merge_adjacent_powers(sg, eq.m_rhs)) { eq.m_rhs = s; changed = true; } - // 3c: power prefix elimination — when both sides start with a + // 3c: CommPower-based power elimination — when one side starts + // with a power w^p, count base-pattern occurrences c on the + // other side's prefix. If we can determine the ordering between + // p and c, cancel the matched portion. Mirrors ZIPT's + // SimplifyPowerElim calling CommPower. + if (!eq.m_lhs || !eq.m_rhs) continue; + for (int dir = 0; dir < 2; dir++) { + euf::snode*& pow_side = (dir == 0) ? eq.m_lhs : eq.m_rhs; + euf::snode*& other_side = (dir == 0) ? eq.m_rhs : eq.m_lhs; + if (!pow_side || !other_side) continue; + euf::snode* first_tok = pow_side->first(); + if (!first_tok || !first_tok->is_power()) continue; + euf::snode* base_sn = first_tok->arg(0); + expr* pow_exp = get_power_exp_expr(first_tok); + if (!base_sn || !pow_exp) continue; + + auto [count, consumed] = comm_power(base_sn, other_side, m); + if (!count.get() || consumed == 0) continue; + + expr_ref norm_count = normalize_arith(m, count); + bool pow_le_count = false, count_le_pow = false; + rational diff; + if (get_const_power_diff(norm_count, pow_exp, arith, diff)) { + count_le_pow = diff.is_nonpos(); + pow_le_count = diff.is_nonneg(); + } else if (!cur_path.empty()) { + pow_le_count = g.check_lp_le(pow_exp, norm_count, this, cur_path); + count_le_pow = g.check_lp_le(norm_count, pow_exp, this, cur_path); + } + if (!pow_le_count && !count_le_pow) continue; + + pow_side = sg.drop_first(pow_side); + other_side = sg.drop_left(other_side, consumed); + expr* base_e = get_power_base_expr(first_tok); + if (pow_le_count && count_le_pow) { + // equal: both cancel completely + } else if (pow_le_count) { + // pow <= count: remainder goes to other_side + expr_ref rem = normalize_arith(m, arith.mk_sub(norm_count, pow_exp)); + expr_ref pw(seq.str.mk_power(base_e, rem), m); + other_side = sg.mk_concat(sg.mk(pw), other_side); + } else { + // count <= pow: remainder goes to pow_side + expr_ref rem = normalize_arith(m, arith.mk_sub(pow_exp, norm_count)); + expr_ref pw(seq.str.mk_power(base_e, rem), m); + pow_side = sg.mk_concat(sg.mk(pw), pow_side); + } + changed = true; + break; + } + + // After any change in passes 3a–3c, restart the while loop + // before running 3d/3e. This lets 3a simplify new constant- + // exponent powers (e.g. base^1 → base created by 3c) before + // 3e attempts LP-based elimination which would introduce a + // needless fresh variable. + if (changed) continue; + + // 3d: power prefix elimination — when both sides start with a // power of the same base, cancel the common power prefix. - // Mirrors ZIPT's SimplifyPowerElim + CommPower. + // (Subsumed by 3c for many cases, but handles same-base-power + // pairs that CommPower may miss when both leading tokens are powers.) if (!eq.m_lhs || !eq.m_rhs) continue; euf::snode* lh = eq.m_lhs->first(); euf::snode* rh = eq.m_rhs->first(); @@ -1226,7 +1472,7 @@ namespace seq { // diff == 0: both powers cancel completely changed = true; } - // 3d: LP-aware power prefix elimination + // 3e: LP-aware power prefix elimination // When the exponent difference is non-constant, query the // LP solver to determine which exponent is larger, and // cancel deterministically. This avoids the branching @@ -1368,50 +1614,59 @@ namespace seq { if (!m_root) return search_result::sat; - ++m_stats.m_num_solve_calls; - m_sat_node = nullptr; - m_sat_path.reset(); + try { + ++m_stats.m_num_solve_calls; + m_sat_node = nullptr; + m_sat_path.reset(); - // Constraint.Shared: assert root-level length/Parikh constraints to the - // solver at the base level, so they are visible during all feasibility checks. - assert_root_constraints_to_solver(); + // Constraint.Shared: assert root-level length/Parikh constraints to the + // solver at the base level, so they are visible during all feasibility checks. + assert_root_constraints_to_solver(); - // Iterative deepening: increment by 1 on each failure. - // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. - m_depth_bound = 10; - while (true) { - if (m_cancel_fn && m_cancel_fn()) { + // Iterative deepening: increment by 1 on each failure. + // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. + m_depth_bound = 10; + while (true) { + if (m_cancel_fn && m_cancel_fn()) { #ifdef Z3DEBUG - // Examining the Nielsen graph is probably the best way of debugging - std::string dot = to_dot(); + // Examining the Nielsen graph is probably the best way of debugging + std::string dot = to_dot(); #endif - break; + break; + } + if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) + break; + inc_run_idx(); + svector cur_path; + search_result r = search_dfs(m_root, 0, cur_path); + IF_VERBOSE(1, verbose_stream() + << " depth_bound=" << m_depth_bound << " dfs_nodes=" << m_stats.m_num_dfs_nodes + << " max_depth=" << m_stats.m_max_depth << " extensions=" << m_stats.m_num_extensions + << " arith_prune=" << m_stats.m_num_arith_infeasible << " result=" + << (r == search_result::sat ? "SAT" + : r == search_result::unsat ? "UNSAT" + : "UNKNOWN") + << "\n";); + if (r == search_result::sat) { + ++m_stats.m_num_sat; + return r; + } + if (r == search_result::unsat) { + ++m_stats.m_num_unsat; + return r; + } + // depth limit hit – double the bound and retry + m_depth_bound *= 2; } - if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) - break; - inc_run_idx(); - svector cur_path; - search_result r = search_dfs(m_root, 0, cur_path); - IF_VERBOSE(1, verbose_stream() << " depth_bound=" << m_depth_bound - << " dfs_nodes=" << m_stats.m_num_dfs_nodes - << " max_depth=" << m_stats.m_max_depth - << " extensions=" << m_stats.m_num_extensions - << " arith_prune=" << m_stats.m_num_arith_infeasible - << " result=" << (r == search_result::sat ? "SAT" : r == search_result::unsat ? "UNSAT" : "UNKNOWN") - << "\n";); - if (r == search_result::sat) { - ++m_stats.m_num_sat; - return r; - } - if (r == search_result::unsat) { - ++m_stats.m_num_unsat; - return r; - } - // depth limit hit – double the bound and retry - m_depth_bound *= 2; + ++m_stats.m_num_unknown; + return search_result::unknown; + } + catch (...) { +#ifdef Z3DEBUG + std::string dot = to_dot(); +#endif + throw; } - ++m_stats.m_num_unknown; - return search_result::unknown; } nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth, svector& cur_path) { @@ -2096,6 +2351,12 @@ namespace seq { if (apply_num_cmp(node)) return ++m_stats.m_mod_num_cmp, true; + // Priority 3b: SplitPowerElim - CommPower-based branching when + // one side has a power and the other side has same-base occurrences + // but ordering is unknown. + if (apply_split_power_elim(node)) + return ++m_stats.m_mod_split_power_elim, true; + // Priority 4: ConstNumUnwinding - power vs constant: n=0 or peel if (apply_const_num_unwinding(node)) return ++m_stats.m_mod_const_num_unwinding, true; @@ -2164,7 +2425,7 @@ namespace seq { // Returns true if found, sets power, other_head, eq_out. // ----------------------------------------------------------------------- - bool nielsen_graph::find_power_vs_const(nielsen_node* node, + bool nielsen_graph::find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const { @@ -2179,10 +2440,16 @@ namespace seq { // Power vs variable is handled by PowerSplit (priority 11). // Power vs empty is handled by PowerEpsilon (priority 2). if (lhead && lhead->is_power() && rhead && !rhead->is_var() && !rhead->is_empty()) { - power = lhead; other_head = rhead; eq_out = &eq; return true; + power = lhead; + other_head = rhead; + eq_out = &eq; + return true; } if (rhead && rhead->is_power() && lhead && !lhead->is_var() && !lhead->is_empty()) { - power = rhead; other_head = lhead; eq_out = &eq; return true; + power = rhead; + other_head = lhead; + eq_out = &eq; + return true; } } return false; @@ -2283,88 +2550,126 @@ namespace seq { bool nielsen_graph::apply_num_cmp(nielsen_node* node) { ast_manager& m = m_sg.get_manager(); arith_util arith(m); - seq_util& seq = m_sg.get_seq_util(); // Look for two power tokens with the same base on opposite sides for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; euf::snode* lhead = eq.m_lhs->first(); euf::snode* rhead = eq.m_rhs->first(); if (!lhead || !rhead) continue; - if (!lhead->is_power() || !rhead->is_power()) continue; - if (lhead->num_args() < 1 || rhead->num_args() < 1) continue; + if (!lhead->is_power() || !rhead->is_power()) + continue; + if (lhead->num_args() < 1 || rhead->num_args() < 1) + continue; // same base: compare the two powers - if (lhead->arg(0)->id() != rhead->arg(0)->id()) continue; + if (lhead->arg(0)->id() != rhead->arg(0)->id()) + continue; // Skip if the exponents differ by a constant — simplify_and_init's // power prefix elimination already handles that case. expr* exp_m = get_power_exponent(lhead); expr* exp_n = get_power_exponent(rhead); - if (!exp_m || !exp_n) continue; + if (!exp_m || !exp_n) + continue; + rational diff; + SASSERT(!get_const_power_diff(exp_n, exp_m, arith, diff)); // handled by simplification + + // Only add ordering constraints — do NOT use global substitution. + // The child's simplify_and_init (pass 3d/3e) will see same-base + // leading powers and cancel them using the LP-entailed ordering. + + // Branch 1 (explored first): n < m (i.e., m ≥ n + 1) + // After constraint, simplify_and_init sees m > n and cancels u^n, + // leaving u^(m-n) on LHS. { - rational diff; - if (get_const_power_diff(exp_n, exp_m, arith, diff)) - continue; // handled by simplification - } - - expr* base_expr = get_power_base_expr(lhead); - if (!base_expr) continue; - expr* zero = arith.mk_int(0); - expr* one = arith.mk_int(1); - - // Use fresh integer variables for the difference exponents to avoid - // LP solver issues when arith.mk_sub produces terms that simplify - // to constants after branch constraint propagation. - - // Branch 1 (explored first): n < m → cancel u^n, leave u^d on LHS - // where d = m - n >= 1. Explored first because d≥1 means the - // difference power is non-empty, making progress in the equation. - { - expr_ref d(mk_fresh_int_var()); - expr_ref diff_pow(seq.str.mk_power(base_expr, d), m); nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); - // u^n → ε - nielsen_subst s1(rhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s1); - child->apply_subst(m_sg, s1); - // u^m → u^d - nielsen_subst s2(lhead, m_sg.mk(diff_pow), eq.m_dep); - e->add_subst(s2); - child->apply_subst(m_sg, s2); - // d >= 1 - e->add_side_int(mk_int_constraint(d, one, int_constraint_kind::ge, eq.m_dep)); - // d + n = m (defines d = m - n) - expr_ref d_plus_n(arith.mk_add(d, exp_n), m); - e->add_side_int(mk_int_constraint(d_plus_n, exp_m, int_constraint_kind::eq, eq.m_dep)); + expr_ref n_plus_1(arith.mk_add(exp_n, arith.mk_int(1)), m); + e->add_side_int(mk_int_constraint( + exp_m, n_plus_1, + int_constraint_kind::ge, eq.m_dep)); } - // Branch 2 (explored second): m <= n → cancel u^m, leave u^d on RHS - // where d = n - m >= 0 + // Branch 2 (explored second): m ≤ n (i.e., n ≥ m) { - expr_ref d(mk_fresh_int_var()); - expr_ref diff_pow(seq.str.mk_power(base_expr, d), m); nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); - // u^m → ε - nielsen_subst s1(lhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s1); - child->apply_subst(m_sg, s1); - // u^n → u^d - nielsen_subst s2(rhead, m_sg.mk(diff_pow), eq.m_dep); - e->add_subst(s2); - child->apply_subst(m_sg, s2); - // d >= 0 - e->add_side_int(mk_int_constraint(d, zero, int_constraint_kind::ge, eq.m_dep)); - // d + m = n (defines d = n - m) - expr_ref d_plus_m(arith.mk_add(d, exp_m), m); - e->add_side_int(mk_int_constraint(d_plus_m, exp_n, int_constraint_kind::eq, eq.m_dep)); + e->add_side_int(mk_int_constraint( + exp_n, exp_m, + int_constraint_kind::ge, eq.m_dep)); } return true; } return false; } + // ----------------------------------------------------------------------- + // Modifier: apply_split_power_elim + // When one side starts with a power w^p, call CommPower on the other + // side to count base-pattern occurrences c. If c > 0 and the ordering + // between p and c cannot be determined, create two branches: + // Branch 1: p < c (add constraint c ≥ p + 1) + // Branch 2: c ≤ p (add constraint p ≥ c) + // After branching, simplify_and_init's CommPower pass (3c) can resolve + // the ordering deterministically and cancel the matched portion. + // mirrors ZIPT's SplitPowerElim + NumCmpModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_split_power_elim(nielsen_node* node) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + + for (int dir = 0; dir < 2; dir++) { + euf::snode* pow_side = (dir == 0) ? eq.m_lhs : eq.m_rhs; + euf::snode* other_side = (dir == 0) ? eq.m_rhs : eq.m_lhs; + if (!pow_side || !other_side) continue; + + euf::snode* first_tok = pow_side->first(); + if (!first_tok || !first_tok->is_power()) continue; + euf::snode* base_sn = first_tok->arg(0); + expr* pow_exp = get_power_exp_expr(first_tok); + if (!base_sn || !pow_exp) continue; + + auto [count, consumed] = comm_power(base_sn, other_side, m); + if (!count.get() || consumed == 0) continue; + + expr_ref norm_count = normalize_arith(m, count); + + // Skip if ordering is already deterministic — simplify_and_init + // pass 3c should have handled it. + rational diff; + if (get_const_power_diff(norm_count, pow_exp, arith, diff)) + continue; + + // Branch 1: pow_exp < count (i.e., count ≥ pow_exp + 1) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + expr_ref pow_plus1(arith.mk_add(pow_exp, arith.mk_int(1)), m); + e->add_side_int(mk_int_constraint( + norm_count, pow_plus1, + int_constraint_kind::ge, eq.m_dep)); + } + // Branch 2: count ≤ pow_exp (i.e., pow_exp ≥ count) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + e->add_side_int(mk_int_constraint( + pow_exp, norm_count, + int_constraint_kind::ge, eq.m_dep)); + } + return true; + } + } + return false; + } + // ----------------------------------------------------------------------- // Modifier: apply_const_num_unwinding // For a power token u^n facing a constant (char) head, @@ -2374,58 +2679,54 @@ namespace seq { // ----------------------------------------------------------------------- bool nielsen_graph::apply_const_num_unwinding(nielsen_node* node) { - ast_manager& m = m_sg.get_manager(); + ast_manager &m = m_sg.get_manager(); arith_util arith(m); - euf::snode* power = nullptr; - euf::snode* other_head = nullptr; - str_eq const* eq = nullptr; - if (!find_power_vs_const(node, power, other_head, eq)) + euf::snode *power = nullptr; + euf::snode *other_head = nullptr; + str_eq const *eq = nullptr; + if (!find_power_vs_non_var(node, power, other_head, eq)) return false; SASSERT(power->is_power() && power->num_args() >= 1); - euf::snode* base = power->arg(0); - expr* exp_n = get_power_exponent(power); - expr* zero = arith.mk_int(0); - expr* one = arith.mk_int(1); + euf::snode *base = power->arg(0); + expr *exp_n = get_power_exponent(power); + expr *zero = arith.mk_int(0); + expr *one = arith.mk_int(1); - // Branch 1 (explored first): n >= 1 → peel one u: replace u^n with u · u^(n-1) + // Branch 1 (explored first): n = 0 → replace power with ε (progress) + // Side constraint: n = 0 + nielsen_node *child = mk_child(node); + nielsen_edge *e = mk_edge(node, child, true); + nielsen_subst s1(power, m_sg.mk_empty(), eq->m_dep); + e->add_subst(s1); + child->apply_subst(m_sg, s1); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep)); + + // Branch 2 (explored second): n >= 1 → peel one u: replace u^n with u · u^(n-1) // Side constraint: n >= 1 // Create a proper nested power base^(n-1) instead of a fresh string variable. // This preserves power structure so that simplify_and_init can merge and // cancel adjacent same-base powers (mirroring ZIPT's SimplifyPowerUnwind). // Explored first because the n≥1 branch is typically more productive // for SAT instances (preserves power structure). - { - seq_util& seq = m_sg.get_seq_util(); - expr* power_e = power->get_expr(); - SASSERT(power_e); - expr* base_expr = to_app(power_e)->get_arg(0); - expr_ref n_minus_1(arith.mk_sub(exp_n, one), m); - expr_ref nested_pow(seq.str.mk_power(base_expr, n_minus_1), m); - euf::snode* nested_power_snode = m_sg.mk(nested_pow); + seq_util &seq = m_sg.get_seq_util(); + expr *power_e = power->get_expr(); + SASSERT(power_e); + expr *base_expr = to_app(power_e)->get_arg(0); + expr_ref n_minus_1 = normalize_arith(m, arith.mk_sub(exp_n, one)); + expr_ref nested_pow(seq.str.mk_power(base_expr, n_minus_1), m); + euf::snode *nested_power_snode = m_sg.mk(nested_pow); - euf::snode* replacement = m_sg.mk_concat(base, nested_power_snode); - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, false); - nielsen_subst s(power, replacement, eq->m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_n) - e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); - } - - // Branch 2 (explored second): n = 0 → replace power with ε (progress) - // Side constraint: n = 0 - { - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_n) - e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep)); - } + euf::snode *replacement = m_sg.mk_concat(base, nested_power_snode); + child = mk_child(node); + e = mk_edge(node, child, false); + nielsen_subst s2(power, replacement, eq->m_dep); + e->add_subst(s2); + child->apply_subst(m_sg, s2); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); return true; } @@ -2580,17 +2881,46 @@ namespace seq { expr* zero = arith.mk_int(0); - // Generate one child per proper prefix of the base, mirroring ZIPT's - // GetDecompose. For base [t0, t1, ..., t_{k-1}], the proper prefixes - // are: ε, t0, t0·t1, ..., t0·t1·...·t_{k-2}. - // Child i substitutes var → base^n · prefix_i with n >= 0. - for (unsigned pfx_len = 0; pfx_len < base_len; ++pfx_len) { - euf::snode* replacement = power_snode; - if (pfx_len > 0) { - euf::snode* prefix_snode = ground_prefix[0]; - for (unsigned j = 1; j < pfx_len; ++j) - prefix_snode = m_sg.mk_concat(prefix_snode, ground_prefix[j]); - replacement = m_sg.mk_concat(power_snode, prefix_snode); + // Generate children mirroring ZIPT's GetDecompose: + // P(t0 · t1 · ... · t_{k-1}) = P(t0) | t0·P(t1) | ... | t0·...·t_{k-2}·P(t_{k-1}) + // For char tokens P(c) = {ε}, for power tokens P(u^m) = {u^m', 0 ≤ m' ≤ m}. + // Child at position i substitutes var → base^n · t0·...·t_{i-1} · P(t_i). + for (unsigned i = 0; i < base_len; ++i) { + euf::snode* tok = ground_prefix[i]; + + // Skip char position when preceding token is a power: + // The power case at i-1 with 0 ≤ m' ≤ exp already covers m' = exp, + // which produces the same result. Using the original exponent here + // creates a rigid coupling that causes cycling. + if (!tok->is_power() && i > 0 && ground_prefix[i - 1]->is_power()) + continue; + + // Build full-token prefix: ground_prefix[0..i-1] + euf::snode* prefix_sn = nullptr; + for (unsigned j = 0; j < i; ++j) + prefix_sn = (j == 0) ? ground_prefix[0] : m_sg.mk_concat(prefix_sn, ground_prefix[j]); + + euf::snode* replacement; + expr_ref fresh_m(m); + + if (tok->is_power()) { + // Token is a power u^exp: use fresh m' with 0 ≤ m' ≤ exp + expr* inner_exp = get_power_exponent(tok); + expr* inner_base = get_power_base_expr(tok); + if (inner_exp && inner_base) { + fresh_m = mk_fresh_int_var(); + expr_ref partial_pow(seq.str.mk_power(inner_base, fresh_m), m); + euf::snode* partial_sn = m_sg.mk(partial_pow); + euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, partial_sn) : partial_sn; + replacement = m_sg.mk_concat(power_snode, suffix_sn); + } else { + // Fallback: use full token (shouldn't normally happen) + euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, tok) : tok; + replacement = m_sg.mk_concat(power_snode, suffix_sn); + } + } else { + // Token is a char: P(char) = ε, suffix = just the prefix + replacement = prefix_sn ? m_sg.mk_concat(power_snode, prefix_sn) : power_snode; } nielsen_node* child = mk_child(node); @@ -2601,6 +2931,15 @@ namespace seq { // Side constraint: n >= 0 e->add_side_int(mk_int_constraint(fresh_n, zero, int_constraint_kind::ge, eq.m_dep)); + + // Side constraints for fresh partial exponent + if (fresh_m.get()) { + expr* inner_exp = get_power_exponent(tok); + // m' >= 0 + e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq.m_dep)); + // m' <= inner_exp + e->add_side_int(mk_int_constraint(inner_exp, fresh_m, int_constraint_kind::ge, eq.m_dep)); + } } return true; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 4f5e24572..20c9dac4d 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -372,6 +372,7 @@ namespace seq { }; // string variable substitution: var -> replacement + // (can be used as well to substitute arbitrary nodes - like powers) // mirrors ZIPT's Subst struct nielsen_subst { euf::snode* m_var; @@ -616,7 +617,7 @@ namespace seq { // cur_path provides the path from root to this node so that the // LP solver can be queried for deterministic power cancellation. // Returns proceed, conflict, satisfied, or restart. - simplify_result simplify_and_init(nielsen_graph& g, svector const& cur_path); + simplify_result simplify_and_init(nielsen_graph& g, svector const& cur_path = svector()); // true if all str_eqs are trivial and there are no str_mems bool is_satisfied() const; @@ -668,6 +669,7 @@ namespace seq { unsigned m_mod_det = 0; unsigned m_mod_power_epsilon = 0; unsigned m_mod_num_cmp = 0; + unsigned m_mod_split_power_elim = 0; unsigned m_mod_const_num_unwinding = 0; unsigned m_mod_eq_split = 0; unsigned m_mod_star_intr = 0; @@ -875,6 +877,15 @@ namespace seq { // mirrors ZIPT's NumCmpModifier bool apply_num_cmp(nielsen_node* node); + // CommPower-based power elimination split: when one side starts with + // a power w^p and CommPower finds c base-pattern occurrences on the + // other side but the ordering between p and c is unknown, branch: + // (1) p < c (2) c ≤ p + // After branching, simplify_and_init's CommPower pass resolves the + // cancellation deterministically. + // mirrors ZIPT's SplitPowerElim + NumCmpModifier + bool apply_split_power_elim(nielsen_node* node); + // constant numeric unwinding: for a power token u^n vs a constant // (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u). // mirrors ZIPT's ConstNumUnwindingModifier @@ -918,7 +929,7 @@ namespace seq { euf::snode* find_power_token(nielsen_node* node) const; // find a power token facing a constant (char) head - bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const; + bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const; // find a power token facing a variable head bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const; From d3646048753760002ec8b95e7697a144b035a315 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Mar 2026 16:35:24 +0000 Subject: [PATCH 4/6] Initial plan From 255d381b72e7c1ba649b1a73db1bab897f9bbae7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Mar 2026 17:09:34 +0000 Subject: [PATCH 5/6] Make simple_solver incremental: use push/pop scopes in Nielsen DFS Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/seq/seq_nielsen.cpp | 79 ++++++++++++++++--------------------- src/smt/seq/seq_nielsen.h | 28 ++++++++----- 2 files changed, 54 insertions(+), 53 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 2ded22793..095383aaa 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -459,6 +459,7 @@ namespace seq { nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) { nielsen_node* child = mk_node(); child->clone_from(*parent); + child->m_parent_ic_count = parent->int_constraints().size(); return child; } @@ -1705,8 +1706,14 @@ namespace seq { return search_result::sat; } - // integer feasibility check: collect side constraints along the path - // and verify they are jointly satisfiable using the LP solver + // Assert any new int_constraints added during simplify_and_init for this + // node into the current solver scope. Constraints inherited from the parent + // (indices 0..m_parent_ic_count-1) are already present at the enclosing + // scope level; only the newly-added tail needs to be asserted here. + assert_node_new_int_constraints(node); + + // integer feasibility check: the solver now holds all path constraints + // incrementally; just query the solver directly if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) { node->set_reason(backtrack_reason::arithmetic); ++m_stats.m_num_arith_infeasible; @@ -1739,7 +1746,15 @@ namespace seq { bool any_unknown = false; for (nielsen_edge* e : node->outgoing()) { cur_path.push_back(e); + // Push a solver scope for this edge and assert its side integer + // constraints. The child's own new int_constraints will be asserted + // inside the recursive call (above). On return, pop the scope so + // that backtracking removes those assertions. + m_solver.push(); + for (auto const& ic : e->side_int()) + m_solver.assert_expr(int_constraint_to_expr(ic)); search_result r = search_dfs(e->tgt(), depth + 1, cur_path); + m_solver.pop(1); if (r == search_result::sat) return search_result::sat; cur_path.pop_back(); @@ -3302,41 +3317,22 @@ namespace seq { return expr_ref(m.mk_true(), m); } - void nielsen_graph::collect_path_int_constraints(nielsen_node* node, - svector const& cur_path, - vector& out) { - // collect from root node - if (m_root) { - for (auto const& ic : m_root->int_constraints()) - out.push_back(ic); - } - - // collect from edges along the path and their target nodes - for (nielsen_edge* e : cur_path) { - for (auto const& ic : e->side_int()) - out.push_back(ic); - if (e->tgt()) { - for (auto const& ic : e->tgt()->int_constraints()) - out.push_back(ic); - } - } + void nielsen_graph::assert_node_new_int_constraints(nielsen_node* node) { + // Assert only the int_constraints that are new to this node (beyond those + // inherited from its parent via clone_from). The parent's constraints are + // already present in the enclosing solver scope; asserting them again would + // be redundant (though harmless). This is called by search_dfs right after + // simplify_and_init, which is where new constraints are produced. + for (unsigned i = node->m_parent_ic_count; i < node->int_constraints().size(); ++i) + m_solver.assert_expr(int_constraint_to_expr(node->int_constraints()[i])); } bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector const& cur_path) { - vector constraints; - collect_path_int_constraints(node, cur_path, constraints); - - if (constraints.empty()) - return true; // no integer constraints, trivially feasible - - - m_solver.push(); - for (auto const& ic : constraints) - m_solver.assert_expr(int_constraint_to_expr(ic)); - - lbool result = m_solver.check(); - m_solver.pop(1); - return result != l_false; + // In incremental mode the solver already holds all path constraints + // (root length constraints at the base level, edge side_int and node + // int_constraints pushed/popped as the DFS descends and backtracks). + // A plain check() is therefore sufficient. + return m_solver.check() != l_false; } bool nielsen_graph::check_lp_le(expr* lhs, expr* rhs, @@ -3345,19 +3341,14 @@ namespace seq { ast_manager& m = m_sg.get_manager(); arith_util arith(m); - vector constraints; - collect_path_int_constraints(node, cur_path, constraints); - - m_solver.push(); - for (auto const& ic : constraints) - m_solver.assert_expr(int_constraint_to_expr(ic)); - - // Assert NOT(lhs <= rhs), i.e., lhs > rhs, i.e., lhs >= rhs + 1 - // If this is unsat, then lhs <= rhs is entailed by the path constraints. + // The solver already holds all path constraints incrementally. + // Temporarily add NOT(lhs <= rhs), i.e. lhs >= rhs + 1. + // If that is unsat, then lhs <= rhs is entailed. expr_ref one(arith.mk_int(1), m); expr_ref rhs_plus_one(arith.mk_add(rhs, one), m); - m_solver.assert_expr(arith.mk_ge(lhs, rhs_plus_one)); + m_solver.push(); + m_solver.assert_expr(arith.mk_ge(lhs, rhs_plus_one)); lbool result = m_solver.check(); m_solver.pop(1); return result == l_false; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 20c9dac4d..fd11ee0d5 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -520,6 +520,12 @@ namespace seq { // evaluation index for run tracking unsigned m_eval_idx = 0; + // number of int_constraints inherited from the parent node at clone time. + // int_constraints[0..m_parent_ic_count) are already asserted at the + // parent's solver scope; only [m_parent_ic_count..end) need to be + // asserted when this node's solver scope is entered. + unsigned m_parent_ic_count = 0; + public: nielsen_node(nielsen_graph* graph, unsigned id); @@ -945,20 +951,24 @@ namespace seq { // Mirrors ZIPT's Constraint.Shared forwarding mechanism. void assert_root_constraints_to_solver(); - // collect int_constraints along the path from root to the given node, - // including constraints from edges and nodes. - void collect_path_int_constraints(nielsen_node* node, - svector const& cur_path, - vector& out); + // Assert the int_constraints of `node` that are new relative to its + // parent (indices [m_parent_ic_count..end)) into the current solver scope. + // Called by search_dfs after simplify_and_init so that the newly derived + // bounds become visible to subsequent check() and check_lp_le() calls. + void assert_node_new_int_constraints(nielsen_node* node); // check integer feasibility of the constraints along the current path. - // returns true if feasible, false if infeasible. + // returns true if feasible (including unknown), false only if l_false. + // Precondition: all path constraints have been incrementally asserted to + // m_solver by search_dfs via push/pop, so a plain check() suffices. + // l_undef (resource limit / timeout) is treated as feasible so that the + // search continues rather than reporting a false unsatisfiability. bool check_int_feasibility(nielsen_node* node, svector const& cur_path); // check whether lhs <= rhs is implied by the path constraints. - // mirrors ZIPT's NielsenNode.IsLe(): collects path constraints, - // asserts their conjunction plus NOT(lhs <= rhs), and returns true - // iff the result is unsatisfiable (i.e., lhs <= rhs is entailed). + // mirrors ZIPT's NielsenNode.IsLe(): temporarily asserts NOT(lhs <= rhs) + // and returns true iff the result is unsatisfiable (i.e., lhs <= rhs is + // entailed). Path constraints are already in the solver incrementally. bool check_lp_le(expr* lhs, expr* rhs, nielsen_node* node, svector const& cur_path); // create an integer constraint: lhs rhs From 99727faf7009ee0b4e3eb729c47c807b7145281d Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Wed, 11 Mar 2026 18:13:16 +0100 Subject: [PATCH 6/6] Model reconstruction --- src/smt/nseq_context_solver.h | 4 ++ src/smt/nseq_model.cpp | 74 +++++++++++++++++++++++++++-- src/smt/nseq_model.h | 4 ++ src/smt/seq/seq_nielsen.cpp | 87 +++++++++++++++++++++++++++++++---- src/smt/seq/seq_nielsen.h | 9 ++++ 5 files changed, 164 insertions(+), 14 deletions(-) diff --git a/src/smt/nseq_context_solver.h b/src/smt/nseq_context_solver.h index 4fb5edc5d..60db2868a 100644 --- a/src/smt/nseq_context_solver.h +++ b/src/smt/nseq_context_solver.h @@ -59,6 +59,10 @@ namespace smt { void pop(unsigned num_scopes) override { m_kernel.pop(num_scopes); } + + void get_model(model_ref& mdl) override { + m_kernel.get_model(mdl); + } }; } diff --git a/src/smt/nseq_model.cpp b/src/smt/nseq_model.cpp index c56862c6f..95ed7c869 100644 --- a/src/smt/nseq_model.cpp +++ b/src/smt/nseq_model.cpp @@ -35,6 +35,8 @@ namespace smt { m_var_values.reset(); m_var_regex.reset(); m_trail.reset(); + m_int_model = nullptr; + m_mg = &mg; m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model()); mg.register_factory(m_factory); @@ -42,12 +44,12 @@ namespace smt { register_existing_values(nielsen); collect_var_regex_constraints(state); + // solve integer constraints from the sat_path FIRST so that + // m_int_model is available when snode_to_value evaluates power exponents + nielsen.solve_sat_path_ints(m_int_model); + // extract variable assignments from the satisfying leaf's substitution path - seq::nielsen_node const* sat = nielsen.sat_node(); - IF_VERBOSE(1, verbose_stream() << "nseq model init: sat_node=" << (sat ? "set" : "null") - << " path_len=" << nielsen.sat_path().size() << "\n";); extract_assignments(nielsen.sat_path()); - IF_VERBOSE(1, verbose_stream() << "nseq model: m_var_values has " << m_var_values.size() << " entries\n";); } model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) { @@ -103,6 +105,8 @@ namespace smt { m_var_values.reset(); m_var_regex.reset(); m_trail.reset(); + m_int_model = nullptr; + m_mg = nullptr; m_factory = nullptr; } @@ -175,6 +179,68 @@ namespace smt { return expr_ref(m); } + if (n->is_power()) { + SASSERT(n->num_args() == 2); + // Evaluate the base and exponent to produce a concrete string. + // The base is a string snode; the exponent is an integer expression + // whose value comes from the sat_path integer model. + expr_ref base_val = snode_to_value(n->arg(0)); + if (!base_val) + return expr_ref(m); + + euf::snode* exp_snode = n->arg(1); + expr* exp_expr = exp_snode ? exp_snode->get_expr() : nullptr; + rational exp_val; + arith_util arith(m); + + // Try to evaluate exponent: first check if it's a numeral, + // then try the int model from sat_path constraints, + // finally fall back to the proto_model from model_generator. + if (exp_expr && arith.is_numeral(exp_expr, exp_val)) { + // already concrete + } else if (exp_expr && m_int_model.get()) { + expr_ref result(m); + if (m_int_model->eval_expr(exp_expr, result, true) && arith.is_numeral(result, exp_val)) { + // evaluated from int model + } else if (m_mg) { + proto_model& pm = m_mg->get_model(); + if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) { + // evaluated from proto_model + } else { + exp_val = rational(0); + } + } else { + exp_val = rational(0); + } + } else if (exp_expr && m_mg) { + expr_ref result(m); + proto_model& pm = m_mg->get_model(); + if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) { + // evaluated from proto_model + } else { + exp_val = rational(0); + } + } else { + exp_val = rational(0); + } + + if (exp_val.is_neg()) + exp_val = rational(0); + + // Build the repeated string: base^exp_val + if (exp_val.is_zero()) + return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); + if (exp_val.is_one()) + return base_val; + + // For small exponents, concatenate directly + unsigned n_val = exp_val.get_unsigned(); + expr_ref acc(base_val); + for (unsigned i = 1; i < n_val; ++i) + acc = m_seq.str.mk_concat(acc, base_val); + return acc; + } + // fallback: use the underlying expression expr* e = n->get_expr(); return e ? expr_ref(e, m) : expr_ref(m); diff --git a/src/smt/nseq_model.h b/src/smt/nseq_model.h index cd5d13082..14baa40cf 100644 --- a/src/smt/nseq_model.h +++ b/src/smt/nseq_model.h @@ -61,6 +61,10 @@ namespace smt { // trail for GC protection of generated expressions expr_ref_vector m_trail; + // integer variable model from sat_path constraints + model_ref m_int_model; + model_generator* m_mg = nullptr; + // per-variable regex constraints: maps snode id -> intersected regex snode. // collected during init() from the state's str_mem list. u_map m_var_regex; diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 2ded22793..aaf08a6d4 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -23,6 +23,8 @@ Author: #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" #include "ast/rewriter/th_rewriter.h" +#include "smt/smt_kernel.h" +#include "params/smt_params.h" #include "util/hashtable.h" #include #include @@ -976,25 +978,41 @@ namespace seq { dep_tracker const& dep, bool& changed) { euf::snode_vector tokens; non_empty_side->collect_tokens(tokens); - bool all_vars_or_opaque = true; + bool all_eliminable = true; bool has_char = false; for (euf::snode* t : tokens) { if (t->is_char()) has_char = true; - else if (!t->is_var() && t->kind() != euf::snode_kind::s_other) { - all_vars_or_opaque = false; break; + else if (!t->is_var() && !t->is_power() && t->kind() != euf::snode_kind::s_other) { + all_eliminable = false; break; } } - if (has_char || !all_vars_or_opaque) { + if (has_char || !all_eliminable) { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; return true; } + ast_manager& m = sg.get_manager(); + arith_util arith(m); for (euf::snode* t : tokens) { if (t->is_var()) { nielsen_subst s(t, sg.mk_empty(), dep); apply_subst(sg, s); changed = true; } + else if (t->is_power()) { + // Power equated to empty → exponent must be 0. + expr* e = t->get_expr(); + expr* pow_exp = (e && is_app(e) && to_app(e)->get_num_args() >= 2) + ? to_app(e)->get_arg(1) : nullptr; + if (pow_exp) { + expr* zero = arith.mk_numeral(rational(0), true); + m_int_constraints.push_back( + int_constraint(pow_exp, zero, int_constraint_kind::eq, dep, m)); + } + nielsen_subst s(t, sg.mk_empty(), dep); + apply_subst(sg, s); + changed = true; + } } return false; } @@ -1488,6 +1506,8 @@ namespace seq { eq.m_rhs = sg.drop_first(eq.m_rhs); if (lp_le_rp && rp_le_lp) { // both ≤ → equal → both cancel completely + // Record the equality constraint so the model knows lp = rp. + add_int_constraint(g.mk_int_constraint(lp, rp, int_constraint_kind::eq, eq.m_dep)); } else { // strictly less: create diff power d = larger - smaller ≥ 1 expr_ref d(g.mk_fresh_int_var()); @@ -1699,20 +1719,23 @@ namespace seq { node->set_general_conflict(true); return search_result::unsat; } - if (sr == simplify_result::satisfied || node->is_satisfied()) { - m_sat_node = node; - m_sat_path = cur_path; - return search_result::sat; - } // integer feasibility check: collect side constraints along the path - // and verify they are jointly satisfiable using the LP solver + // and verify they are jointly satisfiable using the LP solver. + // Must run AFTER simplify_and_init (which may add int_constraints) + // and BEFORE the SAT check (equation satisfied doesn't imply ints are feasible). if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) { node->set_reason(backtrack_reason::arithmetic); ++m_stats.m_num_arith_infeasible; return search_result::unsat; } + if (sr == simplify_result::satisfied || node->is_satisfied()) { + m_sat_node = node; + m_sat_path = cur_path; + return search_result::sat; + } + // depth bound check if (depth >= m_depth_bound) return search_result::unknown; @@ -3383,5 +3406,49 @@ namespace seq { return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); } + bool nielsen_graph::solve_sat_path_ints(model_ref& mdl) { + mdl = nullptr; + if (m_sat_path.empty() && (!m_sat_node || m_sat_node->int_constraints().empty())) + return false; + + vector constraints; + collect_path_int_constraints(m_sat_node, m_sat_path, constraints); + if (constraints.empty()) + return false; + + // Use a fresh smt::kernel to solve the integer constraints. + // Add constraints incrementally, skipping any that would make the system UNSAT + // (the search may have taken contradictory branches). + ast_manager& m = m_sg.get_manager(); + smt_params params; + smt::kernel fresh_solver(m, params); + IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints: " << constraints.size() << " constraints\n";); + for (auto const& ic : constraints) { + expr_ref e = int_constraint_to_expr(ic); + IF_VERBOSE(2, verbose_stream() << " constraint: " << mk_bounded_pp(e, m, 5) << "\n";); + fresh_solver.push(); + fresh_solver.assert_expr(e); + if (fresh_solver.check() == l_false) { + IF_VERBOSE(1, verbose_stream() << " SKIPPED (infeasible): " << mk_bounded_pp(e, m, 5) << "\n";); + fresh_solver.pop(1); + } + } + + lbool result = fresh_solver.check(); + IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints result: " << result << "\n";); + if (result == l_true) { + fresh_solver.get_model(mdl); + IF_VERBOSE(1, { + verbose_stream() << " int_model:\n"; + for (unsigned i = 0; i < mdl->get_num_constants(); ++i) { + func_decl* fd = mdl->get_constant(i); + expr* val = mdl->get_const_interp(fd); + if (val) verbose_stream() << " " << fd->get_name() << " = " << mk_bounded_pp(val, m, 3) << "\n"; + } + }); + } + return mdl.get() != nullptr; + } + } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 20c9dac4d..e10aad808 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -241,6 +241,7 @@ Author: #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_sgraph.h" #include +#include "model/model.h" namespace seq { @@ -264,6 +265,7 @@ namespace seq { virtual void assert_expr(expr* e) = 0; virtual void push() = 0; virtual void pop(unsigned num_scopes) = 0; + virtual void get_model(model_ref& mdl) { mdl = nullptr; } }; // simplification result for constraint processing @@ -819,6 +821,13 @@ namespace seq { // max_len == UINT_MAX means unbounded. void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len); + // solve all integer constraints along the sat_path and return + // a model mapping integer variables to concrete values. + // Must be called after solve() returns sat. + // Returns true if a satisfying model was found. + // Caller takes ownership of the returned model pointer. + bool solve_sat_path_ints(model_ref& mdl); + private: search_result search_dfs(nielsen_node* node, unsigned depth, svector& cur_path);