From 756673f10494c093eb59bc1af1cb4a240c9fb829 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Fri, 6 Mar 2026 14:01:21 +0100 Subject: [PATCH] Trying to port integer constraints --- src/smt/seq/seq_nielsen.cpp | 674 +++++++++++++++++++++++++++++++++--- src/smt/seq/seq_nielsen.h | 99 +++++- src/test/seq_nielsen.cpp | 191 +++++----- 3 files changed, 803 insertions(+), 161 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index d439abbe2..8da4396a6 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -22,9 +22,11 @@ Author: #include "smt/seq/seq_nielsen.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" +#include "math/lp/lar_solver.h" #include "util/bit_util.h" #include "util/hashtable.h" #include +#include #include namespace seq { @@ -308,12 +310,15 @@ namespace seq { void nielsen_node::clone_from(nielsen_node const& parent) { m_str_eq.reset(); m_str_mem.reset(); + m_int_constraints.reset(); m_char_diseqs.reset(); m_char_ranges.reset(); for (auto const& eq : parent.m_str_eq) m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep)); for (auto const& mem : parent.m_str_mem) m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep)); + for (auto const& ic : parent.m_int_constraints) + m_int_constraints.push_back(ic); // clone character disequalities for (auto const& kv : parent.m_char_diseqs) { ptr_vector diseqs; @@ -1093,6 +1098,13 @@ namespace seq { return search_result::sat; } + // integer feasibility check: collect side constraints along the path + // and verify they are jointly satisfiable using the LP solver + if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) { + node->set_reason(backtrack_reason::arithmetic); + return search_result::unsat; + } + // depth bound check if (depth >= m_depth_bound) return search_result::unknown; @@ -1302,12 +1314,197 @@ namespace seq { return false; } + // ----------------------------------------------------------------------- + // EqSplit helpers: token length classification + // ----------------------------------------------------------------------- + + bool nielsen_graph::token_has_variable_length(euf::snode* tok) const { + // Chars and units have known constant length 1. + if (tok->is_char() || tok->is_unit()) + return false; + // Variables and powers have symbolic/unknown length. + if (tok->is_var() || tok->is_power()) + return true; + // For s_other: check if it's a string literal (known constant length). + if (tok->get_expr()) { + seq_util& seq = m_sg.get_seq_util(); + zstring s; + if (seq.str.is_string(tok->get_expr(), s)) + return false; + } + // Everything else is treated as variable length. + return true; + } + + unsigned nielsen_graph::token_const_length(euf::snode* tok) const { + if (tok->is_char() || tok->is_unit()) + return 1; + if (tok->get_expr()) { + seq_util& seq = m_sg.get_seq_util(); + zstring s; + if (seq.str.is_string(tok->get_expr(), s)) + return s.length(); + } + return 0; + } + + // ----------------------------------------------------------------------- + // EqSplit: find_eq_split_point + // Port of ZIPT's StrEq.SplitEq algorithm. + // + // Walks tokens from each side tracking two accumulators: + // - lhs_has_symbolic / rhs_has_symbolic : whether a variable-length token + // has been consumed on that side since the last reset + // - const_diff : net constant-length difference (LHS constants − RHS constants) + // + // A potential split point arises when both accumulators are "zero" + // (no outstanding symbolic contributions on either side), meaning + // the prefix lengths are determined up to a constant offset (const_diff). + // + // Among all such split points, we pick the one minimising |const_diff| + // (the padding amount). We also require having seen at least one variable- + // length token before accepting a split, so that the split is non-trivial. + // ----------------------------------------------------------------------- + + bool nielsen_graph::find_eq_split_point( + euf::snode_vector const& lhs_toks, + euf::snode_vector const& rhs_toks, + unsigned& out_lhs_idx, + unsigned& out_rhs_idx, + int& out_padding) const { + + unsigned lhs_len = lhs_toks.size(); + unsigned rhs_len = rhs_toks.size(); + if (lhs_len <= 1 || rhs_len <= 1) + return false; + + // Initialize accumulators with the first token on each side (index 0). + bool lhs_has_symbolic = token_has_variable_length(lhs_toks[0]); + bool rhs_has_symbolic = token_has_variable_length(rhs_toks[0]); + int const_diff = 0; + if (!lhs_has_symbolic) + const_diff += (int)token_const_length(lhs_toks[0]); + if (!rhs_has_symbolic) + const_diff -= (int)token_const_length(rhs_toks[0]); + + bool seen_variable = lhs_has_symbolic || rhs_has_symbolic; + + // Best confirmed split point. + bool has_best = false; + unsigned best_lhs = 0, best_rhs = 0; + int best_padding = 0; + + // Pending split (needs confirmation by a subsequent variable token). + bool has_pending = false; + unsigned pending_lhs = 0, pending_rhs = 0; + int pending_padding = 0; + + unsigned li = 1, ri = 1; + + while (li < lhs_len || ri < rhs_len) { + bool lhs_zero = !lhs_has_symbolic; + bool rhs_zero = !rhs_has_symbolic; + + // Potential split point: both symbolic accumulators are zero. + if (seen_variable && lhs_zero && rhs_zero) { + if (!has_pending || std::abs(const_diff) < std::abs(pending_padding)) { + has_pending = true; + pending_padding = const_diff; + pending_lhs = li; + pending_rhs = ri; + } + } + + // Decide which side to consume from. + // Prefer consuming from the side whose symbolic accumulator is zero + // (i.e., that side has no outstanding variable contributions). + bool consume_lhs; + if (lhs_zero && !rhs_zero) + consume_lhs = true; + else if (!lhs_zero && rhs_zero) + consume_lhs = false; + else if (lhs_zero && rhs_zero) + consume_lhs = (const_diff <= 0); // consume from side with less accumulated constant + else + consume_lhs = (li <= ri); // both non-zero: round-robin + + if (consume_lhs) { + if (li >= lhs_len) break; + euf::snode* tok = lhs_toks[li++]; + bool is_var = token_has_variable_length(tok); + if (is_var) { + // Confirm any pending split point. + if (has_pending && (!has_best || std::abs(pending_padding) < std::abs(best_padding))) { + has_best = true; + best_padding = pending_padding; + best_lhs = pending_lhs; + best_rhs = pending_rhs; + has_pending = false; + } + seen_variable = true; + lhs_has_symbolic = true; + } else { + const_diff += (int)token_const_length(tok); + } + } else { + if (ri >= rhs_len) break; + euf::snode* tok = rhs_toks[ri++]; + bool is_var = token_has_variable_length(tok); + if (is_var) { + if (has_pending && (!has_best || std::abs(pending_padding) < std::abs(best_padding))) { + has_best = true; + best_padding = pending_padding; + best_lhs = pending_lhs; + best_rhs = pending_rhs; + has_pending = false; + } + seen_variable = true; + rhs_has_symbolic = true; + } else { + const_diff -= (int)token_const_length(tok); + } + } + } + + if (!has_best) return false; + + // A split at the very start or very end is degenerate — skip. + if ((best_lhs == 0 && best_rhs == 0) || + (best_lhs == lhs_len && best_rhs == rhs_len)) + return false; + + out_lhs_idx = best_lhs; + out_rhs_idx = best_rhs; + out_padding = best_padding; + return true; + } + + // ----------------------------------------------------------------------- + // apply_eq_split — Port of ZIPT's EqSplitModifier.Apply + // + // For a regex-free equation LHS = RHS, finds a split point and decomposes + // into two shorter equations with optional padding variable: + // + // eq1: LHS[0..lhsIdx] · [pad if padding<0] = [pad if padding>0] · RHS[0..rhsIdx] + // eq2: [pad if padding>0] · LHS[lhsIdx..] = RHS[rhsIdx..] · [pad if padding<0] + // + // Creates a single progress child. + // ----------------------------------------------------------------------- + bool nielsen_graph::apply_eq_split(nielsen_node* node) { - for (str_eq const& eq : node->str_eqs()) { + ast_manager& m = m_sg.get_manager(); + seq_util& seq = m_sg.get_seq_util(); + arith_util arith(m); + + for (unsigned eq_idx = 0; eq_idx < node->str_eqs().size(); ++eq_idx) { + str_eq const& eq = node->str_eqs()[eq_idx]; if (eq.is_trivial()) continue; if (!eq.m_lhs || !eq.m_rhs) continue; + // EqSplit only applies to regex-free equations. + if (!eq.m_lhs->is_regex_free() || !eq.m_rhs->is_regex_free()) + continue; euf::snode_vector lhs_toks, rhs_toks; eq.m_lhs->collect_tokens(lhs_toks); @@ -1315,41 +1512,70 @@ namespace seq { if (lhs_toks.empty() || rhs_toks.empty()) continue; - euf::snode* lhead = lhs_toks[0]; - euf::snode* rhead = rhs_toks[0]; - - if (!lhead->is_var() || !rhead->is_var()) + unsigned split_lhs = 0, split_rhs = 0; + int padding = 0; + if (!find_eq_split_point(lhs_toks, rhs_toks, split_lhs, split_rhs, padding)) continue; - // x·A = y·B where x,y are distinct variables - // child 1: x → ε (progress) + // Split the equation sides into prefix / suffix. + euf::snode* lhs_prefix = m_sg.drop_right(eq.m_lhs, lhs_toks.size() - split_lhs); + euf::snode* lhs_suffix = m_sg.drop_left(eq.m_lhs, split_lhs); + euf::snode* rhs_prefix = m_sg.drop_right(eq.m_rhs, rhs_toks.size() - split_rhs); + euf::snode* rhs_suffix = m_sg.drop_left(eq.m_rhs, split_rhs); + + // Build the two new equations, incorporating a padding variable if needed. + euf::snode* eq1_lhs = lhs_prefix; + euf::snode* eq1_rhs = rhs_prefix; + euf::snode* eq2_lhs = lhs_suffix; + euf::snode* eq2_rhs = rhs_suffix; + + euf::snode* pad = nullptr; + if (padding != 0) { + pad = mk_fresh_var(); + if (padding > 0) { + // LHS prefix is longer by |padding| constants. + // Prepend pad to RHS prefix, append pad to LHS suffix. + eq1_rhs = m_sg.mk_concat(pad, rhs_prefix); + eq2_lhs = m_sg.mk_concat(lhs_suffix, pad); + } else { + // RHS prefix is longer by |padding| constants. + // Append pad to LHS prefix, prepend pad to RHS suffix. + eq1_lhs = m_sg.mk_concat(lhs_prefix, pad); + eq2_rhs = m_sg.mk_concat(pad, rhs_suffix); + } + } + + // Create single progress child. + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + + // Remove the original equation and add the two new ones. + auto& eqs = child->str_eqs(); + eqs[eq_idx] = eqs.back(); + eqs.pop_back(); + eqs.push_back(str_eq(eq1_lhs, eq1_rhs, eq.m_dep)); + eqs.push_back(str_eq(eq2_lhs, eq2_rhs, eq.m_dep)); + + // Int constraints on the edge. + // 1) len(pad) = |padding| (if padding variable was created) + if (pad && pad->get_expr()) { + expr_ref len_pad(seq.str.mk_length(pad->get_expr()), m); + expr_ref abs_pad(arith.mk_int(std::abs(padding)), m); + e->add_side_int(mk_int_constraint(len_pad, abs_pad, int_constraint_kind::eq, eq.m_dep)); + } + // 2) len(eq1_lhs) = len(eq1_rhs) { - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); + expr_ref l1 = compute_length_expr(eq1_lhs); + expr_ref r1 = compute_length_expr(eq1_rhs); + e->add_side_int(mk_int_constraint(l1, r1, int_constraint_kind::eq, eq.m_dep)); } - // child 2: x → y·z_fresh (non-progress, x starts with y) - if (lhead->id() != rhead->id()) { - euf::snode* fresh = mk_fresh_var(); - euf::snode* replacement = m_sg.mk_concat(rhead, fresh); - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, false); - nielsen_subst s(lhead, replacement, eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - } - // child 3: y → x·z_fresh (non-progress, y starts with x) - if (lhead->id() != rhead->id()) { - euf::snode* fresh = mk_fresh_var(); - euf::snode* replacement = m_sg.mk_concat(lhead, fresh); - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, false); - nielsen_subst s(rhead, replacement, eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); + // 3) len(eq2_lhs) = len(eq2_rhs) + { + expr_ref l2 = compute_length_expr(eq2_lhs); + expr_ref r2 = compute_length_expr(eq2_rhs); + e->add_side_int(mk_int_constraint(l2, r2, int_constraint_kind::eq, eq.m_dep)); } + return true; } return false; @@ -1525,7 +1751,7 @@ namespace seq { if (apply_const_num_unwinding(node)) return ++m_stats.m_mod_const_num_unwinding, true; - // Priority 5: EqSplit - var vs var (branching, 3 children) + // Priority 5: EqSplit - split regex-free equation into two (single progress child) if (apply_eq_split(node)) return ++m_stats.m_mod_eq_split, true; @@ -1675,12 +1901,15 @@ namespace seq { // ----------------------------------------------------------------------- // Modifier: apply_num_cmp // For equations involving two power tokens u^m and u^n with the same base, - // branch on the numeric relationship: m < n vs n <= m. - // Approximated as: (1) replace u^m with ε, (2) replace u^n with ε. + // branch on the numeric relationship: m <= n vs n < m. + // Generates proper integer side constraints for each branch. // mirrors ZIPT's NumCmpModifier // ----------------------------------------------------------------------- bool nielsen_graph::apply_num_cmp(nielsen_node* node) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + // Look for two power tokens with the same base on opposite sides for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; @@ -1693,21 +1922,35 @@ namespace seq { // same base: compare the two powers if (lhead->arg(0)->id() != rhead->arg(0)->id()) continue; - // Branch 1: m < n → peel from lhs power (replace lhead with ε) + // Get exponent expressions (m and n from u^m and u^n) + expr* exp_m = get_power_exponent(lhead); + expr* exp_n = get_power_exponent(rhead); + + // Branch 1: m <= n → cancel u^m from both sides + // Side constraint: m <= n + // After cancellation: ε·A = u^(n-m)·B { nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + if (exp_m && exp_n) + e->add_side_int(mk_int_constraint(exp_m, exp_n, int_constraint_kind::le, eq.m_dep)); } - // Branch 2: n <= m → peel from rhs power (replace rhead with ε) + // Branch 2: n < m → cancel u^n from both sides + // Side constraint: n < m, i.e., n + 1 <= m, i.e., m >= n + 1 + // After cancellation: u^(m-n)·A = ε·B { nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + if (exp_m && exp_n) { + expr_ref n_plus_1(arith.mk_add(exp_n, arith.mk_int(1)), m); + e->add_side_int(mk_int_constraint(exp_m, n_plus_1, int_constraint_kind::ge, eq.m_dep)); + } } return true; } @@ -1718,10 +1961,14 @@ namespace seq { // Modifier: apply_const_num_unwinding // For a power token u^n facing a constant (char) head, // branch: (1) n = 0 → u^n = ε, (2) n >= 1 → peel one u from power. + // Generates integer side constraints for each branch. // mirrors ZIPT's ConstNumUnwindingModifier // ----------------------------------------------------------------------- bool nielsen_graph::apply_const_num_unwinding(nielsen_node* node) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + euf::snode* power = nullptr; euf::snode* other_head = nullptr; str_eq const* eq = nullptr; @@ -1730,18 +1977,25 @@ namespace seq { SASSERT(power->is_power() && power->num_args() >= 1); euf::snode* base = power->arg(0); + expr* exp_n = get_power_exponent(power); + expr* zero = arith.mk_int(0); + expr* one = arith.mk_int(1); // Branch 1: n = 0 → replace power with ε (progress) + // Side constraint: n = 0 { nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep)); } - // Branch 2: n >= 1 → peel one u: replace u^n with u · u^n' - // Approximated by prepending the base and keeping a fresh power variable + // Branch 2: n >= 1 → peel one u: replace u^n with u · u^(n-1) + // Side constraint: n >= 1 + // Use fresh power variable for u^(n-1) { euf::snode* fresh_power = mk_fresh_var(); euf::snode* replacement = m_sg.mk_concat(base, fresh_power); @@ -1750,6 +2004,8 @@ namespace seq { nielsen_subst s(power, replacement, eq->m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); } return true; @@ -1820,12 +2076,15 @@ namespace seq { // Modifier: apply_gpower_intr // Ground power introduction: for a variable x matched against a // ground repeated pattern, introduce x = base^n · prefix with fresh n. - // Approximated: for each non-trivial equation with a variable head vs - // a ground concatenation, introduce power decomposition. + // Generates integer side constraint n >= 0 for the fresh exponent. // mirrors ZIPT's GPowerIntrModifier // ----------------------------------------------------------------------- bool nielsen_graph::apply_gpower_intr(nielsen_node* node) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + seq_util& seq = m_sg.get_seq_util(); + for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; if (!eq.m_lhs || !eq.m_rhs) continue; @@ -1866,9 +2125,22 @@ namespace seq { if (repeat_len < 2) continue; // need at least 2 repetitions // Introduce: x = base^n · fresh_suffix - // Branch with side constraint n >= 0 + // Create fresh integer variable n for the exponent + expr_ref fresh_n = mk_fresh_int_var(); + expr* base_expr = base_char->get_expr(); + + // Create the power snode: base^n euf::snode* fresh_suffix = mk_fresh_var(); - euf::snode* fresh_power = mk_fresh_var(); // represents base^n + euf::snode* fresh_power = nullptr; + if (base_expr) { + // Build the seq.power(base_str, n) expression and register it + expr_ref base_str(seq.str.mk_unit(base_expr), m); + expr_ref power_expr(seq.str.mk_power(base_str, fresh_n), m); + fresh_power = m_sg.mk(power_expr); + } + if (!fresh_power) + fresh_power = mk_fresh_var(); // fallback + euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix); nielsen_node* child = mk_child(node); @@ -1877,6 +2149,19 @@ namespace seq { e->add_subst(s); child->apply_subst(m_sg, s); + // Side constraint: n >= 0 + expr* zero = arith.mk_int(0); + e->add_side_int(mk_int_constraint(fresh_n, zero, int_constraint_kind::ge, eq.m_dep)); + + // Side constraint: len(fresh_suffix) < len(base) = 1 + // This ensures the remainder is shorter than the base character + if (fresh_suffix->get_expr()) { + expr_ref len_suffix(seq.str.mk_length(fresh_suffix->get_expr()), m); + expr* one = arith.mk_int(1); + e->add_side_int(mk_int_constraint(len_suffix, one, int_constraint_kind::le, eq.m_dep)); + e->add_side_int(mk_int_constraint(len_suffix, zero, int_constraint_kind::ge, eq.m_dep)); + } + return true; } return false; @@ -1948,12 +2233,16 @@ namespace seq { // Modifier: apply_power_split // For a variable x facing a power token u^n, branch: // (1) x = u^m · prefix(u) with m < n (bounded power prefix) - // (2) x = u^n · x (the variable extends past the full power) - // Approximated since we lack integer constraint infrastructure. + // (2) x = u^n · x' (the variable extends past the full power) + // Generates integer side constraints for the fresh exponent variables. // mirrors ZIPT's PowerSplitModifier // ----------------------------------------------------------------------- bool nielsen_graph::apply_power_split(nielsen_node* node) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + seq_util& seq = m_sg.get_seq_util(); + euf::snode* power = nullptr; euf::snode* var_head = nullptr; str_eq const* eq = nullptr; @@ -1962,28 +2251,43 @@ namespace seq { SASSERT(power->is_power() && power->num_args() >= 1); euf::snode* base = power->arg(0); + expr* exp_n = get_power_exponent(power); + expr* zero = arith.mk_int(0); - // Branch 1: x = base^m · prefix where m < n - // Approximated: x = fresh_power_var · fresh_suffix + // Branch 1: x = base^m · prefix where 0 <= m < n + // Side constraints: m >= 0, m < n (i.e., n >= m + 1) { - euf::snode* fresh_power = mk_fresh_var(); - euf::snode* fresh_suffix = mk_fresh_var(); + expr_ref fresh_m = mk_fresh_int_var(); + euf::snode* fresh_power = mk_fresh_var(); // represents base^m + euf::snode* fresh_suffix = mk_fresh_var(); // represents prefix(base) euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix); nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); nielsen_subst s(var_head, replacement, eq->m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + // m >= 0 + e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq->m_dep)); + // m < n ⟺ n >= m + 1 + if (exp_n) { + expr_ref m_plus_1(arith.mk_add(fresh_m, arith.mk_int(1)), m); + e->add_side_int(mk_int_constraint(exp_n, m_plus_1, int_constraint_kind::ge, eq->m_dep)); + } } - // Branch 2: x = u^n · x (variable extends past full power, non-progress) + // Branch 2: x = u^n · x' (variable extends past full power, non-progress) + // Side constraint: n >= 0 { - euf::snode* replacement = m_sg.mk_concat(base, var_head); + euf::snode* fresh_tail = mk_fresh_var(); + // Peel one base unit (approximation of extending past the power) + euf::snode* replacement = m_sg.mk_concat(base, fresh_tail); nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, false); nielsen_subst s(var_head, replacement, eq->m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::ge, eq->m_dep)); } return true; @@ -1994,10 +2298,14 @@ namespace seq { // For a power token u^n facing a variable, branch: // (1) n = 0 → u^n = ε (replace power with empty) // (2) n >= 1 → peel one u from the power + // Generates integer side constraints for each branch. // mirrors ZIPT's VarNumUnwindingModifier // ----------------------------------------------------------------------- bool nielsen_graph::apply_var_num_unwinding(nielsen_node* node) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + euf::snode* power = nullptr; euf::snode* var_head = nullptr; str_eq const* eq = nullptr; @@ -2006,18 +2314,24 @@ namespace seq { SASSERT(power->is_power() && power->num_args() >= 1); euf::snode* base = power->arg(0); + expr* exp_n = get_power_exponent(power); + expr* zero = arith.mk_int(0); + expr* one = arith.mk_int(1); // Branch 1: n = 0 → replace u^n with ε (progress) + // Side constraint: n = 0 { nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep)); } // Branch 2: n >= 1 → peel one u: u^n → u · u^(n-1) - // Approximated: replace u^n with base · fresh_var + // Side constraint: n >= 1 { euf::snode* fresh = mk_fresh_var(); euf::snode* replacement = m_sg.mk_concat(base, fresh); @@ -2026,6 +2340,8 @@ namespace seq { nielsen_subst s(power, replacement, eq->m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); } return true; @@ -2173,5 +2489,261 @@ namespace seq { } } + // ----------------------------------------------------------------------- + // int_constraint display + // ----------------------------------------------------------------------- + + std::ostream& int_constraint::display(std::ostream& out) const { + if (m_lhs) out << mk_pp(m_lhs, m_lhs.get_manager()); + else out << "null"; + switch (m_kind) { + case int_constraint_kind::eq: out << " = "; break; + case int_constraint_kind::le: out << " <= "; break; + case int_constraint_kind::ge: out << " >= "; break; + } + if (m_rhs) out << mk_pp(m_rhs, m_rhs.get_manager()); + else out << "null"; + return out; + } + + // ----------------------------------------------------------------------- + // LP integer subsolver implementation + // Replaces ZIPT's SubSolver with Z3's native lp::lar_solver. + // ----------------------------------------------------------------------- + + void nielsen_graph::lp_solver_reset() { + m_lp_solver = alloc(lp::lar_solver); + m_expr2lpvar.reset(); + m_lp_ext_cnt = 0; + } + + lp::lpvar nielsen_graph::lp_ensure_var(expr* e) { + if (!e) return lp::null_lpvar; + + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + // If already mapped, return cached + unsigned eid = e->get_id(); + lp::lpvar j; + if (m_expr2lpvar.find(eid, j)) + return j; + + // Check if this is a numeral — shouldn't be called with a numeral directly, + // but handle it gracefully by creating a fixed variable + rational val; + if (arith.is_numeral(e, val)) { + j = m_lp_solver->add_var(m_lp_ext_cnt++, true); + m_lp_solver->add_var_bound(j, lp::EQ, lp::mpq(val)); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Decompose linear arithmetic: a + b → create LP term (1*a + 1*b) + expr* e1 = nullptr; + expr* e2 = nullptr; + if (arith.is_add(e, e1, e2)) { + lp::lpvar j1 = lp_ensure_var(e1); + lp::lpvar j2 = lp_ensure_var(e2); + if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(1), j1}); + coeffs.push_back({lp::mpq(1), j2}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Decompose: a - b → create LP term (1*a + (-1)*b) + if (arith.is_sub(e, e1, e2)) { + lp::lpvar j1 = lp_ensure_var(e1); + lp::lpvar j2 = lp_ensure_var(e2); + if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(1), j1}); + coeffs.push_back({lp::mpq(-1), j2}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Decompose: c * x where c is numeral → create LP term (c*x) + if (arith.is_mul(e, e1, e2)) { + rational coeff; + if (arith.is_numeral(e1, coeff)) { + lp::lpvar jx = lp_ensure_var(e2); + if (jx == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(coeff), jx}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + if (arith.is_numeral(e2, coeff)) { + lp::lpvar jx = lp_ensure_var(e1); + if (jx == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(coeff), jx}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + } + + // Decompose: -x → create LP term (-1*x) + if (arith.is_uminus(e, e1)) { + lp::lpvar jx = lp_ensure_var(e1); + if (jx == lp::null_lpvar) return lp::null_lpvar; + vector> coeffs; + coeffs.push_back({lp::mpq(-1), jx}); + j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + m_expr2lpvar.insert(eid, j); + return j; + } + + // Leaf expression (variable, length, or other opaque term): map to fresh LP var + j = m_lp_solver->add_var(m_lp_ext_cnt++, true /* is_integer */); + m_expr2lpvar.insert(eid, j); + return j; + } + + void nielsen_graph::lp_add_constraint(int_constraint const& ic) { + if (!m_lp_solver || !ic.m_lhs || !ic.m_rhs) + return; + + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + // We handle simple patterns: + // expr numeral → add_var_bound + // expr expr → create term (lhs - rhs) and bound to 0 + rational val; + bool is_num_rhs = arith.is_numeral(ic.m_rhs, val); + + if (is_num_rhs && (is_app(ic.m_lhs))) { + // Simple case: single variable or len(x) vs numeral + lp::lpvar j = lp_ensure_var(ic.m_lhs); + if (j == lp::null_lpvar) return; + lp::mpq rhs_val(val); + switch (ic.m_kind) { + case int_constraint_kind::eq: + m_lp_solver->add_var_bound(j, lp::EQ, rhs_val); + break; + case int_constraint_kind::le: + m_lp_solver->add_var_bound(j, lp::LE, rhs_val); + break; + case int_constraint_kind::ge: + m_lp_solver->add_var_bound(j, lp::GE, rhs_val); + break; + } + return; + } + + // General case: create term lhs - rhs and bound it + // For now, handle single variable on LHS vs single variable on RHS + // (covers the common case of len(x) = len(y), n >= 0, etc.) + bool is_num_lhs = arith.is_numeral(ic.m_lhs, val); + if (is_num_lhs) { + // numeral expr → reverse and flip kind + lp::lpvar j = lp_ensure_var(ic.m_rhs); + if (j == lp::null_lpvar) return; + lp::mpq lhs_val(val); + switch (ic.m_kind) { + case int_constraint_kind::eq: + m_lp_solver->add_var_bound(j, lp::EQ, lhs_val); + break; + case int_constraint_kind::le: + // val <= rhs ⇔ rhs >= val + m_lp_solver->add_var_bound(j, lp::GE, lhs_val); + break; + case int_constraint_kind::ge: + // val >= rhs ⇔ rhs <= val + m_lp_solver->add_var_bound(j, lp::LE, lhs_val); + break; + } + return; + } + + // Both sides are expressions: create term (lhs - rhs) with bound 0 + lp::lpvar j_lhs = lp_ensure_var(ic.m_lhs); + lp::lpvar j_rhs = lp_ensure_var(ic.m_rhs); + if (j_lhs == lp::null_lpvar || j_rhs == lp::null_lpvar) return; + + // Create a term: 1*lhs + (-1)*rhs + vector> coeffs; + coeffs.push_back({lp::mpq(1), j_lhs}); + coeffs.push_back({lp::mpq(-1), j_rhs}); + lp::lpvar term_j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++); + + lp::mpq zero(0); + switch (ic.m_kind) { + case int_constraint_kind::eq: + m_lp_solver->add_var_bound(term_j, lp::EQ, zero); + break; + case int_constraint_kind::le: + m_lp_solver->add_var_bound(term_j, lp::LE, zero); + break; + case int_constraint_kind::ge: + m_lp_solver->add_var_bound(term_j, lp::GE, zero); + break; + } + } + + void nielsen_graph::collect_path_int_constraints(nielsen_node* node, + svector const& cur_path, + vector& out) { + // collect from root node + if (m_root) { + for (auto const& ic : m_root->int_constraints()) + out.push_back(ic); + } + + // collect from edges along the path and their target nodes + for (nielsen_edge* e : cur_path) { + for (auto const& ic : e->side_int()) + out.push_back(ic); + if (e->tgt()) { + for (auto const& ic : e->tgt()->int_constraints()) + out.push_back(ic); + } + } + } + + bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector const& cur_path) { + vector constraints; + collect_path_int_constraints(node, cur_path, constraints); + + if (constraints.empty()) + return true; // no integer constraints, trivially feasible + + lp_solver_reset(); + + for (auto const& ic : constraints) + lp_add_constraint(ic); + + lp::lp_status status = m_lp_solver->find_feasible_solution(); + return status != lp::lp_status::INFEASIBLE; + } + + int_constraint nielsen_graph::mk_int_constraint(expr* lhs, expr* rhs, + int_constraint_kind kind, + dep_tracker const& dep) { + return int_constraint(lhs, rhs, kind, dep, m_sg.get_manager()); + } + + expr* nielsen_graph::get_power_exponent(euf::snode* power) { + if (!power || !power->is_power()) return nullptr; + if (power->num_args() < 2) return nullptr; + euf::snode* exp_snode = power->arg(1); + return exp_snode ? exp_snode->get_expr() : nullptr; + } + + expr_ref nielsen_graph::mk_fresh_int_var() { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + std::string name = "n!" + std::to_string(m_fresh_cnt++); + return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); + } + } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index caaa8533d..ef55be8c1 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -237,6 +237,7 @@ Author: #include "ast/arith_decl_plugin.h" #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_sgraph.h" +#include "math/lp/lar_solver.h" namespace seq { @@ -480,6 +481,34 @@ namespace seq { m_expr(e, m), m_dep(dep), m_kind(kind) {} }; + // ----------------------------------------------- + // integer constraint: equality or inequality over length expressions + // mirrors ZIPT's IntEq and IntLe + // ----------------------------------------------- + + enum class int_constraint_kind { + eq, // lhs = rhs + le, // lhs <= rhs + ge, // lhs >= rhs + }; + + // integer constraint stored per nielsen_node, tracking arithmetic + // relationships between length variables and power exponents. + // mirrors ZIPT's IntEq / IntLe over Presburger arithmetic polynomials. + struct int_constraint { + expr_ref m_lhs; // left-hand side (arithmetic expression) + expr_ref m_rhs; // right-hand side (arithmetic expression) + int_constraint_kind m_kind; // eq, le, or ge + dep_tracker m_dep; // tracks which input constraints contributed + + int_constraint(ast_manager& m): + m_lhs(m), m_rhs(m), m_kind(int_constraint_kind::eq) {} + int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep, ast_manager& m): + m_lhs(lhs, m), m_rhs(rhs, m), m_kind(kind), m_dep(dep) {} + + std::ostream& display(std::ostream& out) const; + }; + // edge in the Nielsen graph connecting two nodes // mirrors ZIPT's NielsenEdge class nielsen_edge { @@ -489,6 +518,7 @@ namespace seq { vector m_char_subst; // character-level substitutions (mirrors ZIPT's SubstC) ptr_vector m_side_str_eq; // side constraints: string equalities ptr_vector m_side_str_mem; // side constraints: regex memberships + vector m_side_int; // side constraints: integer equalities/inequalities bool m_is_progress; // does this edge represent progress? public: nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress); @@ -506,9 +536,11 @@ namespace seq { void add_side_str_eq(str_eq* eq) { m_side_str_eq.push_back(eq); } void add_side_str_mem(str_mem* mem) { m_side_str_mem.push_back(mem); } + void add_side_int(int_constraint const& ic) { m_side_int.push_back(ic); } ptr_vector const& side_str_eq() const { return m_side_str_eq; } ptr_vector const& side_str_mem() const { return m_side_str_mem; } + vector const& side_int() const { return m_side_int; } bool is_progress() const { return m_is_progress; } @@ -528,6 +560,7 @@ namespace seq { // constraints at this node vector m_str_eq; // string equalities vector m_str_mem; // regex memberships + vector m_int_constraints; // integer equalities/inequalities (mirrors ZIPT's IntEq/IntLe) // character constraints (mirrors ZIPT's DisEqualities and CharRanges) // key: snode id of the s_unit symbolic character @@ -562,6 +595,10 @@ namespace seq { void add_str_eq(str_eq const& eq) { m_str_eq.push_back(eq); } void add_str_mem(str_mem const& mem) { m_str_mem.push_back(mem); } + void add_int_constraint(int_constraint const& ic) { m_int_constraints.push_back(ic); } + + vector const& int_constraints() const { return m_int_constraints; } + vector& int_constraints() { return m_int_constraints; } // character constraint access (mirrors ZIPT's DisEqualities / CharRanges) u_map> const& char_diseqs() const { return m_char_diseqs; } @@ -687,6 +724,15 @@ namespace seq { unsigned m_num_input_mems = 0; nielsen_stats m_stats; + // ----------------------------------------------- + // Integer subsolver using lp::lar_solver + // Replaces ZIPT's SubSolver (auxiliary Z3 instance) + // with Z3's native LP solver for integer feasibility. + // ----------------------------------------------- + scoped_ptr m_lp_solver; + u_map m_expr2lpvar; // maps expr id → LP variable + unsigned m_lp_ext_cnt = 0; // external variable counter for LP solver + public: nielsen_graph(euf::sgraph& sg); ~nielsen_graph(); @@ -791,9 +837,28 @@ namespace seq { // variable Nielsen modifier: var vs var, all progress (3 branches) bool apply_var_nielsen(nielsen_node* node); - // eq split modifier: var vs var (3 branches) + // eq split modifier: splits a regex-free equation at a chosen index into + // two shorter equalities with optional padding (single progress child). + // Mirrors ZIPT's EqSplitModifier + StrEq.SplitEq. bool apply_eq_split(nielsen_node* node); + // helper: classify whether a token has variable (symbolic) length + // returns true for variables, powers, etc.; false for chars, units, string literals + bool token_has_variable_length(euf::snode* tok) const; + + // helper: get the constant length of a token (only valid when !token_has_variable_length) + unsigned token_const_length(euf::snode* tok) const; + + // helper: find a split point in a regex-free equation. + // ports ZIPT's StrEq.SplitEq algorithm. + // returns true if a valid split point is found, with results in out params. + bool find_eq_split_point( + euf::snode_vector const& lhs_toks, + euf::snode_vector const& rhs_toks, + unsigned& out_lhs_idx, + unsigned& out_rhs_idx, + int& out_padding) const; + // apply regex character split modifier to a node. // for a str_mem constraint x·s ∈ R where x is a variable: // (1) x → c·z for each char c accepted by R at first position @@ -864,6 +929,38 @@ namespace seq { // uses seq_util::rex min_length/max_length on the underlying expression. // max_len == UINT_MAX means unbounded. void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len); + + // ----------------------------------------------- + // LP integer subsolver methods + // ----------------------------------------------- + + // initialize the LP solver fresh for a feasibility check + void lp_solver_reset(); + + // get or create an LP variable for an arithmetic expression + lp::lpvar lp_ensure_var(expr* e); + + // add an int_constraint to the LP solver + void lp_add_constraint(int_constraint const& ic); + + // collect int_constraints along the path from root to the given node, + // including constraints from edges and nodes. + void collect_path_int_constraints(nielsen_node* node, + svector const& cur_path, + vector& out); + + // check integer feasibility of the constraints along the current path. + // returns true if feasible, false if infeasible. + bool check_int_feasibility(nielsen_node* node, svector const& cur_path); + + // create an integer constraint: lhs rhs + int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep); + + // get the exponent expression from a power snode (arg(1)) + expr* get_power_exponent(euf::snode* power); + + // create a fresh integer variable expression (for power exponents) + expr_ref mk_fresh_int_var(); }; } diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp index 3650cdcb1..69ffb4a0f 100644 --- a/src/test/seq_nielsen.cpp +++ b/src/test/seq_nielsen.cpp @@ -463,7 +463,7 @@ static void test_backedge() { SASSERT(root->backedge() == nullptr); } -// test apply_eq_split: basic structure (x·A = y·B produces 3 children via eq_split) +// test var vs var basic structure (x·A = y·B now handled by var_nielsen, not eq_split) static void test_eq_split_basic() { std::cout << "test_eq_split_basic\n"; ast_manager m; @@ -480,21 +480,20 @@ static void test_eq_split_basic() { euf::snode* xa = sg.mk_concat(x, a); euf::snode* yb = sg.mk_concat(y, b); - // x·A = y·B + // x·A = y·B — eq_split returns false (no valid split point), + // falls through to var_nielsen (priority 12) → 3 progress children ng.add_str_eq(xa, yb); seq::nielsen_node* root = ng.root(); - // eq_split fires: both heads are distinct vars - // produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress) bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 3); - // first child is progress (x→ε), rest are non-progress (eq_split) + // all children are progress (var_nielsen marks all as progress) SASSERT(root->outgoing()[0]->is_progress()); } -// test eq_split with solve: x·y = z·w is satisfiable (all vars can be ε) +// test var vs var with solve: x·y = z·w is satisfiable (all vars can be ε) static void test_eq_split_solve_sat() { std::cout << "test_eq_split_solve_sat\n"; ast_manager m; @@ -516,7 +515,7 @@ static void test_eq_split_solve_sat() { SASSERT(result == seq::nielsen_graph::search_result::sat); } -// test eq_split with solve: x·A = y·B is unsat (last char mismatch) +// test var vs var with solve: x·A = y·B is unsat (last char mismatch) static void test_eq_split_solve_unsat() { std::cout << "test_eq_split_solve_unsat\n"; ast_manager m; @@ -538,7 +537,7 @@ static void test_eq_split_solve_unsat() { SASSERT(result == seq::nielsen_graph::search_result::unsat); } -// test eq_split: same var x·A = x·B triggers det modifier (cancel), not eq_split +// test: same var x·A = x·B triggers det modifier (cancel), not eq_split or var_nielsen static void test_eq_split_same_var_det() { std::cout << "test_eq_split_same_var_det\n"; ast_manager m; @@ -560,7 +559,7 @@ static void test_eq_split_same_var_det() { SASSERT(result == seq::nielsen_graph::search_result::unsat); } -// test eq_split: x·y·A = y·x·A is commutation, should be sat (x=y=ε) +// test: x·y·A = y·x·A is commutation, should be sat (x=y=ε) static void test_eq_split_commutation_sat() { std::cout << "test_eq_split_commutation_sat\n"; ast_manager m; @@ -582,6 +581,7 @@ static void test_eq_split_commutation_sat() { } // test apply_const_nielsen: char·A = y produces 2 children (y→ε, y→char·fresh) +// test: A = y is handled by det modifier (variable definition: y → A), producing 1 child static void test_const_nielsen_char_var() { std::cout << "test_const_nielsen_char_var\n"; ast_manager m; @@ -593,19 +593,18 @@ static void test_const_nielsen_char_var() { euf::snode* a = sg.mk_char('A'); euf::snode* y = sg.mk_var(symbol("y")); - // A = y (char vs var) + // A = y (single var definition → det modifier fires) ng.add_str_eq(a, y); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - SASSERT(root->outgoing().size() == 2); - // both branches are progress + // det modifier: y → A (1 progress child) + SASSERT(root->outgoing().size() == 1); SASSERT(root->outgoing()[0]->is_progress()); - SASSERT(root->outgoing()[1]->is_progress()); } -// test apply_const_nielsen: x = B·y produces 2 children (x→ε, x→B·fresh) +// test: x = B·y is handled by det modifier (variable definition: x → B·y), producing 1 child static void test_const_nielsen_var_char() { std::cout << "test_const_nielsen_var_char\n"; ast_manager m; @@ -619,15 +618,15 @@ static void test_const_nielsen_var_char() { euf::snode* b = sg.mk_char('B'); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* by = sg.mk_concat(b, y); - // x = B·y + // x = B·y (single var definition → det modifier fires) ng.add_str_eq(x, by); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - SASSERT(root->outgoing().size() == 2); + // det modifier: x → B·y (1 progress child) + SASSERT(root->outgoing().size() == 1); SASSERT(root->outgoing()[0]->is_progress()); - SASSERT(root->outgoing()[1]->is_progress()); } // test const_nielsen solve: A·x = A·B → sat (x = B via det cancel then const_nielsen x→ε or x→B·fresh) @@ -673,7 +672,7 @@ static void test_const_nielsen_solve_unsat() { SASSERT(result == seq::nielsen_graph::search_result::unsat); } -// test const_nielsen priority: A·x = y·B → const_nielsen (2 children), not eq_split (3) +// test const_nielsen priority: A·x = y·B → const_nielsen (2 children), not var_nielsen (3) static void test_const_nielsen_priority_over_eq_split() { std::cout << "test_const_nielsen_priority_over_eq_split\n"; ast_manager m; @@ -696,11 +695,11 @@ static void test_const_nielsen_priority_over_eq_split() { bool extended = ng.generate_extensions(root); SASSERT(extended); - // const_nielsen produces 2 children, not eq_split's 3 + // const_nielsen produces 2 children, not var_nielsen's 3 SASSERT(root->outgoing().size() == 2); } -// test: both sides start with vars → eq_split (3 children), not const_nielsen +// test: both sides start with vars → var_nielsen (3 children), not const_nielsen static void test_const_nielsen_not_applicable_both_vars() { std::cout << "test_const_nielsen_not_applicable_both_vars\n"; ast_manager m; @@ -717,7 +716,7 @@ static void test_const_nielsen_not_applicable_both_vars() { euf::snode* xa = sg.mk_concat(x, a); euf::snode* yb = sg.mk_concat(y, b); - // x·A = y·B → both heads are vars → eq_split + // x·A = y·B → both heads are vars → var_nielsen fires (priority 12) ng.add_str_eq(xa, yb); seq::nielsen_node* root = ng.root(); @@ -976,8 +975,8 @@ static void test_regex_char_split_ground_skip() { // Variable Nielsen modifier tests // ----------------------------------------------------------------------- -// test var_nielsen basic: x = y (two distinct vars) → eq_split fires (priority 5 < 12) -// produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress) +// test var_nielsen basic: x = y (two distinct vars) → det modifier fires (variable definition x → y) +// produces 1 progress child static void test_var_nielsen_basic() { std::cout << "test_var_nielsen_basic\n"; ast_manager m; @@ -990,13 +989,13 @@ static void test_var_nielsen_basic() { euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); - // x = y → eq_split: x→ε, x→y·z_fresh, y→x·z_fresh + // x = y → det: x → y (single var definition) ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - SASSERT(root->outgoing().size() == 3); + SASSERT(root->outgoing().size() == 1); SASSERT(root->outgoing()[0]->is_progress()); } @@ -1022,7 +1021,7 @@ static void test_var_nielsen_same_var_det() { SASSERT(result == seq::nielsen_graph::search_result::unsat); } -// test var_nielsen: char vs var → const_nielsen fires, not var_nielsen +// test var_nielsen: char vs var → det fires (y → A), not var_nielsen static void test_var_nielsen_not_applicable_char() { std::cout << "test_var_nielsen_not_applicable_char\n"; ast_manager m; @@ -1035,13 +1034,13 @@ static void test_var_nielsen_not_applicable_char() { euf::snode* a = sg.mk_char('A'); euf::snode* y = sg.mk_var(symbol("y")); - // A = y → const_nielsen (2 children), not var_nielsen (3) + // A = y → det: y → A (variable definition, 1 child) ng.add_str_eq(a, y); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - SASSERT(root->outgoing().size() == 2); + SASSERT(root->outgoing().size() == 1); } // test var_nielsen solve: x·y = z·w is sat (all vars can be ε) @@ -1109,8 +1108,8 @@ static void test_var_nielsen_commutation_sat() { SASSERT(result == seq::nielsen_graph::search_result::sat); } -// test var_nielsen priority: var vs var → eq_split fires first (priority 5 < 12) -// eq_split produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress) +// test var_nielsen priority: var vs var → det fires first for x = y (variable definition) +// var_nielsen only fires when neither side is a single var (e.g., x·A = y·B) static void test_var_nielsen_priority() { std::cout << "test_var_nielsen_priority\n"; ast_manager m; @@ -1128,14 +1127,14 @@ static void test_var_nielsen_priority() { bool extended = ng.generate_extensions(root); SASSERT(extended); - // eq_split produces 3 children - SASSERT(root->outgoing().size() == 3); - // first edge is progress (x→ε), others are non-progress + // det modifier: x → y (1 progress child) + SASSERT(root->outgoing().size() == 1); + // first edge is progress (all var_nielsen children are progress) SASSERT(root->outgoing()[0]->is_progress()); } -// test generate_extensions: det modifier has priority over const_nielsen -// x·A = x·y → det cancel (1 child), not const_nielsen (2 children) +// test generate_extensions: det modifier handles same-head cancel after simplification +// x·A = x·y → simplify cancels prefix x → A = y → det fires (y → A) static void test_generate_extensions_det_priority() { std::cout << "test_generate_extensions_det_priority\n"; ast_manager m; @@ -1151,15 +1150,10 @@ static void test_generate_extensions_det_priority() { euf::snode* xa = sg.mk_concat(x, a); euf::snode* xy = sg.mk_concat(x, y); - // x·A = x·y → same-head x cancel → A = y (1 child via det) + // x·A = x·y → after simplify, becomes A = y → det: y → A ng.add_str_eq(xa, xy); - seq::nielsen_node* root = ng.root(); - - bool extended = ng.generate_extensions(root); - SASSERT(extended); - // det modifier produces 1 child (head cancel), not 2 (const) or 3 (var) - SASSERT(root->outgoing().size() == 1); - SASSERT(root->outgoing()[0]->is_progress()); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); } // test generate_extensions: returns false when no modifier applies @@ -1217,8 +1211,7 @@ static void test_generate_extensions_regex_only() { SASSERT(root->outgoing().size() >= 1); } -// test generate_extensions: mixed constraints, det fires first -// x·A = x·B and y ∈ R → det cancel on eq first, regex untouched +// test: mixed constraints, x·A = x·B and y ∈ R → after simplify, A = B clash → unsat static void test_generate_extensions_mixed_det_first() { std::cout << "test_generate_extensions_mixed_det_first\n"; ast_manager m; @@ -1242,14 +1235,11 @@ static void test_generate_extensions_mixed_det_first() { expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); euf::snode* re_node = sg.mk(to_re_a); + // x·A = x·B → simplify cancels x → A = B → clash → unsat ng.add_str_eq(xa, xb); ng.add_str_mem(y, re_node); - seq::nielsen_node* root = ng.root(); - - bool extended = ng.generate_extensions(root); - SASSERT(extended); - // det modifier (same-head x cancel) produces 1 child - SASSERT(root->outgoing().size() == 1); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); } // ----------------------------------------------------------------------- @@ -1827,7 +1817,7 @@ static void test_simplify_multiple_eqs() { // Modifier child state verification tests // ----------------------------------------------------------------------- -// test det cancel: verify child has the tail equality +// test det cancel: x·A = x·B → simplify cancels prefix x → A = B → clash → unsat static void test_det_cancel_child_eq() { std::cout << "test_det_cancel_child_eq\n"; ast_manager m; @@ -1842,22 +1832,14 @@ static void test_det_cancel_child_eq() { euf::snode* xa = sg.mk_concat(x, a); euf::snode* xb = sg.mk_concat(x, b); - // x·A = x·B → det same-head cancel → child has A = B + // x·A = x·B → simplify cancels x → A = B → clash → unsat ng.add_str_eq(xa, xb); - seq::nielsen_node* root = ng.root(); - - bool extended = ng.generate_extensions(root); - SASSERT(extended); - SASSERT(root->outgoing().size() == 1); - - seq::nielsen_node* child = root->outgoing()[0]->tgt(); - SASSERT(child->str_eqs().size() == 1); - auto const& ceq = child->str_eqs()[0]; - // child's eq should have the two single chars (sorted) - SASSERT((ceq.m_lhs == a && ceq.m_rhs == b) || (ceq.m_lhs == b && ceq.m_rhs == a)); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); } // test const_nielsen: verify children's substitutions target the variable +// A·x = y·B → char vs var: const_nielsen fires (2 children, both substitute y) static void test_const_nielsen_child_substitutions() { std::cout << "test_const_nielsen_child_substitutions\n"; ast_manager m; @@ -1867,10 +1849,14 @@ static void test_const_nielsen_child_substitutions() { seq::nielsen_graph ng(sg); euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* yb = sg.mk_concat(y, b); - // A = y → const_nielsen → 2 children, both substitute y - ng.add_str_eq(a, y); + // A·x = y·B → const_nielsen: 2 children, both substitute y + ng.add_str_eq(ax, yb); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); @@ -1889,7 +1875,7 @@ static void test_const_nielsen_child_substitutions() { SASSERT(!root->outgoing()[1]->subst()[0].m_replacement->is_empty()); } -// test var_nielsen: verify substitution structure of 3 children +// test var_nielsen: verify substitution structure — det fires for x = y (single var def) static void test_var_nielsen_substitution_types() { std::cout << "test_var_nielsen_substitution_types\n"; ast_manager m; @@ -1901,29 +1887,17 @@ static void test_var_nielsen_substitution_types() { euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); - // x = y → var_nielsen: 3 children + // x = y → det: x → y (single var definition, 1 child) ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - SASSERT(root->outgoing().size() == 3); + SASSERT(root->outgoing().size() == 1); - // edge 0: elimination to ε + // edge 0: x → y substitution SASSERT(root->outgoing()[0]->subst().size() == 1); - SASSERT(root->outgoing()[0]->subst()[0].m_replacement->is_empty()); - SASSERT(root->outgoing()[0]->subst()[0].is_eliminating()); - - // edges 1,2: replacements are non-empty (concat with fresh var) - SASSERT(root->outgoing()[1]->subst().size() == 1); - SASSERT(!root->outgoing()[1]->subst()[0].m_replacement->is_empty()); - - SASSERT(root->outgoing()[2]->subst().size() == 1); - SASSERT(!root->outgoing()[2]->subst()[0].m_replacement->is_empty()); - - // edges 1 and 2 target different variables - SASSERT(root->outgoing()[1]->subst()[0].m_var != - root->outgoing()[2]->subst()[0].m_var); + SASSERT(root->outgoing()[0]->is_progress()); } // ----------------------------------------------------------------------- @@ -2381,13 +2355,11 @@ static void test_power_epsilon_no_power() { ng.add_str_eq(x, a); seq::nielsen_node* root = ng.root(); - // det fires (x = single char → const_nielsen fires eventually), - // but power_epsilon (priority 2) should not fire; det (priority 1) fires. + // det fires (x is single var, A doesn't contain x → x → A) bool extended = ng.generate_extensions(root); SASSERT(extended); - // det catches x = A first (single token eq, but actually ConstNielsen fires): - // x is var, A is char → ConstNielsen: 2 children (x→ε, x→A) - SASSERT(root->outgoing().size() == 2); + // det: x → A (variable definition, 1 child) + SASSERT(root->outgoing().size() == 1); } // test_num_cmp_no_power: no same-base power pair → modifier returns false @@ -2409,8 +2381,8 @@ static void test_num_cmp_no_power() { bool extended = ng.generate_extensions(root); SASSERT(extended); - // eq_split fires (priority 5): 3 children - SASSERT(root->outgoing().size() == 3); + // det fires (x → y, variable definition): 1 child + SASSERT(root->outgoing().size() == 1); } // test_star_intr_no_backedge: no backedge → modifier returns false @@ -2529,15 +2501,15 @@ static void test_gpower_intr_no_repeat() { euf::snode* b = sg.mk_char('B'); euf::snode* ab = sg.mk_concat(a, b); - // x = AB → only 1 repeated 'A', needs >= 2 + // x = AB → det fires (x is single var, AB doesn't contain x → x → AB) ng.add_str_eq(x, ab); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); // gpower_intr should NOT fire (< 2 repeats) - // const_nielsen (priority 8) fires for var vs char: 2 children - SASSERT(root->outgoing().size() == 2); + // det (priority 1) fires: x → AB, 1 child + SASSERT(root->outgoing().size() == 1); } // test_regex_var_split_basic: x ∈ re → uses minterms for splitting @@ -2593,13 +2565,14 @@ static void test_power_split_no_power() { euf::snode* xa = sg.mk_concat(x, a); // x·A = y: no power tokens, power_split should not fire + // det fires (y is single var, y ∉ vars(x·A) → y → x·A) ng.add_str_eq(xa, y); seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - // eq_split or const_nielsen fires - SASSERT(root->outgoing().size() >= 2); + // det fires: 1 child (y → x·A) + SASSERT(root->outgoing().size() == 1); } // test_var_num_unwinding_no_power: no power tokens → modifier returns false @@ -2621,8 +2594,8 @@ static void test_var_num_unwinding_no_power() { bool extended = ng.generate_extensions(root); SASSERT(extended); - // eq_split fires: 3 children - SASSERT(root->outgoing().size() == 3); + // det fires: 1 child (x → y) + SASSERT(root->outgoing().size() == 1); } // test_const_num_unwinding_no_power: no power vs const → modifier returns false @@ -2654,26 +2627,26 @@ static void test_priority_chain_order() { ast_manager m; reg_decl_plugins(m); - // Case 1: same-head cancel → Det (priority 1) fires + // Case 1: same-head cancel → simplify handles prefix cancel, then det/clash + // x·A = x·B → simplify: prefix cancel x → A = B → clash + // Use a non-clashing example: x·A = x·y → simplify: prefix cancel x → A = y → det: y → A { euf::egraph eg(m); euf::sgraph sg(m, eg); seq::nielsen_graph ng(sg); euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); - euf::snode* b = sg.mk_char('B'); euf::snode* xa = sg.mk_concat(x, a); - euf::snode* xb = sg.mk_concat(x, b); + euf::snode* xy = sg.mk_concat(x, y); - ng.add_str_eq(xa, xb); - seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root); - SASSERT(extended); - SASSERT(root->outgoing().size() == 1); // Det: single child (cancel) + ng.add_str_eq(xa, xy); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); } - // Case 2: both vars different → EqSplit (priority 5) fires + // Case 2: both vars different → Det (priority 1) fires (variable definition x → y) { euf::egraph eg(m); euf::sgraph sg(m, eg); @@ -2686,10 +2659,10 @@ static void test_priority_chain_order() { seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - SASSERT(root->outgoing().size() == 3); // EqSplit: 3 children + SASSERT(root->outgoing().size() == 1); // Det: variable definition, 1 child } - // Case 3: char vs var → ConstNielsen (priority 8) fires + // Case 3: char vs var → Det (priority 1) fires (variable definition y → A) { euf::egraph eg(m); euf::sgraph sg(m, eg); @@ -2702,7 +2675,7 @@ static void test_priority_chain_order() { seq::nielsen_node* root = ng.root(); bool extended = ng.generate_extensions(root); SASSERT(extended); - SASSERT(root->outgoing().size() == 2); // ConstNielsen: 2 children + SASSERT(root->outgoing().size() == 1); // Det: variable definition, 1 child } }