3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 05:44:51 +00:00

Trying to port integer constraints

This commit is contained in:
CEisenhofer 2026-03-06 14:01:21 +01:00
parent 009c6de235
commit 756673f104
3 changed files with 803 additions and 161 deletions

View file

@ -22,9 +22,11 @@ Author:
#include "smt/seq/seq_nielsen.h"
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
#include "math/lp/lar_solver.h"
#include "util/bit_util.h"
#include "util/hashtable.h"
#include <algorithm>
#include <cstdlib>
#include <sstream>
namespace seq {
@ -308,12 +310,15 @@ namespace seq {
void nielsen_node::clone_from(nielsen_node const& parent) {
m_str_eq.reset();
m_str_mem.reset();
m_int_constraints.reset();
m_char_diseqs.reset();
m_char_ranges.reset();
for (auto const& eq : parent.m_str_eq)
m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep));
for (auto const& mem : parent.m_str_mem)
m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep));
for (auto const& ic : parent.m_int_constraints)
m_int_constraints.push_back(ic);
// clone character disequalities
for (auto const& kv : parent.m_char_diseqs) {
ptr_vector<euf::snode> diseqs;
@ -1093,6 +1098,13 @@ namespace seq {
return search_result::sat;
}
// integer feasibility check: collect side constraints along the path
// and verify they are jointly satisfiable using the LP solver
if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) {
node->set_reason(backtrack_reason::arithmetic);
return search_result::unsat;
}
// depth bound check
if (depth >= m_depth_bound)
return search_result::unknown;
@ -1302,12 +1314,197 @@ namespace seq {
return false;
}
// -----------------------------------------------------------------------
// EqSplit helpers: token length classification
// -----------------------------------------------------------------------
bool nielsen_graph::token_has_variable_length(euf::snode* tok) const {
// Chars and units have known constant length 1.
if (tok->is_char() || tok->is_unit())
return false;
// Variables and powers have symbolic/unknown length.
if (tok->is_var() || tok->is_power())
return true;
// For s_other: check if it's a string literal (known constant length).
if (tok->get_expr()) {
seq_util& seq = m_sg.get_seq_util();
zstring s;
if (seq.str.is_string(tok->get_expr(), s))
return false;
}
// Everything else is treated as variable length.
return true;
}
unsigned nielsen_graph::token_const_length(euf::snode* tok) const {
if (tok->is_char() || tok->is_unit())
return 1;
if (tok->get_expr()) {
seq_util& seq = m_sg.get_seq_util();
zstring s;
if (seq.str.is_string(tok->get_expr(), s))
return s.length();
}
return 0;
}
// -----------------------------------------------------------------------
// EqSplit: find_eq_split_point
// Port of ZIPT's StrEq.SplitEq algorithm.
//
// Walks tokens from each side tracking two accumulators:
// - lhs_has_symbolic / rhs_has_symbolic : whether a variable-length token
// has been consumed on that side since the last reset
// - const_diff : net constant-length difference (LHS constants RHS constants)
//
// A potential split point arises when both accumulators are "zero"
// (no outstanding symbolic contributions on either side), meaning
// the prefix lengths are determined up to a constant offset (const_diff).
//
// Among all such split points, we pick the one minimising |const_diff|
// (the padding amount). We also require having seen at least one variable-
// length token before accepting a split, so that the split is non-trivial.
// -----------------------------------------------------------------------
bool nielsen_graph::find_eq_split_point(
euf::snode_vector const& lhs_toks,
euf::snode_vector const& rhs_toks,
unsigned& out_lhs_idx,
unsigned& out_rhs_idx,
int& out_padding) const {
unsigned lhs_len = lhs_toks.size();
unsigned rhs_len = rhs_toks.size();
if (lhs_len <= 1 || rhs_len <= 1)
return false;
// Initialize accumulators with the first token on each side (index 0).
bool lhs_has_symbolic = token_has_variable_length(lhs_toks[0]);
bool rhs_has_symbolic = token_has_variable_length(rhs_toks[0]);
int const_diff = 0;
if (!lhs_has_symbolic)
const_diff += (int)token_const_length(lhs_toks[0]);
if (!rhs_has_symbolic)
const_diff -= (int)token_const_length(rhs_toks[0]);
bool seen_variable = lhs_has_symbolic || rhs_has_symbolic;
// Best confirmed split point.
bool has_best = false;
unsigned best_lhs = 0, best_rhs = 0;
int best_padding = 0;
// Pending split (needs confirmation by a subsequent variable token).
bool has_pending = false;
unsigned pending_lhs = 0, pending_rhs = 0;
int pending_padding = 0;
unsigned li = 1, ri = 1;
while (li < lhs_len || ri < rhs_len) {
bool lhs_zero = !lhs_has_symbolic;
bool rhs_zero = !rhs_has_symbolic;
// Potential split point: both symbolic accumulators are zero.
if (seen_variable && lhs_zero && rhs_zero) {
if (!has_pending || std::abs(const_diff) < std::abs(pending_padding)) {
has_pending = true;
pending_padding = const_diff;
pending_lhs = li;
pending_rhs = ri;
}
}
// Decide which side to consume from.
// Prefer consuming from the side whose symbolic accumulator is zero
// (i.e., that side has no outstanding variable contributions).
bool consume_lhs;
if (lhs_zero && !rhs_zero)
consume_lhs = true;
else if (!lhs_zero && rhs_zero)
consume_lhs = false;
else if (lhs_zero && rhs_zero)
consume_lhs = (const_diff <= 0); // consume from side with less accumulated constant
else
consume_lhs = (li <= ri); // both non-zero: round-robin
if (consume_lhs) {
if (li >= lhs_len) break;
euf::snode* tok = lhs_toks[li++];
bool is_var = token_has_variable_length(tok);
if (is_var) {
// Confirm any pending split point.
if (has_pending && (!has_best || std::abs(pending_padding) < std::abs(best_padding))) {
has_best = true;
best_padding = pending_padding;
best_lhs = pending_lhs;
best_rhs = pending_rhs;
has_pending = false;
}
seen_variable = true;
lhs_has_symbolic = true;
} else {
const_diff += (int)token_const_length(tok);
}
} else {
if (ri >= rhs_len) break;
euf::snode* tok = rhs_toks[ri++];
bool is_var = token_has_variable_length(tok);
if (is_var) {
if (has_pending && (!has_best || std::abs(pending_padding) < std::abs(best_padding))) {
has_best = true;
best_padding = pending_padding;
best_lhs = pending_lhs;
best_rhs = pending_rhs;
has_pending = false;
}
seen_variable = true;
rhs_has_symbolic = true;
} else {
const_diff -= (int)token_const_length(tok);
}
}
}
if (!has_best) return false;
// A split at the very start or very end is degenerate — skip.
if ((best_lhs == 0 && best_rhs == 0) ||
(best_lhs == lhs_len && best_rhs == rhs_len))
return false;
out_lhs_idx = best_lhs;
out_rhs_idx = best_rhs;
out_padding = best_padding;
return true;
}
// -----------------------------------------------------------------------
// apply_eq_split — Port of ZIPT's EqSplitModifier.Apply
//
// For a regex-free equation LHS = RHS, finds a split point and decomposes
// into two shorter equations with optional padding variable:
//
// eq1: LHS[0..lhsIdx] · [pad if padding<0] = [pad if padding>0] · RHS[0..rhsIdx]
// eq2: [pad if padding>0] · LHS[lhsIdx..] = RHS[rhsIdx..] · [pad if padding<0]
//
// Creates a single progress child.
// -----------------------------------------------------------------------
bool nielsen_graph::apply_eq_split(nielsen_node* node) {
for (str_eq const& eq : node->str_eqs()) {
ast_manager& m = m_sg.get_manager();
seq_util& seq = m_sg.get_seq_util();
arith_util arith(m);
for (unsigned eq_idx = 0; eq_idx < node->str_eqs().size(); ++eq_idx) {
str_eq const& eq = node->str_eqs()[eq_idx];
if (eq.is_trivial())
continue;
if (!eq.m_lhs || !eq.m_rhs)
continue;
// EqSplit only applies to regex-free equations.
if (!eq.m_lhs->is_regex_free() || !eq.m_rhs->is_regex_free())
continue;
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
@ -1315,41 +1512,70 @@ namespace seq {
if (lhs_toks.empty() || rhs_toks.empty())
continue;
euf::snode* lhead = lhs_toks[0];
euf::snode* rhead = rhs_toks[0];
if (!lhead->is_var() || !rhead->is_var())
unsigned split_lhs = 0, split_rhs = 0;
int padding = 0;
if (!find_eq_split_point(lhs_toks, rhs_toks, split_lhs, split_rhs, padding))
continue;
// x·A = y·B where x,y are distinct variables
// child 1: x → ε (progress)
// Split the equation sides into prefix / suffix.
euf::snode* lhs_prefix = m_sg.drop_right(eq.m_lhs, lhs_toks.size() - split_lhs);
euf::snode* lhs_suffix = m_sg.drop_left(eq.m_lhs, split_lhs);
euf::snode* rhs_prefix = m_sg.drop_right(eq.m_rhs, rhs_toks.size() - split_rhs);
euf::snode* rhs_suffix = m_sg.drop_left(eq.m_rhs, split_rhs);
// Build the two new equations, incorporating a padding variable if needed.
euf::snode* eq1_lhs = lhs_prefix;
euf::snode* eq1_rhs = rhs_prefix;
euf::snode* eq2_lhs = lhs_suffix;
euf::snode* eq2_rhs = rhs_suffix;
euf::snode* pad = nullptr;
if (padding != 0) {
pad = mk_fresh_var();
if (padding > 0) {
// LHS prefix is longer by |padding| constants.
// Prepend pad to RHS prefix, append pad to LHS suffix.
eq1_rhs = m_sg.mk_concat(pad, rhs_prefix);
eq2_lhs = m_sg.mk_concat(lhs_suffix, pad);
} else {
// RHS prefix is longer by |padding| constants.
// Append pad to LHS prefix, prepend pad to RHS suffix.
eq1_lhs = m_sg.mk_concat(lhs_prefix, pad);
eq2_rhs = m_sg.mk_concat(pad, rhs_suffix);
}
}
// Create single progress child.
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
// Remove the original equation and add the two new ones.
auto& eqs = child->str_eqs();
eqs[eq_idx] = eqs.back();
eqs.pop_back();
eqs.push_back(str_eq(eq1_lhs, eq1_rhs, eq.m_dep));
eqs.push_back(str_eq(eq2_lhs, eq2_rhs, eq.m_dep));
// Int constraints on the edge.
// 1) len(pad) = |padding| (if padding variable was created)
if (pad && pad->get_expr()) {
expr_ref len_pad(seq.str.mk_length(pad->get_expr()), m);
expr_ref abs_pad(arith.mk_int(std::abs(padding)), m);
e->add_side_int(mk_int_constraint(len_pad, abs_pad, int_constraint_kind::eq, eq.m_dep));
}
// 2) len(eq1_lhs) = len(eq1_rhs)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
expr_ref l1 = compute_length_expr(eq1_lhs);
expr_ref r1 = compute_length_expr(eq1_rhs);
e->add_side_int(mk_int_constraint(l1, r1, int_constraint_kind::eq, eq.m_dep));
}
// child 2: x → y·z_fresh (non-progress, x starts with y)
if (lhead->id() != rhead->id()) {
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(lhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// child 3: y → x·z_fresh (non-progress, y starts with x)
if (lhead->id() != rhead->id()) {
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(rhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
// 3) len(eq2_lhs) = len(eq2_rhs)
{
expr_ref l2 = compute_length_expr(eq2_lhs);
expr_ref r2 = compute_length_expr(eq2_rhs);
e->add_side_int(mk_int_constraint(l2, r2, int_constraint_kind::eq, eq.m_dep));
}
return true;
}
return false;
@ -1525,7 +1751,7 @@ namespace seq {
if (apply_const_num_unwinding(node))
return ++m_stats.m_mod_const_num_unwinding, true;
// Priority 5: EqSplit - var vs var (branching, 3 children)
// Priority 5: EqSplit - split regex-free equation into two (single progress child)
if (apply_eq_split(node))
return ++m_stats.m_mod_eq_split, true;
@ -1675,12 +1901,15 @@ namespace seq {
// -----------------------------------------------------------------------
// Modifier: apply_num_cmp
// For equations involving two power tokens u^m and u^n with the same base,
// branch on the numeric relationship: m < n vs n <= m.
// Approximated as: (1) replace u^m with ε, (2) replace u^n with ε.
// branch on the numeric relationship: m <= n vs n < m.
// Generates proper integer side constraints for each branch.
// mirrors ZIPT's NumCmpModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_num_cmp(nielsen_node* node) {
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
// Look for two power tokens with the same base on opposite sides
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial()) continue;
@ -1693,21 +1922,35 @@ namespace seq {
// same base: compare the two powers
if (lhead->arg(0)->id() != rhead->arg(0)->id()) continue;
// Branch 1: m < n → peel from lhs power (replace lhead with ε)
// Get exponent expressions (m and n from u^m and u^n)
expr* exp_m = get_power_exponent(lhead);
expr* exp_n = get_power_exponent(rhead);
// Branch 1: m <= n → cancel u^m from both sides
// Side constraint: m <= n
// After cancellation: ε·A = u^(n-m)·B
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
if (exp_m && exp_n)
e->add_side_int(mk_int_constraint(exp_m, exp_n, int_constraint_kind::le, eq.m_dep));
}
// Branch 2: n <= m → peel from rhs power (replace rhead with ε)
// Branch 2: n < m → cancel u^n from both sides
// Side constraint: n < m, i.e., n + 1 <= m, i.e., m >= n + 1
// After cancellation: u^(m-n)·A = ε·B
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
if (exp_m && exp_n) {
expr_ref n_plus_1(arith.mk_add(exp_n, arith.mk_int(1)), m);
e->add_side_int(mk_int_constraint(exp_m, n_plus_1, int_constraint_kind::ge, eq.m_dep));
}
}
return true;
}
@ -1718,10 +1961,14 @@ namespace seq {
// Modifier: apply_const_num_unwinding
// For a power token u^n facing a constant (char) head,
// branch: (1) n = 0 → u^n = ε, (2) n >= 1 → peel one u from power.
// Generates integer side constraints for each branch.
// mirrors ZIPT's ConstNumUnwindingModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_const_num_unwinding(nielsen_node* node) {
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
euf::snode* power = nullptr;
euf::snode* other_head = nullptr;
str_eq const* eq = nullptr;
@ -1730,18 +1977,25 @@ namespace seq {
SASSERT(power->is_power() && power->num_args() >= 1);
euf::snode* base = power->arg(0);
expr* exp_n = get_power_exponent(power);
expr* zero = arith.mk_int(0);
expr* one = arith.mk_int(1);
// Branch 1: n = 0 → replace power with ε (progress)
// Side constraint: n = 0
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
if (exp_n)
e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep));
}
// Branch 2: n >= 1 → peel one u: replace u^n with u · u^n'
// Approximated by prepending the base and keeping a fresh power variable
// Branch 2: n >= 1 → peel one u: replace u^n with u · u^(n-1)
// Side constraint: n >= 1
// Use fresh power variable for u^(n-1)
{
euf::snode* fresh_power = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(base, fresh_power);
@ -1750,6 +2004,8 @@ namespace seq {
nielsen_subst s(power, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
if (exp_n)
e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep));
}
return true;
@ -1820,12 +2076,15 @@ namespace seq {
// Modifier: apply_gpower_intr
// Ground power introduction: for a variable x matched against a
// ground repeated pattern, introduce x = base^n · prefix with fresh n.
// Approximated: for each non-trivial equation with a variable head vs
// a ground concatenation, introduce power decomposition.
// Generates integer side constraint n >= 0 for the fresh exponent.
// mirrors ZIPT's GPowerIntrModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_gpower_intr(nielsen_node* node) {
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
seq_util& seq = m_sg.get_seq_util();
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial()) continue;
if (!eq.m_lhs || !eq.m_rhs) continue;
@ -1866,9 +2125,22 @@ namespace seq {
if (repeat_len < 2) continue; // need at least 2 repetitions
// Introduce: x = base^n · fresh_suffix
// Branch with side constraint n >= 0
// Create fresh integer variable n for the exponent
expr_ref fresh_n = mk_fresh_int_var();
expr* base_expr = base_char->get_expr();
// Create the power snode: base^n
euf::snode* fresh_suffix = mk_fresh_var();
euf::snode* fresh_power = mk_fresh_var(); // represents base^n
euf::snode* fresh_power = nullptr;
if (base_expr) {
// Build the seq.power(base_str, n) expression and register it
expr_ref base_str(seq.str.mk_unit(base_expr), m);
expr_ref power_expr(seq.str.mk_power(base_str, fresh_n), m);
fresh_power = m_sg.mk(power_expr);
}
if (!fresh_power)
fresh_power = mk_fresh_var(); // fallback
euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix);
nielsen_node* child = mk_child(node);
@ -1877,6 +2149,19 @@ namespace seq {
e->add_subst(s);
child->apply_subst(m_sg, s);
// Side constraint: n >= 0
expr* zero = arith.mk_int(0);
e->add_side_int(mk_int_constraint(fresh_n, zero, int_constraint_kind::ge, eq.m_dep));
// Side constraint: len(fresh_suffix) < len(base) = 1
// This ensures the remainder is shorter than the base character
if (fresh_suffix->get_expr()) {
expr_ref len_suffix(seq.str.mk_length(fresh_suffix->get_expr()), m);
expr* one = arith.mk_int(1);
e->add_side_int(mk_int_constraint(len_suffix, one, int_constraint_kind::le, eq.m_dep));
e->add_side_int(mk_int_constraint(len_suffix, zero, int_constraint_kind::ge, eq.m_dep));
}
return true;
}
return false;
@ -1948,12 +2233,16 @@ namespace seq {
// Modifier: apply_power_split
// For a variable x facing a power token u^n, branch:
// (1) x = u^m · prefix(u) with m < n (bounded power prefix)
// (2) x = u^n · x (the variable extends past the full power)
// Approximated since we lack integer constraint infrastructure.
// (2) x = u^n · x' (the variable extends past the full power)
// Generates integer side constraints for the fresh exponent variables.
// mirrors ZIPT's PowerSplitModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_power_split(nielsen_node* node) {
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
seq_util& seq = m_sg.get_seq_util();
euf::snode* power = nullptr;
euf::snode* var_head = nullptr;
str_eq const* eq = nullptr;
@ -1962,28 +2251,43 @@ namespace seq {
SASSERT(power->is_power() && power->num_args() >= 1);
euf::snode* base = power->arg(0);
expr* exp_n = get_power_exponent(power);
expr* zero = arith.mk_int(0);
// Branch 1: x = base^m · prefix where m < n
// Approximated: x = fresh_power_var · fresh_suffix
// Branch 1: x = base^m · prefix where 0 <= m < n
// Side constraints: m >= 0, m < n (i.e., n >= m + 1)
{
euf::snode* fresh_power = mk_fresh_var();
euf::snode* fresh_suffix = mk_fresh_var();
expr_ref fresh_m = mk_fresh_int_var();
euf::snode* fresh_power = mk_fresh_var(); // represents base^m
euf::snode* fresh_suffix = mk_fresh_var(); // represents prefix(base)
euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(var_head, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
// m >= 0
e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq->m_dep));
// m < n ⟺ n >= m + 1
if (exp_n) {
expr_ref m_plus_1(arith.mk_add(fresh_m, arith.mk_int(1)), m);
e->add_side_int(mk_int_constraint(exp_n, m_plus_1, int_constraint_kind::ge, eq->m_dep));
}
}
// Branch 2: x = u^n · x (variable extends past full power, non-progress)
// Branch 2: x = u^n · x' (variable extends past full power, non-progress)
// Side constraint: n >= 0
{
euf::snode* replacement = m_sg.mk_concat(base, var_head);
euf::snode* fresh_tail = mk_fresh_var();
// Peel one base unit (approximation of extending past the power)
euf::snode* replacement = m_sg.mk_concat(base, fresh_tail);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(var_head, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
if (exp_n)
e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::ge, eq->m_dep));
}
return true;
@ -1994,10 +2298,14 @@ namespace seq {
// For a power token u^n facing a variable, branch:
// (1) n = 0 → u^n = ε (replace power with empty)
// (2) n >= 1 → peel one u from the power
// Generates integer side constraints for each branch.
// mirrors ZIPT's VarNumUnwindingModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_var_num_unwinding(nielsen_node* node) {
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
euf::snode* power = nullptr;
euf::snode* var_head = nullptr;
str_eq const* eq = nullptr;
@ -2006,18 +2314,24 @@ namespace seq {
SASSERT(power->is_power() && power->num_args() >= 1);
euf::snode* base = power->arg(0);
expr* exp_n = get_power_exponent(power);
expr* zero = arith.mk_int(0);
expr* one = arith.mk_int(1);
// Branch 1: n = 0 → replace u^n with ε (progress)
// Side constraint: n = 0
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
if (exp_n)
e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep));
}
// Branch 2: n >= 1 → peel one u: u^n → u · u^(n-1)
// Approximated: replace u^n with base · fresh_var
// Side constraint: n >= 1
{
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(base, fresh);
@ -2026,6 +2340,8 @@ namespace seq {
nielsen_subst s(power, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
if (exp_n)
e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep));
}
return true;
@ -2173,5 +2489,261 @@ namespace seq {
}
}
// -----------------------------------------------------------------------
// int_constraint display
// -----------------------------------------------------------------------
std::ostream& int_constraint::display(std::ostream& out) const {
if (m_lhs) out << mk_pp(m_lhs, m_lhs.get_manager());
else out << "null";
switch (m_kind) {
case int_constraint_kind::eq: out << " = "; break;
case int_constraint_kind::le: out << " <= "; break;
case int_constraint_kind::ge: out << " >= "; break;
}
if (m_rhs) out << mk_pp(m_rhs, m_rhs.get_manager());
else out << "null";
return out;
}
// -----------------------------------------------------------------------
// LP integer subsolver implementation
// Replaces ZIPT's SubSolver with Z3's native lp::lar_solver.
// -----------------------------------------------------------------------
void nielsen_graph::lp_solver_reset() {
m_lp_solver = alloc(lp::lar_solver);
m_expr2lpvar.reset();
m_lp_ext_cnt = 0;
}
lp::lpvar nielsen_graph::lp_ensure_var(expr* e) {
if (!e) return lp::null_lpvar;
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
// If already mapped, return cached
unsigned eid = e->get_id();
lp::lpvar j;
if (m_expr2lpvar.find(eid, j))
return j;
// Check if this is a numeral — shouldn't be called with a numeral directly,
// but handle it gracefully by creating a fixed variable
rational val;
if (arith.is_numeral(e, val)) {
j = m_lp_solver->add_var(m_lp_ext_cnt++, true);
m_lp_solver->add_var_bound(j, lp::EQ, lp::mpq(val));
m_expr2lpvar.insert(eid, j);
return j;
}
// Decompose linear arithmetic: a + b → create LP term (1*a + 1*b)
expr* e1 = nullptr;
expr* e2 = nullptr;
if (arith.is_add(e, e1, e2)) {
lp::lpvar j1 = lp_ensure_var(e1);
lp::lpvar j2 = lp_ensure_var(e2);
if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar;
vector<std::pair<lp::mpq, lp::lpvar>> coeffs;
coeffs.push_back({lp::mpq(1), j1});
coeffs.push_back({lp::mpq(1), j2});
j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++);
m_expr2lpvar.insert(eid, j);
return j;
}
// Decompose: a - b → create LP term (1*a + (-1)*b)
if (arith.is_sub(e, e1, e2)) {
lp::lpvar j1 = lp_ensure_var(e1);
lp::lpvar j2 = lp_ensure_var(e2);
if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar;
vector<std::pair<lp::mpq, lp::lpvar>> coeffs;
coeffs.push_back({lp::mpq(1), j1});
coeffs.push_back({lp::mpq(-1), j2});
j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++);
m_expr2lpvar.insert(eid, j);
return j;
}
// Decompose: c * x where c is numeral → create LP term (c*x)
if (arith.is_mul(e, e1, e2)) {
rational coeff;
if (arith.is_numeral(e1, coeff)) {
lp::lpvar jx = lp_ensure_var(e2);
if (jx == lp::null_lpvar) return lp::null_lpvar;
vector<std::pair<lp::mpq, lp::lpvar>> coeffs;
coeffs.push_back({lp::mpq(coeff), jx});
j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++);
m_expr2lpvar.insert(eid, j);
return j;
}
if (arith.is_numeral(e2, coeff)) {
lp::lpvar jx = lp_ensure_var(e1);
if (jx == lp::null_lpvar) return lp::null_lpvar;
vector<std::pair<lp::mpq, lp::lpvar>> coeffs;
coeffs.push_back({lp::mpq(coeff), jx});
j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++);
m_expr2lpvar.insert(eid, j);
return j;
}
}
// Decompose: -x → create LP term (-1*x)
if (arith.is_uminus(e, e1)) {
lp::lpvar jx = lp_ensure_var(e1);
if (jx == lp::null_lpvar) return lp::null_lpvar;
vector<std::pair<lp::mpq, lp::lpvar>> coeffs;
coeffs.push_back({lp::mpq(-1), jx});
j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++);
m_expr2lpvar.insert(eid, j);
return j;
}
// Leaf expression (variable, length, or other opaque term): map to fresh LP var
j = m_lp_solver->add_var(m_lp_ext_cnt++, true /* is_integer */);
m_expr2lpvar.insert(eid, j);
return j;
}
void nielsen_graph::lp_add_constraint(int_constraint const& ic) {
if (!m_lp_solver || !ic.m_lhs || !ic.m_rhs)
return;
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
// We handle simple patterns:
// expr <kind> numeral → add_var_bound
// expr <kind> expr → create term (lhs - rhs) and bound to 0
rational val;
bool is_num_rhs = arith.is_numeral(ic.m_rhs, val);
if (is_num_rhs && (is_app(ic.m_lhs))) {
// Simple case: single variable or len(x) vs numeral
lp::lpvar j = lp_ensure_var(ic.m_lhs);
if (j == lp::null_lpvar) return;
lp::mpq rhs_val(val);
switch (ic.m_kind) {
case int_constraint_kind::eq:
m_lp_solver->add_var_bound(j, lp::EQ, rhs_val);
break;
case int_constraint_kind::le:
m_lp_solver->add_var_bound(j, lp::LE, rhs_val);
break;
case int_constraint_kind::ge:
m_lp_solver->add_var_bound(j, lp::GE, rhs_val);
break;
}
return;
}
// General case: create term lhs - rhs and bound it
// For now, handle single variable on LHS vs single variable on RHS
// (covers the common case of len(x) = len(y), n >= 0, etc.)
bool is_num_lhs = arith.is_numeral(ic.m_lhs, val);
if (is_num_lhs) {
// numeral <kind> expr → reverse and flip kind
lp::lpvar j = lp_ensure_var(ic.m_rhs);
if (j == lp::null_lpvar) return;
lp::mpq lhs_val(val);
switch (ic.m_kind) {
case int_constraint_kind::eq:
m_lp_solver->add_var_bound(j, lp::EQ, lhs_val);
break;
case int_constraint_kind::le:
// val <= rhs ⇔ rhs >= val
m_lp_solver->add_var_bound(j, lp::GE, lhs_val);
break;
case int_constraint_kind::ge:
// val >= rhs ⇔ rhs <= val
m_lp_solver->add_var_bound(j, lp::LE, lhs_val);
break;
}
return;
}
// Both sides are expressions: create term (lhs - rhs) with bound 0
lp::lpvar j_lhs = lp_ensure_var(ic.m_lhs);
lp::lpvar j_rhs = lp_ensure_var(ic.m_rhs);
if (j_lhs == lp::null_lpvar || j_rhs == lp::null_lpvar) return;
// Create a term: 1*lhs + (-1)*rhs
vector<std::pair<lp::mpq, lp::lpvar>> coeffs;
coeffs.push_back({lp::mpq(1), j_lhs});
coeffs.push_back({lp::mpq(-1), j_rhs});
lp::lpvar term_j = m_lp_solver->add_term(coeffs, m_lp_ext_cnt++);
lp::mpq zero(0);
switch (ic.m_kind) {
case int_constraint_kind::eq:
m_lp_solver->add_var_bound(term_j, lp::EQ, zero);
break;
case int_constraint_kind::le:
m_lp_solver->add_var_bound(term_j, lp::LE, zero);
break;
case int_constraint_kind::ge:
m_lp_solver->add_var_bound(term_j, lp::GE, zero);
break;
}
}
void nielsen_graph::collect_path_int_constraints(nielsen_node* node,
svector<nielsen_edge*> const& cur_path,
vector<int_constraint>& out) {
// collect from root node
if (m_root) {
for (auto const& ic : m_root->int_constraints())
out.push_back(ic);
}
// collect from edges along the path and their target nodes
for (nielsen_edge* e : cur_path) {
for (auto const& ic : e->side_int())
out.push_back(ic);
if (e->tgt()) {
for (auto const& ic : e->tgt()->int_constraints())
out.push_back(ic);
}
}
}
bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path) {
vector<int_constraint> constraints;
collect_path_int_constraints(node, cur_path, constraints);
if (constraints.empty())
return true; // no integer constraints, trivially feasible
lp_solver_reset();
for (auto const& ic : constraints)
lp_add_constraint(ic);
lp::lp_status status = m_lp_solver->find_feasible_solution();
return status != lp::lp_status::INFEASIBLE;
}
int_constraint nielsen_graph::mk_int_constraint(expr* lhs, expr* rhs,
int_constraint_kind kind,
dep_tracker const& dep) {
return int_constraint(lhs, rhs, kind, dep, m_sg.get_manager());
}
expr* nielsen_graph::get_power_exponent(euf::snode* power) {
if (!power || !power->is_power()) return nullptr;
if (power->num_args() < 2) return nullptr;
euf::snode* exp_snode = power->arg(1);
return exp_snode ? exp_snode->get_expr() : nullptr;
}
expr_ref nielsen_graph::mk_fresh_int_var() {
ast_manager& m = m_sg.get_manager();
arith_util arith(m);
std::string name = "n!" + std::to_string(m_fresh_cnt++);
return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
}
}

View file

@ -237,6 +237,7 @@ Author:
#include "ast/arith_decl_plugin.h"
#include "ast/seq_decl_plugin.h"
#include "ast/euf/euf_sgraph.h"
#include "math/lp/lar_solver.h"
namespace seq {
@ -480,6 +481,34 @@ namespace seq {
m_expr(e, m), m_dep(dep), m_kind(kind) {}
};
// -----------------------------------------------
// integer constraint: equality or inequality over length expressions
// mirrors ZIPT's IntEq and IntLe
// -----------------------------------------------
enum class int_constraint_kind {
eq, // lhs = rhs
le, // lhs <= rhs
ge, // lhs >= rhs
};
// integer constraint stored per nielsen_node, tracking arithmetic
// relationships between length variables and power exponents.
// mirrors ZIPT's IntEq / IntLe over Presburger arithmetic polynomials.
struct int_constraint {
expr_ref m_lhs; // left-hand side (arithmetic expression)
expr_ref m_rhs; // right-hand side (arithmetic expression)
int_constraint_kind m_kind; // eq, le, or ge
dep_tracker m_dep; // tracks which input constraints contributed
int_constraint(ast_manager& m):
m_lhs(m), m_rhs(m), m_kind(int_constraint_kind::eq) {}
int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep, ast_manager& m):
m_lhs(lhs, m), m_rhs(rhs, m), m_kind(kind), m_dep(dep) {}
std::ostream& display(std::ostream& out) const;
};
// edge in the Nielsen graph connecting two nodes
// mirrors ZIPT's NielsenEdge
class nielsen_edge {
@ -489,6 +518,7 @@ namespace seq {
vector<char_subst> m_char_subst; // character-level substitutions (mirrors ZIPT's SubstC)
ptr_vector<str_eq> m_side_str_eq; // side constraints: string equalities
ptr_vector<str_mem> m_side_str_mem; // side constraints: regex memberships
vector<int_constraint> m_side_int; // side constraints: integer equalities/inequalities
bool m_is_progress; // does this edge represent progress?
public:
nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
@ -506,9 +536,11 @@ namespace seq {
void add_side_str_eq(str_eq* eq) { m_side_str_eq.push_back(eq); }
void add_side_str_mem(str_mem* mem) { m_side_str_mem.push_back(mem); }
void add_side_int(int_constraint const& ic) { m_side_int.push_back(ic); }
ptr_vector<str_eq> const& side_str_eq() const { return m_side_str_eq; }
ptr_vector<str_mem> const& side_str_mem() const { return m_side_str_mem; }
vector<int_constraint> const& side_int() const { return m_side_int; }
bool is_progress() const { return m_is_progress; }
@ -528,6 +560,7 @@ namespace seq {
// constraints at this node
vector<str_eq> m_str_eq; // string equalities
vector<str_mem> m_str_mem; // regex memberships
vector<int_constraint> m_int_constraints; // integer equalities/inequalities (mirrors ZIPT's IntEq/IntLe)
// character constraints (mirrors ZIPT's DisEqualities and CharRanges)
// key: snode id of the s_unit symbolic character
@ -562,6 +595,10 @@ namespace seq {
void add_str_eq(str_eq const& eq) { m_str_eq.push_back(eq); }
void add_str_mem(str_mem const& mem) { m_str_mem.push_back(mem); }
void add_int_constraint(int_constraint const& ic) { m_int_constraints.push_back(ic); }
vector<int_constraint> const& int_constraints() const { return m_int_constraints; }
vector<int_constraint>& int_constraints() { return m_int_constraints; }
// character constraint access (mirrors ZIPT's DisEqualities / CharRanges)
u_map<ptr_vector<euf::snode>> const& char_diseqs() const { return m_char_diseqs; }
@ -687,6 +724,15 @@ namespace seq {
unsigned m_num_input_mems = 0;
nielsen_stats m_stats;
// -----------------------------------------------
// Integer subsolver using lp::lar_solver
// Replaces ZIPT's SubSolver (auxiliary Z3 instance)
// with Z3's native LP solver for integer feasibility.
// -----------------------------------------------
scoped_ptr<lp::lar_solver> m_lp_solver;
u_map<lp::lpvar> m_expr2lpvar; // maps expr id → LP variable
unsigned m_lp_ext_cnt = 0; // external variable counter for LP solver
public:
nielsen_graph(euf::sgraph& sg);
~nielsen_graph();
@ -791,9 +837,28 @@ namespace seq {
// variable Nielsen modifier: var vs var, all progress (3 branches)
bool apply_var_nielsen(nielsen_node* node);
// eq split modifier: var vs var (3 branches)
// eq split modifier: splits a regex-free equation at a chosen index into
// two shorter equalities with optional padding (single progress child).
// Mirrors ZIPT's EqSplitModifier + StrEq.SplitEq.
bool apply_eq_split(nielsen_node* node);
// helper: classify whether a token has variable (symbolic) length
// returns true for variables, powers, etc.; false for chars, units, string literals
bool token_has_variable_length(euf::snode* tok) const;
// helper: get the constant length of a token (only valid when !token_has_variable_length)
unsigned token_const_length(euf::snode* tok) const;
// helper: find a split point in a regex-free equation.
// ports ZIPT's StrEq.SplitEq algorithm.
// returns true if a valid split point is found, with results in out params.
bool find_eq_split_point(
euf::snode_vector const& lhs_toks,
euf::snode_vector const& rhs_toks,
unsigned& out_lhs_idx,
unsigned& out_rhs_idx,
int& out_padding) const;
// apply regex character split modifier to a node.
// for a str_mem constraint x·s ∈ R where x is a variable:
// (1) x → c·z for each char c accepted by R at first position
@ -864,6 +929,38 @@ namespace seq {
// uses seq_util::rex min_length/max_length on the underlying expression.
// max_len == UINT_MAX means unbounded.
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
// -----------------------------------------------
// LP integer subsolver methods
// -----------------------------------------------
// initialize the LP solver fresh for a feasibility check
void lp_solver_reset();
// get or create an LP variable for an arithmetic expression
lp::lpvar lp_ensure_var(expr* e);
// add an int_constraint to the LP solver
void lp_add_constraint(int_constraint const& ic);
// collect int_constraints along the path from root to the given node,
// including constraints from edges and nodes.
void collect_path_int_constraints(nielsen_node* node,
svector<nielsen_edge*> const& cur_path,
vector<int_constraint>& out);
// check integer feasibility of the constraints along the current path.
// returns true if feasible, false if infeasible.
bool check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path);
// create an integer constraint: lhs <kind> rhs
int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep);
// get the exponent expression from a power snode (arg(1))
expr* get_power_exponent(euf::snode* power);
// create a fresh integer variable expression (for power exponents)
expr_ref mk_fresh_int_var();
};
}

View file

@ -463,7 +463,7 @@ static void test_backedge() {
SASSERT(root->backedge() == nullptr);
}
// test apply_eq_split: basic structure (x·A = y·B produces 3 children via eq_split)
// test var vs var basic structure (x·A = y·B now handled by var_nielsen, not eq_split)
static void test_eq_split_basic() {
std::cout << "test_eq_split_basic\n";
ast_manager m;
@ -480,21 +480,20 @@ static void test_eq_split_basic() {
euf::snode* xa = sg.mk_concat(x, a);
euf::snode* yb = sg.mk_concat(y, b);
// x·A = y·B
// x·A = y·B — eq_split returns false (no valid split point),
// falls through to var_nielsen (priority 12) → 3 progress children
ng.add_str_eq(xa, yb);
seq::nielsen_node* root = ng.root();
// eq_split fires: both heads are distinct vars
// produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress)
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3);
// first child is progress (x→ε), rest are non-progress (eq_split)
// all children are progress (var_nielsen marks all as progress)
SASSERT(root->outgoing()[0]->is_progress());
}
// test eq_split with solve: x·y = z·w is satisfiable (all vars can be ε)
// test var vs var with solve: x·y = z·w is satisfiable (all vars can be ε)
static void test_eq_split_solve_sat() {
std::cout << "test_eq_split_solve_sat\n";
ast_manager m;
@ -516,7 +515,7 @@ static void test_eq_split_solve_sat() {
SASSERT(result == seq::nielsen_graph::search_result::sat);
}
// test eq_split with solve: x·A = y·B is unsat (last char mismatch)
// test var vs var with solve: x·A = y·B is unsat (last char mismatch)
static void test_eq_split_solve_unsat() {
std::cout << "test_eq_split_solve_unsat\n";
ast_manager m;
@ -538,7 +537,7 @@ static void test_eq_split_solve_unsat() {
SASSERT(result == seq::nielsen_graph::search_result::unsat);
}
// test eq_split: same var x·A = x·B triggers det modifier (cancel), not eq_split
// test: same var x·A = x·B triggers det modifier (cancel), not eq_split or var_nielsen
static void test_eq_split_same_var_det() {
std::cout << "test_eq_split_same_var_det\n";
ast_manager m;
@ -560,7 +559,7 @@ static void test_eq_split_same_var_det() {
SASSERT(result == seq::nielsen_graph::search_result::unsat);
}
// test eq_split: x·y·A = y·x·A is commutation, should be sat (x=y=ε)
// test: x·y·A = y·x·A is commutation, should be sat (x=y=ε)
static void test_eq_split_commutation_sat() {
std::cout << "test_eq_split_commutation_sat\n";
ast_manager m;
@ -582,6 +581,7 @@ static void test_eq_split_commutation_sat() {
}
// test apply_const_nielsen: char·A = y produces 2 children (y→ε, y→char·fresh)
// test: A = y is handled by det modifier (variable definition: y → A), producing 1 child
static void test_const_nielsen_char_var() {
std::cout << "test_const_nielsen_char_var\n";
ast_manager m;
@ -593,19 +593,18 @@ static void test_const_nielsen_char_var() {
euf::snode* a = sg.mk_char('A');
euf::snode* y = sg.mk_var(symbol("y"));
// A = y (char vs var)
// A = y (single var definition → det modifier fires)
ng.add_str_eq(a, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2);
// both branches are progress
// det modifier: y → A (1 progress child)
SASSERT(root->outgoing().size() == 1);
SASSERT(root->outgoing()[0]->is_progress());
SASSERT(root->outgoing()[1]->is_progress());
}
// test apply_const_nielsen: x = B·y produces 2 children (x→ε, x→B·fresh)
// test: x = B·y is handled by det modifier (variable definition: x → B·y), producing 1 child
static void test_const_nielsen_var_char() {
std::cout << "test_const_nielsen_var_char\n";
ast_manager m;
@ -619,15 +618,15 @@ static void test_const_nielsen_var_char() {
euf::snode* b = sg.mk_char('B');
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* by = sg.mk_concat(b, y);
// x = B·y
// x = B·y (single var definition → det modifier fires)
ng.add_str_eq(x, by);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2);
// det modifier: x → B·y (1 progress child)
SASSERT(root->outgoing().size() == 1);
SASSERT(root->outgoing()[0]->is_progress());
SASSERT(root->outgoing()[1]->is_progress());
}
// test const_nielsen solve: A·x = A·B → sat (x = B via det cancel then const_nielsen x→ε or x→B·fresh)
@ -673,7 +672,7 @@ static void test_const_nielsen_solve_unsat() {
SASSERT(result == seq::nielsen_graph::search_result::unsat);
}
// test const_nielsen priority: A·x = y·B → const_nielsen (2 children), not eq_split (3)
// test const_nielsen priority: A·x = y·B → const_nielsen (2 children), not var_nielsen (3)
static void test_const_nielsen_priority_over_eq_split() {
std::cout << "test_const_nielsen_priority_over_eq_split\n";
ast_manager m;
@ -696,11 +695,11 @@ static void test_const_nielsen_priority_over_eq_split() {
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// const_nielsen produces 2 children, not eq_split's 3
// const_nielsen produces 2 children, not var_nielsen's 3
SASSERT(root->outgoing().size() == 2);
}
// test: both sides start with vars → eq_split (3 children), not const_nielsen
// test: both sides start with vars → var_nielsen (3 children), not const_nielsen
static void test_const_nielsen_not_applicable_both_vars() {
std::cout << "test_const_nielsen_not_applicable_both_vars\n";
ast_manager m;
@ -717,7 +716,7 @@ static void test_const_nielsen_not_applicable_both_vars() {
euf::snode* xa = sg.mk_concat(x, a);
euf::snode* yb = sg.mk_concat(y, b);
// x·A = y·B → both heads are vars → eq_split
// x·A = y·B → both heads are vars → var_nielsen fires (priority 12)
ng.add_str_eq(xa, yb);
seq::nielsen_node* root = ng.root();
@ -976,8 +975,8 @@ static void test_regex_char_split_ground_skip() {
// Variable Nielsen modifier tests
// -----------------------------------------------------------------------
// test var_nielsen basic: x = y (two distinct vars) → eq_split fires (priority 5 < 12)
// produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress)
// test var_nielsen basic: x = y (two distinct vars) → det modifier fires (variable definition x → y)
// produces 1 progress child
static void test_var_nielsen_basic() {
std::cout << "test_var_nielsen_basic\n";
ast_manager m;
@ -990,13 +989,13 @@ static void test_var_nielsen_basic() {
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
// x = y → eq_split: x→ε, x→y·z_fresh, y→x·z_fresh
// x = y → det: x → y (single var definition)
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3);
SASSERT(root->outgoing().size() == 1);
SASSERT(root->outgoing()[0]->is_progress());
}
@ -1022,7 +1021,7 @@ static void test_var_nielsen_same_var_det() {
SASSERT(result == seq::nielsen_graph::search_result::unsat);
}
// test var_nielsen: char vs var → const_nielsen fires, not var_nielsen
// test var_nielsen: char vs var → det fires (y → A), not var_nielsen
static void test_var_nielsen_not_applicable_char() {
std::cout << "test_var_nielsen_not_applicable_char\n";
ast_manager m;
@ -1035,13 +1034,13 @@ static void test_var_nielsen_not_applicable_char() {
euf::snode* a = sg.mk_char('A');
euf::snode* y = sg.mk_var(symbol("y"));
// A = y → const_nielsen (2 children), not var_nielsen (3)
// A = y → det: y → A (variable definition, 1 child)
ng.add_str_eq(a, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2);
SASSERT(root->outgoing().size() == 1);
}
// test var_nielsen solve: x·y = z·w is sat (all vars can be ε)
@ -1109,8 +1108,8 @@ static void test_var_nielsen_commutation_sat() {
SASSERT(result == seq::nielsen_graph::search_result::sat);
}
// test var_nielsen priority: var vs var → eq_split fires first (priority 5 < 12)
// eq_split produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress)
// test var_nielsen priority: var vs var → det fires first for x = y (variable definition)
// var_nielsen only fires when neither side is a single var (e.g., x·A = y·B)
static void test_var_nielsen_priority() {
std::cout << "test_var_nielsen_priority\n";
ast_manager m;
@ -1128,14 +1127,14 @@ static void test_var_nielsen_priority() {
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split produces 3 children
SASSERT(root->outgoing().size() == 3);
// first edge is progress (x→ε), others are non-progress
// det modifier: x → y (1 progress child)
SASSERT(root->outgoing().size() == 1);
// first edge is progress (all var_nielsen children are progress)
SASSERT(root->outgoing()[0]->is_progress());
}
// test generate_extensions: det modifier has priority over const_nielsen
// x·A = x·y → det cancel (1 child), not const_nielsen (2 children)
// test generate_extensions: det modifier handles same-head cancel after simplification
// x·A = x·y → simplify cancels prefix x → A = y → det fires (y → A)
static void test_generate_extensions_det_priority() {
std::cout << "test_generate_extensions_det_priority\n";
ast_manager m;
@ -1151,15 +1150,10 @@ static void test_generate_extensions_det_priority() {
euf::snode* xa = sg.mk_concat(x, a);
euf::snode* xy = sg.mk_concat(x, y);
// x·A = x·y → same-head x cancel → A = y (1 child via det)
// x·A = x·y → after simplify, becomes A = y → det: y → A
ng.add_str_eq(xa, xy);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// det modifier produces 1 child (head cancel), not 2 (const) or 3 (var)
SASSERT(root->outgoing().size() == 1);
SASSERT(root->outgoing()[0]->is_progress());
auto result = ng.solve();
SASSERT(result == seq::nielsen_graph::search_result::sat);
}
// test generate_extensions: returns false when no modifier applies
@ -1217,8 +1211,7 @@ static void test_generate_extensions_regex_only() {
SASSERT(root->outgoing().size() >= 1);
}
// test generate_extensions: mixed constraints, det fires first
// x·A = x·B and y ∈ R → det cancel on eq first, regex untouched
// test: mixed constraints, x·A = x·B and y ∈ R → after simplify, A = B clash → unsat
static void test_generate_extensions_mixed_det_first() {
std::cout << "test_generate_extensions_mixed_det_first\n";
ast_manager m;
@ -1242,14 +1235,11 @@ static void test_generate_extensions_mixed_det_first() {
expr_ref to_re_a(seq.re.mk_to_re(unit_a), m);
euf::snode* re_node = sg.mk(to_re_a);
// x·A = x·B → simplify cancels x → A = B → clash → unsat
ng.add_str_eq(xa, xb);
ng.add_str_mem(y, re_node);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// det modifier (same-head x cancel) produces 1 child
SASSERT(root->outgoing().size() == 1);
auto result = ng.solve();
SASSERT(result == seq::nielsen_graph::search_result::unsat);
}
// -----------------------------------------------------------------------
@ -1827,7 +1817,7 @@ static void test_simplify_multiple_eqs() {
// Modifier child state verification tests
// -----------------------------------------------------------------------
// test det cancel: verify child has the tail equality
// test det cancel: x·A = x·B → simplify cancels prefix x → A = B → clash → unsat
static void test_det_cancel_child_eq() {
std::cout << "test_det_cancel_child_eq\n";
ast_manager m;
@ -1842,22 +1832,14 @@ static void test_det_cancel_child_eq() {
euf::snode* xa = sg.mk_concat(x, a);
euf::snode* xb = sg.mk_concat(x, b);
// x·A = x·B → det same-head cancel → child has A = B
// x·A = x·B → simplify cancels x → A = B → clash → unsat
ng.add_str_eq(xa, xb);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 1);
seq::nielsen_node* child = root->outgoing()[0]->tgt();
SASSERT(child->str_eqs().size() == 1);
auto const& ceq = child->str_eqs()[0];
// child's eq should have the two single chars (sorted)
SASSERT((ceq.m_lhs == a && ceq.m_rhs == b) || (ceq.m_lhs == b && ceq.m_rhs == a));
auto result = ng.solve();
SASSERT(result == seq::nielsen_graph::search_result::unsat);
}
// test const_nielsen: verify children's substitutions target the variable
// A·x = y·B → char vs var: const_nielsen fires (2 children, both substitute y)
static void test_const_nielsen_child_substitutions() {
std::cout << "test_const_nielsen_child_substitutions\n";
ast_manager m;
@ -1867,10 +1849,14 @@ static void test_const_nielsen_child_substitutions() {
seq::nielsen_graph ng(sg);
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* ax = sg.mk_concat(a, x);
euf::snode* yb = sg.mk_concat(y, b);
// A = y → const_nielsen → 2 children, both substitute y
ng.add_str_eq(a, y);
// A·x = y·B → const_nielsen: 2 children, both substitute y
ng.add_str_eq(ax, yb);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
@ -1889,7 +1875,7 @@ static void test_const_nielsen_child_substitutions() {
SASSERT(!root->outgoing()[1]->subst()[0].m_replacement->is_empty());
}
// test var_nielsen: verify substitution structure of 3 children
// test var_nielsen: verify substitution structure — det fires for x = y (single var def)
static void test_var_nielsen_substitution_types() {
std::cout << "test_var_nielsen_substitution_types\n";
ast_manager m;
@ -1901,29 +1887,17 @@ static void test_var_nielsen_substitution_types() {
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
// x = y → var_nielsen: 3 children
// x = y → det: x → y (single var definition, 1 child)
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3);
SASSERT(root->outgoing().size() == 1);
// edge 0: elimination to ε
// edge 0: x → y substitution
SASSERT(root->outgoing()[0]->subst().size() == 1);
SASSERT(root->outgoing()[0]->subst()[0].m_replacement->is_empty());
SASSERT(root->outgoing()[0]->subst()[0].is_eliminating());
// edges 1,2: replacements are non-empty (concat with fresh var)
SASSERT(root->outgoing()[1]->subst().size() == 1);
SASSERT(!root->outgoing()[1]->subst()[0].m_replacement->is_empty());
SASSERT(root->outgoing()[2]->subst().size() == 1);
SASSERT(!root->outgoing()[2]->subst()[0].m_replacement->is_empty());
// edges 1 and 2 target different variables
SASSERT(root->outgoing()[1]->subst()[0].m_var !=
root->outgoing()[2]->subst()[0].m_var);
SASSERT(root->outgoing()[0]->is_progress());
}
// -----------------------------------------------------------------------
@ -2381,13 +2355,11 @@ static void test_power_epsilon_no_power() {
ng.add_str_eq(x, a);
seq::nielsen_node* root = ng.root();
// det fires (x = single char → const_nielsen fires eventually),
// but power_epsilon (priority 2) should not fire; det (priority 1) fires.
// det fires (x is single var, A doesn't contain x → x → A)
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// det catches x = A first (single token eq, but actually ConstNielsen fires):
// x is var, A is char → ConstNielsen: 2 children (x→ε, x→A)
SASSERT(root->outgoing().size() == 2);
// det: x → A (variable definition, 1 child)
SASSERT(root->outgoing().size() == 1);
}
// test_num_cmp_no_power: no same-base power pair → modifier returns false
@ -2409,8 +2381,8 @@ static void test_num_cmp_no_power() {
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split fires (priority 5): 3 children
SASSERT(root->outgoing().size() == 3);
// det fires (x → y, variable definition): 1 child
SASSERT(root->outgoing().size() == 1);
}
// test_star_intr_no_backedge: no backedge → modifier returns false
@ -2529,15 +2501,15 @@ static void test_gpower_intr_no_repeat() {
euf::snode* b = sg.mk_char('B');
euf::snode* ab = sg.mk_concat(a, b);
// x = AB → only 1 repeated 'A', needs >= 2
// x = AB → det fires (x is single var, AB doesn't contain x → x → AB)
ng.add_str_eq(x, ab);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// gpower_intr should NOT fire (< 2 repeats)
// const_nielsen (priority 8) fires for var vs char: 2 children
SASSERT(root->outgoing().size() == 2);
// det (priority 1) fires: x → AB, 1 child
SASSERT(root->outgoing().size() == 1);
}
// test_regex_var_split_basic: x ∈ re → uses minterms for splitting
@ -2593,13 +2565,14 @@ static void test_power_split_no_power() {
euf::snode* xa = sg.mk_concat(x, a);
// x·A = y: no power tokens, power_split should not fire
// det fires (y is single var, y ∉ vars(x·A) → y → x·A)
ng.add_str_eq(xa, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split or const_nielsen fires
SASSERT(root->outgoing().size() >= 2);
// det fires: 1 child (y → x·A)
SASSERT(root->outgoing().size() == 1);
}
// test_var_num_unwinding_no_power: no power tokens → modifier returns false
@ -2621,8 +2594,8 @@ static void test_var_num_unwinding_no_power() {
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split fires: 3 children
SASSERT(root->outgoing().size() == 3);
// det fires: 1 child (x → y)
SASSERT(root->outgoing().size() == 1);
}
// test_const_num_unwinding_no_power: no power vs const → modifier returns false
@ -2654,26 +2627,26 @@ static void test_priority_chain_order() {
ast_manager m;
reg_decl_plugins(m);
// Case 1: same-head cancel → Det (priority 1) fires
// Case 1: same-head cancel → simplify handles prefix cancel, then det/clash
// x·A = x·B → simplify: prefix cancel x → A = B → clash
// Use a non-clashing example: x·A = x·y → simplify: prefix cancel x → A = y → det: y → A
{
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
euf::snode* xa = sg.mk_concat(x, a);
euf::snode* xb = sg.mk_concat(x, b);
euf::snode* xy = sg.mk_concat(x, y);
ng.add_str_eq(xa, xb);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 1); // Det: single child (cancel)
ng.add_str_eq(xa, xy);
auto result = ng.solve();
SASSERT(result == seq::nielsen_graph::search_result::sat);
}
// Case 2: both vars different → EqSplit (priority 5) fires
// Case 2: both vars different → Det (priority 1) fires (variable definition x → y)
{
euf::egraph eg(m);
euf::sgraph sg(m, eg);
@ -2686,10 +2659,10 @@ static void test_priority_chain_order() {
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3); // EqSplit: 3 children
SASSERT(root->outgoing().size() == 1); // Det: variable definition, 1 child
}
// Case 3: char vs var → ConstNielsen (priority 8) fires
// Case 3: char vs var → Det (priority 1) fires (variable definition y → A)
{
euf::egraph eg(m);
euf::sgraph sg(m, eg);
@ -2702,7 +2675,7 @@ static void test_priority_chain_order() {
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2); // ConstNielsen: 2 children
SASSERT(root->outgoing().size() == 1); // Det: variable definition, 1 child
}
}