3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-17 18:43:45 +00:00

Merge branch 'c3' into copilot/add-parikh-filter-implementation-again

This commit is contained in:
Nikolaj Bjorner 2026-03-11 19:27:03 -07:00 committed by GitHub
commit 6fa3c7eabb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 1157 additions and 230 deletions

View file

@ -59,6 +59,10 @@ namespace smt {
void pop(unsigned num_scopes) override {
m_kernel.pop(num_scopes);
}
void get_model(model_ref& mdl) override {
m_kernel.get_model(mdl);
}
};
}

View file

@ -35,6 +35,8 @@ namespace smt {
m_var_values.reset();
m_var_regex.reset();
m_trail.reset();
m_int_model = nullptr;
m_mg = &mg;
m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
mg.register_factory(m_factory);
@ -42,12 +44,12 @@ namespace smt {
register_existing_values(nielsen);
collect_var_regex_constraints(state);
// solve integer constraints from the sat_path FIRST so that
// m_int_model is available when snode_to_value evaluates power exponents
nielsen.solve_sat_path_ints(m_int_model);
// extract variable assignments from the satisfying leaf's substitution path
seq::nielsen_node const* sat = nielsen.sat_node();
IF_VERBOSE(1, verbose_stream() << "nseq model init: sat_node=" << (sat ? "set" : "null")
<< " path_len=" << nielsen.sat_path().size() << "\n";);
extract_assignments(nielsen.sat_path());
IF_VERBOSE(1, verbose_stream() << "nseq model: m_var_values has " << m_var_values.size() << " entries\n";);
}
model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
@ -103,6 +105,8 @@ namespace smt {
m_var_values.reset();
m_var_regex.reset();
m_trail.reset();
m_int_model = nullptr;
m_mg = nullptr;
m_factory = nullptr;
}
@ -175,6 +179,68 @@ namespace smt {
return expr_ref(m);
}
if (n->is_power()) {
SASSERT(n->num_args() == 2);
// Evaluate the base and exponent to produce a concrete string.
// The base is a string snode; the exponent is an integer expression
// whose value comes from the sat_path integer model.
expr_ref base_val = snode_to_value(n->arg(0));
if (!base_val)
return expr_ref(m);
euf::snode* exp_snode = n->arg(1);
expr* exp_expr = exp_snode ? exp_snode->get_expr() : nullptr;
rational exp_val;
arith_util arith(m);
// Try to evaluate exponent: first check if it's a numeral,
// then try the int model from sat_path constraints,
// finally fall back to the proto_model from model_generator.
if (exp_expr && arith.is_numeral(exp_expr, exp_val)) {
// already concrete
} else if (exp_expr && m_int_model.get()) {
expr_ref result(m);
if (m_int_model->eval_expr(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
// evaluated from int model
} else if (m_mg) {
proto_model& pm = m_mg->get_model();
if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
// evaluated from proto_model
} else {
exp_val = rational(0);
}
} else {
exp_val = rational(0);
}
} else if (exp_expr && m_mg) {
expr_ref result(m);
proto_model& pm = m_mg->get_model();
if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
// evaluated from proto_model
} else {
exp_val = rational(0);
}
} else {
exp_val = rational(0);
}
if (exp_val.is_neg())
exp_val = rational(0);
// Build the repeated string: base^exp_val
if (exp_val.is_zero())
return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m);
if (exp_val.is_one())
return base_val;
// For small exponents, concatenate directly
unsigned n_val = exp_val.get_unsigned();
expr_ref acc(base_val);
for (unsigned i = 1; i < n_val; ++i)
acc = m_seq.str.mk_concat(acc, base_val);
return acc;
}
// fallback: use the underlying expression
expr* e = n->get_expr();
return e ? expr_ref(e, m) : expr_ref(m);

View file

@ -61,6 +61,10 @@ namespace smt {
// trail for GC protection of generated expressions
expr_ref_vector m_trail;
// integer variable model from sat_path constraints
model_ref m_int_model;
model_generator* m_mg = nullptr;
// per-variable regex constraints: maps snode id -> intersected regex snode.
// collected during init() from the state's str_mem list.
u_map<euf::snode*> m_var_regex;

File diff suppressed because it is too large Load diff

View file

@ -241,6 +241,7 @@ Author:
#include "ast/seq_decl_plugin.h"
#include "ast/euf/euf_sgraph.h"
#include <functional>
#include "model/model.h"
namespace seq {
@ -265,6 +266,7 @@ namespace seq {
virtual void assert_expr(expr* e) = 0;
virtual void push() = 0;
virtual void pop(unsigned num_scopes) = 0;
virtual void get_model(model_ref& mdl) { mdl = nullptr; }
};
// simplification result for constraint processing
@ -373,6 +375,7 @@ namespace seq {
};
// string variable substitution: var -> replacement
// (can be used as well to substitute arbitrary nodes - like powers)
// mirrors ZIPT's Subst
struct nielsen_subst {
euf::snode* m_var;
@ -523,6 +526,11 @@ namespace seq {
// Parikh filter: set to true once apply_parikh_to_node has been applied
// to this node. Prevents duplicate constraint generation across DFS runs.
bool m_parikh_applied = false;
// number of int_constraints inherited from the parent node at clone time.
// int_constraints[0..m_parent_ic_count) are already asserted at the
// parent's solver scope; only [m_parent_ic_count..end) need to be
// asserted when this node's solver scope is entered.
unsigned m_parent_ic_count = 0;
public:
nielsen_node(nielsen_graph* graph, unsigned id);
@ -618,8 +626,10 @@ namespace seq {
void apply_subst(euf::sgraph& sg, nielsen_subst const& s);
// simplify all constraints at this node and initialize status.
// cur_path provides the path from root to this node so that the
// LP solver can be queried for deterministic power cancellation.
// Returns proceed, conflict, satisfied, or restart.
simplify_result simplify_and_init(nielsen_graph& g);
simplify_result simplify_and_init(nielsen_graph& g, svector<nielsen_edge*> const& cur_path = svector<nielsen_edge*>());
// true if all str_eqs are trivial and there are no str_mems
bool is_satisfied() const;
@ -665,11 +675,13 @@ namespace seq {
unsigned m_num_simplify_conflict = 0;
unsigned m_num_extensions = 0;
unsigned m_num_fresh_vars = 0;
unsigned m_num_arith_infeasible = 0;
unsigned m_max_depth = 0;
// modifier application counts
unsigned m_mod_det = 0;
unsigned m_mod_power_epsilon = 0;
unsigned m_mod_num_cmp = 0;
unsigned m_mod_split_power_elim = 0;
unsigned m_mod_const_num_unwinding = 0;
unsigned m_mod_eq_split = 0;
unsigned m_mod_star_intr = 0;
@ -686,6 +698,7 @@ namespace seq {
// the overall Nielsen transformation graph
// mirrors ZIPT's NielsenGraph
class nielsen_graph {
friend class nielsen_node;
euf::sgraph& m_sg;
region m_region;
ptr_vector<nielsen_node> m_nodes;
@ -822,6 +835,13 @@ namespace seq {
// max_len == UINT_MAX means unbounded.
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
// solve all integer constraints along the sat_path and return
// a model mapping integer variables to concrete values.
// Must be called after solve() returns sat.
// Returns true if a satisfying model was found.
// Caller takes ownership of the returned model pointer.
bool solve_sat_path_ints(model_ref& mdl);
private:
search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
@ -890,6 +910,15 @@ namespace seq {
// mirrors ZIPT's NumCmpModifier
bool apply_num_cmp(nielsen_node* node);
// CommPower-based power elimination split: when one side starts with
// a power w^p and CommPower finds c base-pattern occurrences on the
// other side but the ordering between p and c is unknown, branch:
// (1) p < c (2) c ≤ p
// After branching, simplify_and_init's CommPower pass resolves the
// cancellation deterministically.
// mirrors ZIPT's SplitPowerElim + NumCmpModifier
bool apply_split_power_elim(nielsen_node* node);
// constant numeric unwinding: for a power token u^n vs a constant
// (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
// mirrors ZIPT's ConstNumUnwindingModifier
@ -933,7 +962,7 @@ namespace seq {
euf::snode* find_power_token(nielsen_node* node) const;
// find a power token facing a constant (char) head
bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
// find a power token facing a variable head
bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
@ -949,16 +978,26 @@ namespace seq {
// Mirrors ZIPT's Constraint.Shared forwarding mechanism.
void assert_root_constraints_to_solver();
// collect int_constraints along the path from root to the given node,
// including constraints from edges and nodes.
void collect_path_int_constraints(nielsen_node* node,
svector<nielsen_edge*> const& cur_path,
vector<int_constraint>& out);
// Assert the int_constraints of `node` that are new relative to its
// parent (indices [m_parent_ic_count..end)) into the current solver scope.
// Called by search_dfs after simplify_and_init so that the newly derived
// bounds become visible to subsequent check() and check_lp_le() calls.
void assert_node_new_int_constraints(nielsen_node* node);
// check integer feasibility of the constraints along the current path.
// returns true if feasible, false if infeasible.
// returns true if feasible (including unknown), false only if l_false.
// Precondition: all path constraints have been incrementally asserted to
// m_solver by search_dfs via push/pop, so a plain check() suffices.
// l_undef (resource limit / timeout) is treated as feasible so that the
// search continues rather than reporting a false unsatisfiability.
bool check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path);
// check whether lhs <= rhs is implied by the path constraints.
// mirrors ZIPT's NielsenNode.IsLe(): temporarily asserts NOT(lhs <= rhs)
// and returns true iff the result is unsatisfiable (i.e., lhs <= rhs is
// entailed). Path constraints are already in the solver incrementally.
bool check_lp_le(expr* lhs, expr* rhs, nielsen_node* node, svector<nielsen_edge*> const& cur_path);
// create an integer constraint: lhs <kind> rhs
int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep);

View file

@ -598,75 +598,83 @@ static void test_zipt_parikh() {
static void test_tricky_str_equations() {
std::cout << "test_tricky_str_equations\n";
// --- SAT: commutativity / rotation / symmetry ---
// ---------------------------------------------------------------
// SAT — Conjugacy equations: Xw₂ = w₁X
// SAT iff w₂ is a rotation of w₁. In that case w₁ = qp, w₂ = pq
// and X = (qp)^n · q is a family of solutions (n ≥ 0).
// All of these are UNSAT for X = ε (the ground parts don't match),
// so the solver must find a non-trivial witness.
// ---------------------------------------------------------------
// XY = YX (classic commutativity; witness: X="ab", Y="abab")
VERIFY(eq_sat("XY", "YX"));
// "ba" is a rotation of "ab" (p="b", q="a"). X = "a".
VERIFY(eq_sat("Xba", "abX"));
// Xab = abX (X commutes with the word "ab"; witness: X="ab")
VERIFY(eq_sat("Xab", "abX"));
// "cb" is a rotation of "bc" (p="c", q="b"). X = "b".
VERIFY(eq_sat("Xcb", "bcX"));
// XaY = YaX (swap-symmetric; witness: X=Y=any, e.g. X=Y="b")
VERIFY(eq_sat("XaY", "YaX"));
// "bca" is a rotation of "abc" (p="bc", q="a"). X = "a".
VERIFY(eq_sat("Xbca", "abcX"));
// XYX = YXY (Markov-type; witness: X=Y)
VERIFY(eq_sat("XYX", "YXY"));
// "cab" is a rotation of "abc" (p="c", q="ab"). X = "ab".
VERIFY(eq_sat("Xcab", "abcX"));
// XYZ = ZYX (reverse-palindrome; witness: X="a",Y="b",Z="a")
VERIFY(eq_sat("XYZ", "ZYX"));
// ---------------------------------------------------------------
// SAT — Power decomposition (ε NOT a solution)
// ---------------------------------------------------------------
// XabY = YabX (rotation-like; witness: X="",Y="ab")
VERIFY(eq_sat("XabY", "YabX"));
// XX = aa ⇒ X = "a". (ε gives "" ≠ "aa")
VERIFY(eq_sat("XX", "aa"));
// aXYa = aYXa (cancel outer 'a'; reduces to XY=YX; witness: X=Y="")
VERIFY(eq_sat("aXYa", "aYXa"));
// XbX = aba ⇒ 2|X| + 1 = 3 forces |X| = 1; X = "a".
// (ε gives "b" ≠ "aba")
VERIFY(eq_sat("XbX", "aba"));
// XaXb = YaYb (both halves share variable; witness: X=Y)
VERIFY(eq_sat("XaXb", "YaYb"));
// ---------------------------------------------------------------
// SAT — Multi-variable (ε NOT a solution)
// ---------------------------------------------------------------
// abXba = Xabba (witness: X="" gives "abba"="abba")
VERIFY(eq_sat("abXba", "Xabba"));
// X = "b", Y = "a" gives "baab" = "baab". (ε gives "ab" ≠ "ba")
VERIFY(eq_sat("XaYb", "bYaX"));
// --- UNSAT: first-character mismatch ---
// X = "b", Y = "a" gives "abba" = "abba". (ε gives "ab" ≠ "ba")
VERIFY(eq_sat("abXY", "YXba"));
// abXba = baXab (LHS starts 'a', RHS starts 'b')
VERIFY(eq_unsat("abXba", "baXab"));
// ---------------------------------------------------------------
// UNSAT — Non-conjugate rotation
// Xw₂ = w₁X where w₂ is NOT a rotation of w₁.
// Heads are variable vs char, so never a trivial first-char clash.
// GPowerIntr introduces periodicity, then the period boundaries
// give a character mismatch.
// ---------------------------------------------------------------
// XabX = XbaX (cancel X prefix/suffix → "ab"="ba"; 'a'≠'b')
VERIFY(eq_unsat("XabX", "XbaX"));
// "cba" is the reverse of "abc", NOT a rotation.
// Rotations of "abc" are: abc, bca, cab.
VERIFY(eq_unsat("Xcba", "abcX"));
// --- UNSAT: mismatch exposed after cancellation ---
// "acb" is a transposition of "abc", NOT a rotation.
VERIFY(eq_unsat("Xacb", "abcX"));
// XaYb = XbYa (cancel X prefix → aYb=bYa; first char 'a'≠'b')
VERIFY(eq_unsat("XaYb", "XbYa"));
// ---------------------------------------------------------------
// UNSAT — Induction via GPowerIntr
// One side starts with a variable that also appears on the other
// side behind a ground prefix → self-cycle. GPowerIntr forces
// periodicity; all period branches yield character mismatches.
// None of these has a trivial first-char or last-char clash.
// ---------------------------------------------------------------
// XaYbX = XbYaX (cancel X prefix+suffix → aYb=bYa; first char 'a'≠'b')
VERIFY(eq_unsat("XaYbX", "XbYaX"));
// Xa = bX: LHS head = X. Scan RHS: [b], X → self-cycle, base "b".
// X = b^n ⇒ b^n·a = b^{n+1} ⇒ last chars a ≠ b ⊥.
VERIFY(eq_unsat("Xa", "bX"));
// XaXbX = XbXaX (cancel X prefix+suffix → aXb=bXa; first char 'a'≠'b')
VERIFY(eq_unsat("XaXbX", "XbXaX"));
// acX = Xbc: RHS head = X. Scan LHS: [a,c], X → self-cycle, base "ac".
// X = (ac)^n ⇒ ac = bc ⊥ (a ≠ b).
// X = (ac)^n·a ⇒ aca = abc ⊥ (c ≠ b).
VERIFY(eq_unsat("acX", "Xbc"));
// --- UNSAT: induction ---
// aXb = Xba (forces X=a^n; final step requires a=b)
VERIFY(eq_unsat("aXb", "Xba"));
// XaY = YbX (a≠b; recursive unrolling forces a=b)
VERIFY(eq_unsat("XaY", "YbX"));
// --- UNSAT: length parity ---
// XaX = YY (|XaX|=2|X|+1 is odd; |YY|=2|Y| is even)
VERIFY(eq_unsat("XaX", "YY"));
// XaaX = YbY (|XaaX|=2|X|+2 is even; |YbY|=2|Y|+1 is odd)
VERIFY(eq_unsat("XaaX", "YbY"));
// --- UNSAT: midpoint argument ---
// XaX = YbY (equal length forces |X|=|Y|; midpoint position |X|
// holds 'a' in LHS and 'b' in RHS, but 'a'≠'b')
VERIFY(eq_unsat("XaX", "YbY"));
// bcX = Xab: RHS head = X. Scan LHS: [b,c], X → self-cycle, base "bc".
// X = (bc)^n ⇒ bc = ab ⊥ (b ≠ a).
// X = (bc)^n·b ⇒ bcb = bab ⊥ (c ≠ a).
VERIFY(eq_unsat("bcX", "Xab"));
std::cout << " ok\n";
}