mirror of
https://github.com/Z3Prover/z3
synced 2026-03-17 18:43:45 +00:00
Merge branch 'c3' into copilot/add-parikh-filter-implementation-again
This commit is contained in:
commit
6fa3c7eabb
6 changed files with 1157 additions and 230 deletions
|
|
@ -59,6 +59,10 @@ namespace smt {
|
|||
void pop(unsigned num_scopes) override {
|
||||
m_kernel.pop(num_scopes);
|
||||
}
|
||||
|
||||
void get_model(model_ref& mdl) override {
|
||||
m_kernel.get_model(mdl);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ namespace smt {
|
|||
m_var_values.reset();
|
||||
m_var_regex.reset();
|
||||
m_trail.reset();
|
||||
m_int_model = nullptr;
|
||||
m_mg = &mg;
|
||||
|
||||
m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
|
||||
mg.register_factory(m_factory);
|
||||
|
|
@ -42,12 +44,12 @@ namespace smt {
|
|||
register_existing_values(nielsen);
|
||||
collect_var_regex_constraints(state);
|
||||
|
||||
// solve integer constraints from the sat_path FIRST so that
|
||||
// m_int_model is available when snode_to_value evaluates power exponents
|
||||
nielsen.solve_sat_path_ints(m_int_model);
|
||||
|
||||
// extract variable assignments from the satisfying leaf's substitution path
|
||||
seq::nielsen_node const* sat = nielsen.sat_node();
|
||||
IF_VERBOSE(1, verbose_stream() << "nseq model init: sat_node=" << (sat ? "set" : "null")
|
||||
<< " path_len=" << nielsen.sat_path().size() << "\n";);
|
||||
extract_assignments(nielsen.sat_path());
|
||||
IF_VERBOSE(1, verbose_stream() << "nseq model: m_var_values has " << m_var_values.size() << " entries\n";);
|
||||
}
|
||||
|
||||
model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
|
||||
|
|
@ -103,6 +105,8 @@ namespace smt {
|
|||
m_var_values.reset();
|
||||
m_var_regex.reset();
|
||||
m_trail.reset();
|
||||
m_int_model = nullptr;
|
||||
m_mg = nullptr;
|
||||
m_factory = nullptr;
|
||||
}
|
||||
|
||||
|
|
@ -175,6 +179,68 @@ namespace smt {
|
|||
return expr_ref(m);
|
||||
}
|
||||
|
||||
if (n->is_power()) {
|
||||
SASSERT(n->num_args() == 2);
|
||||
// Evaluate the base and exponent to produce a concrete string.
|
||||
// The base is a string snode; the exponent is an integer expression
|
||||
// whose value comes from the sat_path integer model.
|
||||
expr_ref base_val = snode_to_value(n->arg(0));
|
||||
if (!base_val)
|
||||
return expr_ref(m);
|
||||
|
||||
euf::snode* exp_snode = n->arg(1);
|
||||
expr* exp_expr = exp_snode ? exp_snode->get_expr() : nullptr;
|
||||
rational exp_val;
|
||||
arith_util arith(m);
|
||||
|
||||
// Try to evaluate exponent: first check if it's a numeral,
|
||||
// then try the int model from sat_path constraints,
|
||||
// finally fall back to the proto_model from model_generator.
|
||||
if (exp_expr && arith.is_numeral(exp_expr, exp_val)) {
|
||||
// already concrete
|
||||
} else if (exp_expr && m_int_model.get()) {
|
||||
expr_ref result(m);
|
||||
if (m_int_model->eval_expr(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
|
||||
// evaluated from int model
|
||||
} else if (m_mg) {
|
||||
proto_model& pm = m_mg->get_model();
|
||||
if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
|
||||
// evaluated from proto_model
|
||||
} else {
|
||||
exp_val = rational(0);
|
||||
}
|
||||
} else {
|
||||
exp_val = rational(0);
|
||||
}
|
||||
} else if (exp_expr && m_mg) {
|
||||
expr_ref result(m);
|
||||
proto_model& pm = m_mg->get_model();
|
||||
if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) {
|
||||
// evaluated from proto_model
|
||||
} else {
|
||||
exp_val = rational(0);
|
||||
}
|
||||
} else {
|
||||
exp_val = rational(0);
|
||||
}
|
||||
|
||||
if (exp_val.is_neg())
|
||||
exp_val = rational(0);
|
||||
|
||||
// Build the repeated string: base^exp_val
|
||||
if (exp_val.is_zero())
|
||||
return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m);
|
||||
if (exp_val.is_one())
|
||||
return base_val;
|
||||
|
||||
// For small exponents, concatenate directly
|
||||
unsigned n_val = exp_val.get_unsigned();
|
||||
expr_ref acc(base_val);
|
||||
for (unsigned i = 1; i < n_val; ++i)
|
||||
acc = m_seq.str.mk_concat(acc, base_val);
|
||||
return acc;
|
||||
}
|
||||
|
||||
// fallback: use the underlying expression
|
||||
expr* e = n->get_expr();
|
||||
return e ? expr_ref(e, m) : expr_ref(m);
|
||||
|
|
|
|||
|
|
@ -61,6 +61,10 @@ namespace smt {
|
|||
// trail for GC protection of generated expressions
|
||||
expr_ref_vector m_trail;
|
||||
|
||||
// integer variable model from sat_path constraints
|
||||
model_ref m_int_model;
|
||||
model_generator* m_mg = nullptr;
|
||||
|
||||
// per-variable regex constraints: maps snode id -> intersected regex snode.
|
||||
// collected during init() from the state's str_mem list.
|
||||
u_map<euf::snode*> m_var_regex;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -241,6 +241,7 @@ Author:
|
|||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include <functional>
|
||||
#include "model/model.h"
|
||||
|
||||
namespace seq {
|
||||
|
||||
|
|
@ -265,6 +266,7 @@ namespace seq {
|
|||
virtual void assert_expr(expr* e) = 0;
|
||||
virtual void push() = 0;
|
||||
virtual void pop(unsigned num_scopes) = 0;
|
||||
virtual void get_model(model_ref& mdl) { mdl = nullptr; }
|
||||
};
|
||||
|
||||
// simplification result for constraint processing
|
||||
|
|
@ -373,6 +375,7 @@ namespace seq {
|
|||
};
|
||||
|
||||
// string variable substitution: var -> replacement
|
||||
// (can be used as well to substitute arbitrary nodes - like powers)
|
||||
// mirrors ZIPT's Subst
|
||||
struct nielsen_subst {
|
||||
euf::snode* m_var;
|
||||
|
|
@ -523,6 +526,11 @@ namespace seq {
|
|||
// Parikh filter: set to true once apply_parikh_to_node has been applied
|
||||
// to this node. Prevents duplicate constraint generation across DFS runs.
|
||||
bool m_parikh_applied = false;
|
||||
// number of int_constraints inherited from the parent node at clone time.
|
||||
// int_constraints[0..m_parent_ic_count) are already asserted at the
|
||||
// parent's solver scope; only [m_parent_ic_count..end) need to be
|
||||
// asserted when this node's solver scope is entered.
|
||||
unsigned m_parent_ic_count = 0;
|
||||
|
||||
public:
|
||||
nielsen_node(nielsen_graph* graph, unsigned id);
|
||||
|
|
@ -618,8 +626,10 @@ namespace seq {
|
|||
void apply_subst(euf::sgraph& sg, nielsen_subst const& s);
|
||||
|
||||
// simplify all constraints at this node and initialize status.
|
||||
// cur_path provides the path from root to this node so that the
|
||||
// LP solver can be queried for deterministic power cancellation.
|
||||
// Returns proceed, conflict, satisfied, or restart.
|
||||
simplify_result simplify_and_init(nielsen_graph& g);
|
||||
simplify_result simplify_and_init(nielsen_graph& g, svector<nielsen_edge*> const& cur_path = svector<nielsen_edge*>());
|
||||
|
||||
// true if all str_eqs are trivial and there are no str_mems
|
||||
bool is_satisfied() const;
|
||||
|
|
@ -665,11 +675,13 @@ namespace seq {
|
|||
unsigned m_num_simplify_conflict = 0;
|
||||
unsigned m_num_extensions = 0;
|
||||
unsigned m_num_fresh_vars = 0;
|
||||
unsigned m_num_arith_infeasible = 0;
|
||||
unsigned m_max_depth = 0;
|
||||
// modifier application counts
|
||||
unsigned m_mod_det = 0;
|
||||
unsigned m_mod_power_epsilon = 0;
|
||||
unsigned m_mod_num_cmp = 0;
|
||||
unsigned m_mod_split_power_elim = 0;
|
||||
unsigned m_mod_const_num_unwinding = 0;
|
||||
unsigned m_mod_eq_split = 0;
|
||||
unsigned m_mod_star_intr = 0;
|
||||
|
|
@ -686,6 +698,7 @@ namespace seq {
|
|||
// the overall Nielsen transformation graph
|
||||
// mirrors ZIPT's NielsenGraph
|
||||
class nielsen_graph {
|
||||
friend class nielsen_node;
|
||||
euf::sgraph& m_sg;
|
||||
region m_region;
|
||||
ptr_vector<nielsen_node> m_nodes;
|
||||
|
|
@ -822,6 +835,13 @@ namespace seq {
|
|||
// max_len == UINT_MAX means unbounded.
|
||||
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
|
||||
|
||||
// solve all integer constraints along the sat_path and return
|
||||
// a model mapping integer variables to concrete values.
|
||||
// Must be called after solve() returns sat.
|
||||
// Returns true if a satisfying model was found.
|
||||
// Caller takes ownership of the returned model pointer.
|
||||
bool solve_sat_path_ints(model_ref& mdl);
|
||||
|
||||
private:
|
||||
search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
|
||||
|
||||
|
|
@ -890,6 +910,15 @@ namespace seq {
|
|||
// mirrors ZIPT's NumCmpModifier
|
||||
bool apply_num_cmp(nielsen_node* node);
|
||||
|
||||
// CommPower-based power elimination split: when one side starts with
|
||||
// a power w^p and CommPower finds c base-pattern occurrences on the
|
||||
// other side but the ordering between p and c is unknown, branch:
|
||||
// (1) p < c (2) c ≤ p
|
||||
// After branching, simplify_and_init's CommPower pass resolves the
|
||||
// cancellation deterministically.
|
||||
// mirrors ZIPT's SplitPowerElim + NumCmpModifier
|
||||
bool apply_split_power_elim(nielsen_node* node);
|
||||
|
||||
// constant numeric unwinding: for a power token u^n vs a constant
|
||||
// (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
|
||||
// mirrors ZIPT's ConstNumUnwindingModifier
|
||||
|
|
@ -933,7 +962,7 @@ namespace seq {
|
|||
euf::snode* find_power_token(nielsen_node* node) const;
|
||||
|
||||
// find a power token facing a constant (char) head
|
||||
bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
|
||||
bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
|
||||
|
||||
// find a power token facing a variable head
|
||||
bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
|
||||
|
|
@ -949,16 +978,26 @@ namespace seq {
|
|||
// Mirrors ZIPT's Constraint.Shared forwarding mechanism.
|
||||
void assert_root_constraints_to_solver();
|
||||
|
||||
// collect int_constraints along the path from root to the given node,
|
||||
// including constraints from edges and nodes.
|
||||
void collect_path_int_constraints(nielsen_node* node,
|
||||
svector<nielsen_edge*> const& cur_path,
|
||||
vector<int_constraint>& out);
|
||||
// Assert the int_constraints of `node` that are new relative to its
|
||||
// parent (indices [m_parent_ic_count..end)) into the current solver scope.
|
||||
// Called by search_dfs after simplify_and_init so that the newly derived
|
||||
// bounds become visible to subsequent check() and check_lp_le() calls.
|
||||
void assert_node_new_int_constraints(nielsen_node* node);
|
||||
|
||||
// check integer feasibility of the constraints along the current path.
|
||||
// returns true if feasible, false if infeasible.
|
||||
// returns true if feasible (including unknown), false only if l_false.
|
||||
// Precondition: all path constraints have been incrementally asserted to
|
||||
// m_solver by search_dfs via push/pop, so a plain check() suffices.
|
||||
// l_undef (resource limit / timeout) is treated as feasible so that the
|
||||
// search continues rather than reporting a false unsatisfiability.
|
||||
bool check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path);
|
||||
|
||||
// check whether lhs <= rhs is implied by the path constraints.
|
||||
// mirrors ZIPT's NielsenNode.IsLe(): temporarily asserts NOT(lhs <= rhs)
|
||||
// and returns true iff the result is unsatisfiable (i.e., lhs <= rhs is
|
||||
// entailed). Path constraints are already in the solver incrementally.
|
||||
bool check_lp_le(expr* lhs, expr* rhs, nielsen_node* node, svector<nielsen_edge*> const& cur_path);
|
||||
|
||||
// create an integer constraint: lhs <kind> rhs
|
||||
int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep);
|
||||
|
||||
|
|
|
|||
|
|
@ -598,75 +598,83 @@ static void test_zipt_parikh() {
|
|||
static void test_tricky_str_equations() {
|
||||
std::cout << "test_tricky_str_equations\n";
|
||||
|
||||
// --- SAT: commutativity / rotation / symmetry ---
|
||||
// ---------------------------------------------------------------
|
||||
// SAT — Conjugacy equations: Xw₂ = w₁X
|
||||
// SAT iff w₂ is a rotation of w₁. In that case w₁ = qp, w₂ = pq
|
||||
// and X = (qp)^n · q is a family of solutions (n ≥ 0).
|
||||
// All of these are UNSAT for X = ε (the ground parts don't match),
|
||||
// so the solver must find a non-trivial witness.
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
// XY = YX (classic commutativity; witness: X="ab", Y="abab")
|
||||
VERIFY(eq_sat("XY", "YX"));
|
||||
// "ba" is a rotation of "ab" (p="b", q="a"). X = "a".
|
||||
VERIFY(eq_sat("Xba", "abX"));
|
||||
|
||||
// Xab = abX (X commutes with the word "ab"; witness: X="ab")
|
||||
VERIFY(eq_sat("Xab", "abX"));
|
||||
// "cb" is a rotation of "bc" (p="c", q="b"). X = "b".
|
||||
VERIFY(eq_sat("Xcb", "bcX"));
|
||||
|
||||
// XaY = YaX (swap-symmetric; witness: X=Y=any, e.g. X=Y="b")
|
||||
VERIFY(eq_sat("XaY", "YaX"));
|
||||
// "bca" is a rotation of "abc" (p="bc", q="a"). X = "a".
|
||||
VERIFY(eq_sat("Xbca", "abcX"));
|
||||
|
||||
// XYX = YXY (Markov-type; witness: X=Y)
|
||||
VERIFY(eq_sat("XYX", "YXY"));
|
||||
// "cab" is a rotation of "abc" (p="c", q="ab"). X = "ab".
|
||||
VERIFY(eq_sat("Xcab", "abcX"));
|
||||
|
||||
// XYZ = ZYX (reverse-palindrome; witness: X="a",Y="b",Z="a")
|
||||
VERIFY(eq_sat("XYZ", "ZYX"));
|
||||
// ---------------------------------------------------------------
|
||||
// SAT — Power decomposition (ε NOT a solution)
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
// XabY = YabX (rotation-like; witness: X="",Y="ab")
|
||||
VERIFY(eq_sat("XabY", "YabX"));
|
||||
// XX = aa ⇒ X = "a". (ε gives "" ≠ "aa")
|
||||
VERIFY(eq_sat("XX", "aa"));
|
||||
|
||||
// aXYa = aYXa (cancel outer 'a'; reduces to XY=YX; witness: X=Y="")
|
||||
VERIFY(eq_sat("aXYa", "aYXa"));
|
||||
// XbX = aba ⇒ 2|X| + 1 = 3 forces |X| = 1; X = "a".
|
||||
// (ε gives "b" ≠ "aba")
|
||||
VERIFY(eq_sat("XbX", "aba"));
|
||||
|
||||
// XaXb = YaYb (both halves share variable; witness: X=Y)
|
||||
VERIFY(eq_sat("XaXb", "YaYb"));
|
||||
// ---------------------------------------------------------------
|
||||
// SAT — Multi-variable (ε NOT a solution)
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
// abXba = Xabba (witness: X="" gives "abba"="abba")
|
||||
VERIFY(eq_sat("abXba", "Xabba"));
|
||||
// X = "b", Y = "a" gives "baab" = "baab". (ε gives "ab" ≠ "ba")
|
||||
VERIFY(eq_sat("XaYb", "bYaX"));
|
||||
|
||||
// --- UNSAT: first-character mismatch ---
|
||||
// X = "b", Y = "a" gives "abba" = "abba". (ε gives "ab" ≠ "ba")
|
||||
VERIFY(eq_sat("abXY", "YXba"));
|
||||
|
||||
// abXba = baXab (LHS starts 'a', RHS starts 'b')
|
||||
VERIFY(eq_unsat("abXba", "baXab"));
|
||||
// ---------------------------------------------------------------
|
||||
// UNSAT — Non-conjugate rotation
|
||||
// Xw₂ = w₁X where w₂ is NOT a rotation of w₁.
|
||||
// Heads are variable vs char, so never a trivial first-char clash.
|
||||
// GPowerIntr introduces periodicity, then the period boundaries
|
||||
// give a character mismatch.
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
// XabX = XbaX (cancel X prefix/suffix → "ab"="ba"; 'a'≠'b')
|
||||
VERIFY(eq_unsat("XabX", "XbaX"));
|
||||
// "cba" is the reverse of "abc", NOT a rotation.
|
||||
// Rotations of "abc" are: abc, bca, cab.
|
||||
VERIFY(eq_unsat("Xcba", "abcX"));
|
||||
|
||||
// --- UNSAT: mismatch exposed after cancellation ---
|
||||
// "acb" is a transposition of "abc", NOT a rotation.
|
||||
VERIFY(eq_unsat("Xacb", "abcX"));
|
||||
|
||||
// XaYb = XbYa (cancel X prefix → aYb=bYa; first char 'a'≠'b')
|
||||
VERIFY(eq_unsat("XaYb", "XbYa"));
|
||||
// ---------------------------------------------------------------
|
||||
// UNSAT — Induction via GPowerIntr
|
||||
// One side starts with a variable that also appears on the other
|
||||
// side behind a ground prefix → self-cycle. GPowerIntr forces
|
||||
// periodicity; all period branches yield character mismatches.
|
||||
// None of these has a trivial first-char or last-char clash.
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
// XaYbX = XbYaX (cancel X prefix+suffix → aYb=bYa; first char 'a'≠'b')
|
||||
VERIFY(eq_unsat("XaYbX", "XbYaX"));
|
||||
// Xa = bX: LHS head = X. Scan RHS: [b], X → self-cycle, base "b".
|
||||
// X = b^n ⇒ b^n·a = b^{n+1} ⇒ last chars a ≠ b ⊥.
|
||||
VERIFY(eq_unsat("Xa", "bX"));
|
||||
|
||||
// XaXbX = XbXaX (cancel X prefix+suffix → aXb=bXa; first char 'a'≠'b')
|
||||
VERIFY(eq_unsat("XaXbX", "XbXaX"));
|
||||
// acX = Xbc: RHS head = X. Scan LHS: [a,c], X → self-cycle, base "ac".
|
||||
// X = (ac)^n ⇒ ac = bc ⊥ (a ≠ b).
|
||||
// X = (ac)^n·a ⇒ aca = abc ⊥ (c ≠ b).
|
||||
VERIFY(eq_unsat("acX", "Xbc"));
|
||||
|
||||
// --- UNSAT: induction ---
|
||||
|
||||
// aXb = Xba (forces X=a^n; final step requires a=b)
|
||||
VERIFY(eq_unsat("aXb", "Xba"));
|
||||
|
||||
// XaY = YbX (a≠b; recursive unrolling forces a=b)
|
||||
VERIFY(eq_unsat("XaY", "YbX"));
|
||||
|
||||
// --- UNSAT: length parity ---
|
||||
|
||||
// XaX = YY (|XaX|=2|X|+1 is odd; |YY|=2|Y| is even)
|
||||
VERIFY(eq_unsat("XaX", "YY"));
|
||||
|
||||
// XaaX = YbY (|XaaX|=2|X|+2 is even; |YbY|=2|Y|+1 is odd)
|
||||
VERIFY(eq_unsat("XaaX", "YbY"));
|
||||
|
||||
// --- UNSAT: midpoint argument ---
|
||||
|
||||
// XaX = YbY (equal length forces |X|=|Y|; midpoint position |X|
|
||||
// holds 'a' in LHS and 'b' in RHS, but 'a'≠'b')
|
||||
VERIFY(eq_unsat("XaX", "YbY"));
|
||||
// bcX = Xab: RHS head = X. Scan LHS: [b,c], X → self-cycle, base "bc".
|
||||
// X = (bc)^n ⇒ bc = ab ⊥ (b ≠ a).
|
||||
// X = (bc)^n·b ⇒ bcb = bab ⊥ (c ≠ a).
|
||||
VERIFY(eq_unsat("bcX", "Xab"));
|
||||
|
||||
std::cout << " ok\n";
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue