mirror of
https://github.com/Z3Prover/z3
synced 2026-04-26 22:03:34 +00:00
Implement IntBounds/VarBoundWatcher + Constraint.Shared; fix pre-existing build errors
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
07ee2f31ef
commit
47f9be0270
5 changed files with 710 additions and 23 deletions
|
|
@ -116,6 +116,8 @@ namespace seq {
|
|||
m_int_constraints.reset();
|
||||
m_char_diseqs.reset();
|
||||
m_char_ranges.reset();
|
||||
m_var_lb.reset();
|
||||
m_var_ub.reset();
|
||||
for (auto const& eq : parent.m_str_eq)
|
||||
m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep));
|
||||
for (auto const& mem : parent.m_str_mem)
|
||||
|
|
@ -133,6 +135,11 @@ namespace seq {
|
|||
for (auto const& kv : parent.m_char_ranges) {
|
||||
m_char_ranges.insert(kv.m_key, kv.m_value.clone());
|
||||
}
|
||||
// clone per-variable integer bounds
|
||||
for (auto const& kv : parent.m_var_lb)
|
||||
m_var_lb.insert(kv.m_key, kv.m_value);
|
||||
for (auto const& kv : parent.m_var_ub)
|
||||
m_var_ub.insert(kv.m_key, kv.m_value);
|
||||
}
|
||||
|
||||
void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) {
|
||||
|
|
@ -151,6 +158,8 @@ namespace seq {
|
|||
mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement);
|
||||
mem.m_dep |= s.m_dep;
|
||||
}
|
||||
// VarBoundWatcher: propagate bounds on s.m_var to variables in s.m_replacement
|
||||
watch_var_bounds(s);
|
||||
}
|
||||
|
||||
void nielsen_node::add_char_range(euf::snode* sym_char, char_set const& range) {
|
||||
|
|
@ -182,6 +191,185 @@ namespace seq {
|
|||
existing.push_back(other);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// nielsen_node: IntBounds methods
|
||||
// mirrors ZIPT's AddLowerIntBound / AddHigherIntBound
|
||||
// -----------------------------------------------
|
||||
|
||||
// Query the lower bound currently recorded for len(var).
// Yields 0 when var is null or when no entry for var->id() exists in m_var_lb.
unsigned nielsen_node::var_lb(euf::snode* var) const {
    unsigned bound = 0;
    if (var)
        m_var_lb.find(var->id(), bound);   // leaves `bound` at 0 when no entry is stored
    return bound;
}
|
||||
|
||||
// Query the upper bound currently recorded for len(var).
// Yields UINT_MAX when var is null or when no entry for var->id() exists in m_var_ub.
unsigned nielsen_node::var_ub(euf::snode* var) const {
    unsigned bound = UINT_MAX;
    if (var)
        m_var_ub.find(var->id(), bound);   // leaves `bound` at UINT_MAX when no entry is stored
    return bound;
}
|
||||
|
||||
// Tighten the lower bound on len(var) to lb.
//   - returns false (and changes nothing) when var is null, is not a variable,
//     or lb does not exceed the lower bound already stored;
//   - otherwise stores lb and returns true. If lb exceeds the stored upper
//     bound the node is marked as an arithmetic conflict and no constraint
//     is added; else the constraint len(var) >= lb (tagged with dep) is
//     appended to m_int_constraints.
bool nielsen_node::add_lower_int_bound(euf::snode* var, unsigned lb, dep_tracker const& dep) {
    if (var == nullptr || !var->is_var())
        return false;

    unsigned const key = var->id();

    unsigned known_lb = 0;
    m_var_lb.find(key, known_lb);
    if (known_lb >= lb)
        return false;                       // not strictly tighter
    m_var_lb.insert(key, lb);

    unsigned known_ub = UINT_MAX;
    m_var_ub.find(key, known_ub);
    if (known_ub < lb) {
        // interval for len(var) became empty
        m_is_general_conflict = true;
        m_reason = backtrack_reason::arithmetic;
        return true;
    }

    // record len(var) >= lb as an integer constraint on this node
    ast_manager& m   = m_graph->sg().get_manager();
    seq_util&    seq = m_graph->sg().get_seq_util();
    arith_util   arith(m);
    expr_ref length_term(seq.str.mk_length(var->get_expr()), m);
    expr_ref lb_term(arith.mk_int(lb), m);
    m_int_constraints.push_back(
        int_constraint(length_term, lb_term, int_constraint_kind::ge, dep, m));
    return true;
}
|
||||
|
||||
// Tighten the upper bound on len(var) to ub.
//   - returns false (and changes nothing) when var is null, is not a variable,
//     or ub is not below the upper bound already stored;
//   - otherwise stores ub and returns true. If the stored lower bound exceeds
//     ub the node is marked as an arithmetic conflict and no constraint is
//     added; else the constraint len(var) <= ub (tagged with dep) is appended
//     to m_int_constraints.
bool nielsen_node::add_upper_int_bound(euf::snode* var, unsigned ub, dep_tracker const& dep) {
    if (var == nullptr || !var->is_var())
        return false;

    unsigned const key = var->id();

    unsigned known_ub = UINT_MAX;
    m_var_ub.find(key, known_ub);
    if (known_ub <= ub)
        return false;                       // not strictly tighter
    m_var_ub.insert(key, ub);

    unsigned known_lb = 0;
    m_var_lb.find(key, known_lb);
    if (ub < known_lb) {
        // interval for len(var) became empty
        m_is_general_conflict = true;
        m_reason = backtrack_reason::arithmetic;
        return true;
    }

    // record len(var) <= ub as an integer constraint on this node
    ast_manager& m   = m_graph->sg().get_manager();
    seq_util&    seq = m_graph->sg().get_seq_util();
    arith_util   arith(m);
    expr_ref length_term(seq.str.mk_length(var->get_expr()), m);
    expr_ref ub_term(arith.mk_int(ub), m);
    m_int_constraints.push_back(
        int_constraint(length_term, ub_term, int_constraint_kind::le, dep, m));
    return true;
}
|
||||
|
||||
// VarBoundWatcher: after applying substitution s, propagate bounds on s.m_var
|
||||
// to variables in s.m_replacement.
|
||||
// If s.m_var has bounds [lo, hi], and the replacement decomposes into
|
||||
// const_len concrete chars plus a list of variable tokens, then:
|
||||
// - for a single variable y: lo-const_len <= len(y) <= hi-const_len
|
||||
// - for multiple variables: each gets an upper bound hi-const_len
|
||||
// Mirrors ZIPT's VarBoundWatcher mechanism.
|
||||
void nielsen_node::watch_var_bounds(nielsen_subst const& s) {
|
||||
if (!s.m_var) return;
|
||||
unsigned id = s.m_var->id();
|
||||
unsigned lo = 0, hi = UINT_MAX;
|
||||
m_var_lb.find(id, lo);
|
||||
m_var_ub.find(id, hi);
|
||||
if (lo == 0 && hi == UINT_MAX) return; // no bounds to propagate
|
||||
|
||||
// decompose replacement into constant length + variable tokens
|
||||
if (!s.m_replacement) return;
|
||||
euf::snode_vector tokens;
|
||||
s.m_replacement->collect_tokens(tokens);
|
||||
|
||||
unsigned const_len = 0;
|
||||
euf::snode_vector var_tokens;
|
||||
for (euf::snode* t : tokens) {
|
||||
if (t->is_char() || t->is_unit()) {
|
||||
++const_len;
|
||||
} else if (t->is_var()) {
|
||||
var_tokens.push_back(t);
|
||||
} else {
|
||||
// power or unknown token: cannot propagate simply, abort
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (var_tokens.empty()) {
|
||||
// all concrete: check if const_len is within [lo, hi]
|
||||
if (const_len < lo || (hi != UINT_MAX && const_len > hi)) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::arithmetic;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (var_tokens.size() == 1) {
|
||||
euf::snode* y = var_tokens[0];
|
||||
// lo <= const_len + len(y) => len(y) >= lo - const_len (if lo > const_len)
|
||||
if (lo > const_len)
|
||||
add_lower_int_bound(y, lo - const_len, s.m_dep);
|
||||
// const_len + len(y) <= hi => len(y) <= hi - const_len
|
||||
if (hi != UINT_MAX) {
|
||||
if (const_len > hi) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::arithmetic;
|
||||
} else {
|
||||
add_upper_int_bound(y, hi - const_len, s.m_dep);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// multiple variables: propagate upper bound to each
|
||||
// (each variable contributes >= 0, so each <= hi - const_len)
|
||||
if (hi != UINT_MAX) {
|
||||
if (const_len > hi) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::arithmetic;
|
||||
return;
|
||||
}
|
||||
unsigned each_ub = hi - const_len;
|
||||
for (euf::snode* y : var_tokens)
|
||||
add_upper_int_bound(y, each_ub, s.m_dep);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize per-variable Parikh bounds from this node's regex memberships.
|
||||
// For each str_mem (str ∈ regex) with bounded regex length [min_len, max_len],
|
||||
// calls add_lower/upper_int_bound for the primary string variable (if str is
|
||||
// a single variable) or stores a bound on the length expression otherwise.
|
||||
void nielsen_node::init_var_bounds_from_mems() {
|
||||
for (str_mem const& mem : m_str_mem) {
|
||||
if (!mem.m_str || !mem.m_regex) continue;
|
||||
unsigned min_len = 0, max_len = UINT_MAX;
|
||||
m_graph->compute_regex_length_interval(mem.m_regex, min_len, max_len);
|
||||
if (min_len == 0 && max_len == UINT_MAX) continue;
|
||||
|
||||
// if str is a single variable, apply bounds directly
|
||||
if (mem.m_str->is_var()) {
|
||||
if (min_len > 0)
|
||||
add_lower_int_bound(mem.m_str, min_len, mem.m_dep);
|
||||
if (max_len < UINT_MAX)
|
||||
add_upper_int_bound(mem.m_str, max_len, mem.m_dep);
|
||||
} else {
|
||||
// str is a concatenation or other term: add as general int_constraints
|
||||
ast_manager& m = m_graph->sg().get_manager();
|
||||
arith_util arith(m);
|
||||
expr_ref len_str = m_graph->compute_length_expr(mem.m_str);
|
||||
if (min_len > 0) {
|
||||
expr_ref bound(arith.mk_int(min_len), m);
|
||||
m_int_constraints.push_back(
|
||||
int_constraint(len_str, bound, int_constraint_kind::ge, mem.m_dep, m));
|
||||
}
|
||||
if (max_len < UINT_MAX) {
|
||||
expr_ref bound(arith.mk_int(max_len), m);
|
||||
m_int_constraints.push_back(
|
||||
int_constraint(len_str, bound, int_constraint_kind::le, mem.m_dep, m));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void nielsen_node::apply_char_subst(euf::sgraph& sg, char_subst const& s) {
|
||||
if (!s.m_var) return;
|
||||
|
||||
|
|
@ -318,6 +506,7 @@ namespace seq {
|
|||
m_fresh_cnt = 0;
|
||||
m_num_input_eqs = 0;
|
||||
m_num_input_mems = 0;
|
||||
m_root_constraints_asserted = false;
|
||||
}
|
||||
|
||||
std::ostream& nielsen_graph::display(std::ostream& out) const {
|
||||
|
|
@ -802,6 +991,10 @@ namespace seq {
|
|||
if (wi < m_str_mem.size())
|
||||
m_str_mem.shrink(wi);
|
||||
|
||||
// IntBounds initialization: derive per-variable Parikh length bounds from
|
||||
// remaining regex memberships and add to m_int_constraints.
|
||||
init_var_bounds_from_mems();
|
||||
|
||||
if (is_satisfied())
|
||||
return simplify_result::satisfied;
|
||||
|
||||
|
|
@ -839,6 +1032,22 @@ namespace seq {
|
|||
// nielsen_graph: search
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void nielsen_graph::assert_root_constraints_to_solver() {
|
||||
if (m_root_constraints_asserted) return;
|
||||
m_root_constraints_asserted = true;
|
||||
// Constraint.Shared: assert all root-level length/Parikh constraints
|
||||
// to m_solver at the base level (no push/pop). These include:
|
||||
// - len(lhs) = len(rhs) for each non-trivial string equality
|
||||
// - len(str) >= min_len and len(str) <= max_len for each regex membership
|
||||
// - len(x) >= 0 for each variable appearing in the root constraints
|
||||
// Making these visible to the solver before the DFS allows arithmetic
|
||||
// pruning at every node, not just the root.
|
||||
vector<length_constraint> constraints;
|
||||
generate_length_constraints(constraints);
|
||||
for (auto const& lc : constraints)
|
||||
m_solver.assert_expr(lc.m_expr);
|
||||
}
|
||||
|
||||
nielsen_graph::search_result nielsen_graph::solve() {
|
||||
if (!m_root)
|
||||
return search_result::sat;
|
||||
|
|
@ -847,6 +1056,10 @@ namespace seq {
|
|||
m_sat_node = nullptr;
|
||||
m_sat_path.reset();
|
||||
|
||||
// Constraint.Shared: assert root-level length/Parikh constraints to the
|
||||
// solver at the base level, so they are visible during all feasibility checks.
|
||||
assert_root_constraints_to_solver();
|
||||
|
||||
// Iterative deepening: increment by 1 on each failure.
|
||||
// m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it.
|
||||
m_depth_bound = 3;
|
||||
|
|
|
|||
|
|
@ -164,9 +164,10 @@ Abstract:
|
|||
has no ConstraintsIntEq or ConstraintsIntLe in nielsen_node.
|
||||
- IntBounds / VarBoundWatcher: per-variable integer interval bounds and the
|
||||
watcher mechanism that reruns bound propagation when a string variable is
|
||||
substituted are not ported.
|
||||
substituted — PORTED as nielsen_node::{add_lower_int_bound,
|
||||
add_upper_int_bound, watch_var_bounds, init_var_bounds_from_mems}.
|
||||
- AddLowerIntBound() / AddHigherIntBound(): incremental interval tightening
|
||||
with restart signaling is not ported.
|
||||
— PORTED as the above add_lower/upper_int_bound methods.
|
||||
|
||||
Character-level handling:
|
||||
- CharSubst: character-level variable substitution (symbolic char -> concrete
|
||||
|
|
@ -214,7 +215,10 @@ Abstract:
|
|||
- GetSignature(): the constraint-pair signature used for subsumption
|
||||
candidate matching is not ported.
|
||||
- Constraint.Shared: the flag indicating whether a constraint should be
|
||||
forwarded to the outer solver is not ported.
|
||||
forwarded to the outer solver — PORTED as
|
||||
nielsen_graph::assert_root_constraints_to_solver(), called at the start
|
||||
of solve() to make all root-level length/Parikh constraints immediately
|
||||
visible to m_solver.
|
||||
- Interpretation: the model-extraction class mapping string and integer
|
||||
variables to concrete values is not ported.
|
||||
-----------------------------------------------------------------------
|
||||
|
|
@ -490,6 +494,12 @@ namespace seq {
|
|||
vector<str_mem> m_str_mem; // regex memberships
|
||||
vector<int_constraint> m_int_constraints; // integer equalities/inequalities (mirrors ZIPT's IntEq/IntLe)
|
||||
|
||||
// per-variable integer bounds for len(var). Mirrors ZIPT's IntBounds.
|
||||
// key: snode id of the string variable.
|
||||
// default lb = 0 (unrestricted); default ub = UINT_MAX (unrestricted).
|
||||
u_map<unsigned> m_var_lb; // lower bound: lb <= len(var)
|
||||
u_map<unsigned> m_var_ub; // upper bound: len(var) <= ub
|
||||
|
||||
// character constraints (mirrors ZIPT's DisEqualities and CharRanges)
|
||||
// key: snode id of the s_unit symbolic character
|
||||
u_map<ptr_vector<euf::snode>> m_char_diseqs; // ?c != {?d, ?e, ...}
|
||||
|
|
@ -528,6 +538,24 @@ namespace seq {
|
|||
vector<int_constraint> const& int_constraints() const { return m_int_constraints; }
|
||||
vector<int_constraint>& int_constraints() { return m_int_constraints; }
|
||||
|
||||
// IntBounds: tighten the lower bound for len(var).
|
||||
// Returns true if the bound was tightened (lb > current lower bound).
|
||||
// When tightened, adds an int_constraint len(var) >= lb to this node.
|
||||
// Sets arithmetic conflict if lb > current upper bound.
|
||||
// Mirrors ZIPT's AddLowerIntBound().
|
||||
bool add_lower_int_bound(euf::snode* var, unsigned lb, dep_tracker const& dep);
|
||||
|
||||
// IntBounds: tighten the upper bound for len(var).
|
||||
// Returns true if the bound was tightened (ub < current upper bound).
|
||||
// When tightened, adds an int_constraint len(var) <= ub to this node.
|
||||
// Sets arithmetic conflict if current lower bound > ub.
|
||||
// Mirrors ZIPT's AddHigherIntBound().
|
||||
bool add_upper_int_bound(euf::snode* var, unsigned ub, dep_tracker const& dep);
|
||||
|
||||
// Query current bounds for a variable (default: 0 / UINT_MAX if not set).
|
||||
unsigned var_lb(euf::snode* var) const;
|
||||
unsigned var_ub(euf::snode* var) const;
|
||||
|
||||
// character constraint access (mirrors ZIPT's DisEqualities / CharRanges)
|
||||
u_map<ptr_vector<euf::snode>> const& char_diseqs() const { return m_char_diseqs; }
|
||||
u_map<char_set> const& char_ranges() const { return m_char_ranges; }
|
||||
|
|
@ -603,6 +631,19 @@ namespace seq {
|
|||
// sets changed=true, and returns false.
|
||||
bool handle_empty_side(euf::sgraph& sg, euf::snode* non_empty_side,
|
||||
dep_tracker const& dep, bool& changed);
|
||||
|
||||
// VarBoundWatcher: after applying substitution s, propagate the bounds
|
||||
// of s.m_var to variables appearing in s.m_replacement.
|
||||
// When var has bounds [lo, hi], derives bounds for variables in replacement
|
||||
// using the known constant-length contribution of non-variable tokens.
|
||||
// Mirrors ZIPT's VarBoundWatcher re-check mechanism.
|
||||
void watch_var_bounds(nielsen_subst const& s);
|
||||
|
||||
// Initialize per-variable Parikh bounds from this node's regex memberships.
|
||||
// For each str_mem constraint (str ∈ regex) where regex has length bounds
|
||||
// [min_len, max_len], adds lower/upper bound constraints for len(str).
|
||||
// Called from simplify_and_init to populate IntBounds at node creation.
|
||||
void init_var_bounds_from_mems();
|
||||
};
|
||||
|
||||
// search statistics collected during Nielsen graph solving
|
||||
|
|
@ -662,6 +703,10 @@ namespace seq {
|
|||
// -----------------------------------------------
|
||||
simple_solver& m_solver;
|
||||
|
||||
// Constraint.Shared: guards re-assertion of root-level constraints.
|
||||
// Set to true after assert_root_constraints_to_solver() is first called.
|
||||
bool m_root_constraints_asserted = false;
|
||||
|
||||
public:
|
||||
// Construct with a caller-supplied solver. Ownership is NOT transferred;
|
||||
// the caller is responsible for keeping the solver alive.
|
||||
|
|
@ -754,6 +799,16 @@ namespace seq {
|
|||
// Also generates len(x) >= 0 for each variable appearing in the equations.
|
||||
void generate_length_constraints(vector<length_constraint>& constraints);
|
||||
|
||||
// build an arithmetic expression representing the length of an snode tree.
|
||||
// concatenations are expanded to sums, chars to 1, empty to 0,
|
||||
// variables to (str.len var_expr).
|
||||
expr_ref compute_length_expr(euf::snode* n);
|
||||
|
||||
// compute Parikh length interval [min_len, max_len] for a regex snode.
|
||||
// uses seq_util::rex min_length/max_length on the underlying expression.
|
||||
// max_len == UINT_MAX means unbounded.
|
||||
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
|
||||
|
||||
private:
|
||||
search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
|
||||
|
||||
|
|
@ -860,20 +915,17 @@ namespace seq {
|
|||
// find a power token facing a variable head
|
||||
bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
|
||||
|
||||
// build an arithmetic expression representing the length of an snode tree.
|
||||
// concatenations are expanded to sums, chars to 1, empty to 0,
|
||||
// variables to (str.len var_expr).
|
||||
expr_ref compute_length_expr(euf::snode* n);
|
||||
|
||||
// compute Parikh length interval [min_len, max_len] for a regex snode.
|
||||
// uses seq_util::rex min_length/max_length on the underlying expression.
|
||||
// max_len == UINT_MAX means unbounded.
|
||||
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
|
||||
|
||||
// -----------------------------------------------
|
||||
// Integer feasibility subsolver methods
|
||||
// -----------------------------------------------
|
||||
|
||||
// Constraint.Shared: assert all root-level length/Parikh constraints to
|
||||
// m_solver at the base level (outside push/pop). Called once at the start
|
||||
// of solve(). Makes derived constraints immediately visible to m_solver
|
||||
// for arithmetic pruning at every DFS node, not just the root.
|
||||
// Mirrors ZIPT's Constraint.Shared forwarding mechanism.
|
||||
void assert_root_constraints_to_solver();
|
||||
|
||||
// collect int_constraints along the path from root to the given node,
|
||||
// including constraints from edges and nodes.
|
||||
void collect_path_int_constraints(nielsen_node* node,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue