mirror of
https://github.com/Z3Prover/z3
synced 2026-03-17 18:43:45 +00:00
Use length information during Nielsen saturation
This commit is contained in:
parent
e8354a783a
commit
3ad9c290fc
4 changed files with 321 additions and 3 deletions
|
|
@ -49,6 +49,7 @@ namespace smt {
|
|||
}
|
||||
|
||||
void assert_expr(expr* e) override {
|
||||
// std::cout << "Asserting: " << mk_pp(e, m_kernel.m()) << std::endl;
|
||||
m_kernel.assert_expr(e);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -233,8 +233,34 @@ namespace smt {
|
|||
if (exp_val.is_one())
|
||||
return base_val;
|
||||
|
||||
// For small exponents, concatenate directly
|
||||
// For small exponents, concatenate directly; for large ones,
|
||||
// build a concrete string constant to avoid enormous AST chains
|
||||
// that cause cleanup_expr to diverge.
|
||||
unsigned n_val = exp_val.get_unsigned();
|
||||
constexpr unsigned POWER_EXPAND_LIMIT = 1000;
|
||||
if (n_val > POWER_EXPAND_LIMIT) {
|
||||
// Try to extract a concrete character from the base (seq.unit(c))
|
||||
// and build a string literal directly (O(1) AST node).
|
||||
unsigned ch = 0;
|
||||
expr* unit_arg = nullptr;
|
||||
if (m_seq.str.is_unit(base_val, unit_arg) && m_seq.is_const_char(unit_arg, ch)) {
|
||||
svector<unsigned> buf(n_val, ch);
|
||||
zstring result(buf.size(), buf.data());
|
||||
return expr_ref(m_seq.str.mk_string(result), m);
|
||||
}
|
||||
// Also handle if base is already a string constant
|
||||
zstring base_str;
|
||||
if (m_seq.str.is_string(base_val, base_str) && base_str.length() > 0) {
|
||||
svector<unsigned> buf;
|
||||
for (unsigned i = 0; i < n_val; ++i)
|
||||
for (unsigned j = 0; j < base_str.length(); ++j)
|
||||
buf.push_back(base_str[j]);
|
||||
zstring result(buf.size(), buf.data());
|
||||
return expr_ref(m_seq.str.mk_string(result), m);
|
||||
}
|
||||
// Fallback: cap exponent to avoid divergence
|
||||
n_val = POWER_EXPAND_LIMIT;
|
||||
}
|
||||
expr_ref acc(base_val);
|
||||
for (unsigned i = 1; i < n_val; ++i)
|
||||
acc = m_seq.str.mk_concat(acc, base_val);
|
||||
|
|
|
|||
|
|
@ -442,7 +442,8 @@ namespace seq {
|
|||
|
||||
nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver):
|
||||
m_sg(sg),
|
||||
m_solver(solver) {
|
||||
m_solver(solver),
|
||||
m_len_vars(sg.get_manager()) {
|
||||
}
|
||||
|
||||
nielsen_graph::~nielsen_graph() {
|
||||
|
|
@ -519,6 +520,9 @@ namespace seq {
|
|||
m_num_input_eqs = 0;
|
||||
m_num_input_mems = 0;
|
||||
m_root_constraints_asserted = false;
|
||||
m_mod_cnt.reset();
|
||||
m_len_var_cache.clear();
|
||||
m_len_vars.reset();
|
||||
}
|
||||
|
||||
std::ostream& nielsen_graph::display(std::ostream& out) const {
|
||||
|
|
@ -1970,6 +1974,9 @@ namespace seq {
|
|||
// node into the current solver scope. Constraints inherited from the parent
|
||||
// (indices 0..m_parent_ic_count-1) are already present at the enclosing
|
||||
// scope level; only the newly-added tail needs to be asserted here.
|
||||
// Also generate per-node |LHS| = |RHS| length constraints for descendant
|
||||
// equations (root constraints are already at the base level).
|
||||
generate_node_length_constraints(node);
|
||||
assert_node_new_int_constraints(node);
|
||||
|
||||
// integer feasibility check: the solver now holds all path constraints
|
||||
|
|
@ -2017,9 +2024,27 @@ namespace seq {
|
|||
// inside the recursive call (above). On return, pop the scope so
|
||||
// that backtracking removes those assertions.
|
||||
m_solver.push();
|
||||
|
||||
// Lazily compute substitution length constraints (|x| = |u|) on first
|
||||
// traversal. This must happen before asserting side_int and before
|
||||
// bumping mod counts, so that LHS uses the parent's counts and RHS
|
||||
// uses the temporarily-bumped counts.
|
||||
if (!e->len_constraints_computed()) {
|
||||
add_subst_length_constraints(e);
|
||||
e->set_len_constraints_computed(true);
|
||||
}
|
||||
|
||||
for (auto const &ic : e->side_int())
|
||||
m_solver.assert_expr(int_constraint_to_expr(ic));
|
||||
|
||||
// Bump modification counts for the child's context.
|
||||
inc_edge_mod_counts(e);
|
||||
|
||||
search_result r = search_dfs(e->tgt(), e->is_progress() ? depth : depth + 1, cur_path);
|
||||
|
||||
// Restore modification counts on backtrack.
|
||||
dec_edge_mod_counts(e);
|
||||
|
||||
m_solver.pop(1);
|
||||
if (r == search_result::sat)
|
||||
return search_result::sat;
|
||||
|
|
@ -3533,7 +3558,18 @@ namespace seq {
|
|||
return expr_ref(arith.mk_add(left, right), m);
|
||||
}
|
||||
|
||||
// for variables and other terms, use symbolic (str.len expr)
|
||||
// For variables: consult modification counter.
|
||||
// mod_count > 0 means the variable has been reused by a non-eliminating
|
||||
// substitution; use a fresh integer variable to avoid the circular
|
||||
// |x| = 1 + |x| problem.
|
||||
if (n->is_var()) {
|
||||
unsigned mc = 0;
|
||||
m_mod_cnt.find(n->id(), mc);
|
||||
if (mc > 0)
|
||||
return get_or_create_len_var(n, mc);
|
||||
}
|
||||
|
||||
// for variables at mod_count 0 and other terms, use symbolic (str.len expr)
|
||||
return expr_ref(seq.str.mk_length(n->get_expr()), m);
|
||||
}
|
||||
|
||||
|
|
@ -3664,6 +3700,131 @@ namespace seq {
|
|||
return expr_ref(m.mk_true(), m);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Modification counter: substitution length tracking
|
||||
// mirrors ZIPT's LocalInfo.CurrentModificationCnt + NielsenEdge.IncModCount/DecModCount
|
||||
// + NielsenNode constructor length assertion logic
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
expr_ref nielsen_graph::get_or_create_len_var(euf::snode* var, unsigned mod_count) {
|
||||
ast_manager& m = m_sg.get_manager();
|
||||
SASSERT(var && var->is_var());
|
||||
SASSERT(mod_count > 0);
|
||||
|
||||
auto key = std::make_pair(var->id(), mod_count);
|
||||
auto it = m_len_var_cache.find(key);
|
||||
if (it != m_len_var_cache.end())
|
||||
return expr_ref(it->second, m);
|
||||
|
||||
// Create a fresh integer variable: len_<varname>_<mod_count>
|
||||
arith_util arith(m);
|
||||
std::string name = "len!" + std::to_string(var->id()) + "!" + std::to_string(mod_count);
|
||||
expr_ref fresh(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
|
||||
m_len_vars.push_back(fresh);
|
||||
m_len_var_cache.insert({key, fresh.get()});
|
||||
return fresh;
|
||||
}
|
||||
|
||||
void nielsen_graph::add_subst_length_constraints(nielsen_edge* e) {
|
||||
auto const& substs = e->subst();
|
||||
|
||||
// Quick check: any non-eliminating substitutions?
|
||||
bool has_non_elim = false;
|
||||
for (auto const& s : substs)
|
||||
if (!s.is_eliminating()) { has_non_elim = true; break; }
|
||||
if (!has_non_elim) return;
|
||||
|
||||
ast_manager& m = m_sg.get_manager();
|
||||
arith_util arith(m);
|
||||
|
||||
// Step 1: Compute LHS (|x|) for each non-eliminating substitution
|
||||
// using current m_mod_cnt (before bumping).
|
||||
// Also assert |x|_k >= 0 (mirrors ZIPT's NielsenNode constructor line 172).
|
||||
svector<std::pair<unsigned, expr*>> lhs_exprs;
|
||||
for (unsigned i = 0; i < substs.size(); ++i) {
|
||||
auto const& s = substs[i];
|
||||
if (s.is_eliminating()) continue;
|
||||
SASSERT(s.m_var && s.m_var->is_var());
|
||||
expr_ref lhs = compute_length_expr(s.m_var);
|
||||
lhs_exprs.push_back({i, lhs.get()});
|
||||
// Assert LHS >= 0
|
||||
e->add_side_int(int_constraint(lhs, arith.mk_int(0),
|
||||
int_constraint_kind::ge, s.m_dep, m));
|
||||
}
|
||||
|
||||
// Step 2: Bump mod counts for all non-eliminating variables at once.
|
||||
for (auto const& s : substs) {
|
||||
if (s.is_eliminating()) continue;
|
||||
unsigned id = s.m_var->id();
|
||||
unsigned prev = 0;
|
||||
m_mod_cnt.find(id, prev);
|
||||
m_mod_cnt.insert(id, prev + 1);
|
||||
}
|
||||
|
||||
// Step 3: Compute RHS (|u|) with bumped mod counts and add |x| = |u|.
|
||||
for (auto const& p : lhs_exprs) {
|
||||
unsigned idx = p.first;
|
||||
expr* lhs_expr = p.second;
|
||||
auto const& s = substs[idx];
|
||||
expr_ref rhs = compute_length_expr(s.m_replacement);
|
||||
e->add_side_int(int_constraint(lhs_expr, rhs, int_constraint_kind::eq,
|
||||
s.m_dep, m));
|
||||
|
||||
// Assert non-negativity for any fresh length variables in the RHS
|
||||
// (variables at mod_count > 0 that are newly created).
|
||||
euf::snode_vector tokens;
|
||||
s.m_replacement->collect_tokens(tokens);
|
||||
for (euf::snode* tok : tokens) {
|
||||
if (tok->is_var()) {
|
||||
unsigned mc = 0;
|
||||
m_mod_cnt.find(tok->id(), mc);
|
||||
if (mc > 0) {
|
||||
expr_ref len_var = get_or_create_len_var(tok, mc);
|
||||
e->add_side_int(int_constraint(len_var, arith.mk_int(0),
|
||||
int_constraint_kind::ge, s.m_dep, m));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: Restore mod counts (temporary bump for computing RHS only).
|
||||
for (auto const& s : substs) {
|
||||
if (s.is_eliminating()) continue;
|
||||
unsigned id = s.m_var->id();
|
||||
unsigned prev = 0;
|
||||
m_mod_cnt.find(id, prev);
|
||||
SASSERT(prev >= 1);
|
||||
if (prev <= 1)
|
||||
m_mod_cnt.remove(id);
|
||||
else
|
||||
m_mod_cnt.insert(id, prev - 1);
|
||||
}
|
||||
}
|
||||
|
||||
void nielsen_graph::inc_edge_mod_counts(nielsen_edge* e) {
|
||||
for (auto const& s : e->subst()) {
|
||||
if (s.is_eliminating()) continue;
|
||||
unsigned id = s.m_var->id();
|
||||
unsigned prev = 0;
|
||||
m_mod_cnt.find(id, prev);
|
||||
m_mod_cnt.insert(id, prev + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void nielsen_graph::dec_edge_mod_counts(nielsen_edge* e) {
|
||||
for (auto const& s : e->subst()) {
|
||||
if (s.is_eliminating()) continue;
|
||||
unsigned id = s.m_var->id();
|
||||
unsigned prev = 0;
|
||||
m_mod_cnt.find(id, prev);
|
||||
SASSERT(prev >= 1);
|
||||
if (prev <= 1)
|
||||
m_mod_cnt.remove(id);
|
||||
else
|
||||
m_mod_cnt.insert(id, prev - 1);
|
||||
}
|
||||
}
|
||||
|
||||
void nielsen_graph::assert_node_new_int_constraints(nielsen_node* node) {
|
||||
// Assert only the int_constraints that are new to this node (beyond those
|
||||
// inherited from its parent via clone_from). The parent's constraints are
|
||||
|
|
@ -3674,6 +3835,78 @@ namespace seq {
|
|||
m_solver.assert_expr(int_constraint_to_expr(node->int_constraints()[i]));
|
||||
}
|
||||
|
||||
void nielsen_graph::generate_node_length_constraints(nielsen_node* node) {
|
||||
if (node->m_node_len_constraints_generated)
|
||||
return;
|
||||
node->m_node_len_constraints_generated = true;
|
||||
|
||||
// Skip the root node — its length constraints are already asserted
|
||||
// at the base solver level by assert_root_constraints_to_solver().
|
||||
if (node == m_root)
|
||||
return;
|
||||
|
||||
ast_manager& m = m_sg.get_manager();
|
||||
arith_util arith(m);
|
||||
uint_set seen_vars;
|
||||
|
||||
for (str_eq const& eq : node->str_eqs()) {
|
||||
if (eq.is_trivial())
|
||||
continue;
|
||||
|
||||
expr_ref len_lhs = compute_length_expr(eq.m_lhs);
|
||||
expr_ref len_rhs = compute_length_expr(eq.m_rhs);
|
||||
node->add_int_constraint(int_constraint(len_lhs, len_rhs,
|
||||
int_constraint_kind::eq, eq.m_dep, m));
|
||||
|
||||
// non-negativity for each variable (mod-count-aware)
|
||||
euf::snode_vector tokens;
|
||||
eq.m_lhs->collect_tokens(tokens);
|
||||
eq.m_rhs->collect_tokens(tokens);
|
||||
for (euf::snode* tok : tokens) {
|
||||
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||||
seen_vars.insert(tok->id());
|
||||
expr_ref len_var = compute_length_expr(tok);
|
||||
node->add_int_constraint(int_constraint(len_var, arith.mk_int(0),
|
||||
int_constraint_kind::ge, eq.m_dep, m));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parikh interval bounds for regex memberships at this node
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
for (str_mem const& mem : node->str_mems()) {
|
||||
expr* re_expr = mem.m_regex->get_expr();
|
||||
if (!re_expr || !seq.is_re(re_expr))
|
||||
continue;
|
||||
|
||||
unsigned min_len = 0, max_len = UINT_MAX;
|
||||
compute_regex_length_interval(mem.m_regex, min_len, max_len);
|
||||
|
||||
expr_ref len_str = compute_length_expr(mem.m_str);
|
||||
|
||||
if (min_len > 0) {
|
||||
node->add_int_constraint(int_constraint(len_str, arith.mk_int(min_len),
|
||||
int_constraint_kind::ge, mem.m_dep, m));
|
||||
}
|
||||
if (max_len < UINT_MAX) {
|
||||
node->add_int_constraint(int_constraint(len_str, arith.mk_int(max_len),
|
||||
int_constraint_kind::le, mem.m_dep, m));
|
||||
}
|
||||
|
||||
// non-negativity for string-side variables
|
||||
euf::snode_vector tokens;
|
||||
mem.m_str->collect_tokens(tokens);
|
||||
for (euf::snode* tok : tokens) {
|
||||
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||||
seen_vars.insert(tok->id());
|
||||
expr_ref len_var = compute_length_expr(tok);
|
||||
node->add_int_constraint(int_constraint(len_var, arith.mk_int(0),
|
||||
int_constraint_kind::ge, mem.m_dep, m));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path) {
|
||||
// In incremental mode the solver already holds all path constraints
|
||||
// (root length constraints at the base level, edge side_int and node
|
||||
|
|
|
|||
|
|
@ -241,6 +241,7 @@ Author:
|
|||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include "model/model.h"
|
||||
|
||||
namespace seq {
|
||||
|
|
@ -455,6 +456,7 @@ namespace seq {
|
|||
ptr_vector<str_mem> m_side_str_mem; // side constraints: regex memberships
|
||||
vector<int_constraint> m_side_int; // side constraints: integer equalities/inequalities
|
||||
bool m_is_progress; // does this edge represent progress?
|
||||
bool m_len_constraints_computed = false; // lazily computed substitution length constraints
|
||||
public:
|
||||
nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
|
||||
|
||||
|
|
@ -479,6 +481,9 @@ namespace seq {
|
|||
|
||||
bool is_progress() const { return m_is_progress; }
|
||||
|
||||
bool len_constraints_computed() const { return m_len_constraints_computed; }
|
||||
void set_len_constraints_computed(bool v) { m_len_constraints_computed = v; }
|
||||
|
||||
bool operator==(nielsen_edge const& other) const {
|
||||
return m_src == other.m_src && m_tgt == other.m_tgt;
|
||||
}
|
||||
|
|
@ -518,6 +523,7 @@ namespace seq {
|
|||
bool m_is_extended = false;
|
||||
backtrack_reason m_reason = backtrack_reason::unevaluated;
|
||||
bool m_is_progress = false;
|
||||
bool m_node_len_constraints_generated = false; // true after generate_node_length_constraints runs
|
||||
|
||||
// evaluation index for run tracking
|
||||
unsigned m_eval_idx = 0;
|
||||
|
|
@ -725,6 +731,26 @@ namespace seq {
|
|||
// Set to true after assert_root_constraints_to_solver() is first called.
|
||||
bool m_root_constraints_asserted = false;
|
||||
|
||||
// -----------------------------------------------
|
||||
// Modification counter for substitution length tracking.
|
||||
// mirrors ZIPT's LocalInfo.CurrentModificationCnt
|
||||
// -----------------------------------------------
|
||||
|
||||
// Maps snode id of string variable → current modification (reuse) count
|
||||
// along the DFS path. When a non-eliminating substitution x/u is applied
|
||||
// (x appears in u), x's count is bumped. This produces distinct length
|
||||
// variables for x before and after substitution, avoiding the unsatisfiable
|
||||
// |x| = 1 + |x| that results from reusing the same length symbol.
|
||||
u_map<unsigned> m_mod_cnt;
|
||||
|
||||
// Cache: (var snode id, modification count) → fresh integer variable.
|
||||
// Variables at mod_count 0 use str.len(var_expr) (standard form).
|
||||
// Variables at mod_count > 0 get a fresh Z3 integer constant.
|
||||
std::map<std::pair<unsigned, unsigned>, expr*> m_len_var_cache;
|
||||
|
||||
// Pins the fresh length variable expressions so they aren't garbage collected.
|
||||
expr_ref_vector m_len_vars;
|
||||
|
||||
public:
|
||||
// Construct with a caller-supplied solver. Ownership is NOT transferred;
|
||||
// the caller is responsible for keeping the solver alive.
|
||||
|
|
@ -966,6 +992,14 @@ namespace seq {
|
|||
// bounds become visible to subsequent check() and check_lp_le() calls.
|
||||
void assert_node_new_int_constraints(nielsen_node* node);
|
||||
|
||||
// Generate |LHS| = |RHS| length constraints for a non-root node's own
|
||||
// string equalities and add them as int_constraints on the node.
|
||||
// Called once per node (guarded by m_node_len_constraints_generated).
|
||||
// Uses compute_length_expr (mod-count-aware) so that variables with
|
||||
// non-zero modification counts get fresh length variables.
|
||||
// Mirrors ZIPT's Constraint.Shared forwarding for per-node equations.
|
||||
void generate_node_length_constraints(nielsen_node* node);
|
||||
|
||||
// check integer feasibility of the constraints along the current path.
|
||||
// returns true if feasible (including unknown), false only if l_false.
|
||||
// Precondition: all path constraints have been incrementally asserted to
|
||||
|
|
@ -991,6 +1025,30 @@ namespace seq {
|
|||
|
||||
// convert an int_constraint to an expr* assertion
|
||||
expr_ref int_constraint_to_expr(int_constraint const& ic);
|
||||
|
||||
// -----------------------------------------------
|
||||
// Modification counter methods for substitution length tracking.
|
||||
// mirrors ZIPT's NielsenEdge.IncModCount / DecModCount and
|
||||
// NielsenNode constructor length assertion logic.
|
||||
// -----------------------------------------------
|
||||
|
||||
// Get or create a fresh integer variable for len(var) at the given
|
||||
// modification count. Returns str.len(var_expr) when mod_count == 0.
|
||||
expr_ref get_or_create_len_var(euf::snode* var, unsigned mod_count);
|
||||
|
||||
// Compute and add |x| = |u| length constraints to an edge for all
|
||||
// its non-eliminating substitutions. Uses current m_mod_cnt.
|
||||
// Temporarily bumps m_mod_cnt for RHS computation, then restores.
|
||||
// Called lazily on first edge traversal in search_dfs.
|
||||
void add_subst_length_constraints(nielsen_edge* e);
|
||||
|
||||
// Bump modification counts for an edge's non-eliminating substitutions.
|
||||
// Called when entering an edge during DFS.
|
||||
void inc_edge_mod_counts(nielsen_edge* e);
|
||||
|
||||
// Restore modification counts for an edge's non-eliminating substitutions.
|
||||
// Called when backtracking from an edge during DFS.
|
||||
void dec_edge_mod_counts(nielsen_edge* e);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue