3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-05-16 23:25:36 +00:00

add draft for model construction

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2026-05-01 11:07:27 -07:00
parent c7ccca0873
commit 466bfea604
2 changed files with 602 additions and 0 deletions

468
src/smt/seq_model.cpp.draft Normal file
View file

@ -0,0 +1,468 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_model.cpp
Abstract:
Implementation of seq_model: model construction for the
Nielsen-based string solver.
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/seq_model.h"
#include "smt/smt_context.h"
#include "smt/smt_model_generator.h"
#include "smt/smt_arith_value.h"
#include "smt/proto_model/proto_model.h"
#include "ast/ast_pp.h"
namespace smt {
static enode* find_root_enode(context& ctx, expr* e) {
if (!e)
return nullptr;
enode* n = ctx.find_enode(e);
return n ? n->get_root() : nullptr;
}
static bool is_model_dependency(context& ctx, enode* n) {
if (!n)
return false;
seq_util seq(ctx.get_manager());
if (seq.is_seq(n->get_sort()) || seq.is_re(n->get_sort()))
return false;
return ctx.is_relevant(n) || ctx.get_manager().is_value(n->get_expr());
}
class seq_snode_value_proc : public model_value_proc {
seq_model& m_owner;
enode* m_node;
euf::snode* m_snode;
ptr_vector<enode> m_dependencies;
public:
seq_snode_value_proc(seq_model& owner, enode* node, euf::snode* snode)
: m_owner(owner), m_node(node), m_snode(snode) {
m_owner.collect_dependencies(m_snode, m_dependencies);
}
void get_dependencies(buffer<model_value_dependency>& result) override {
for (enode* d : m_dependencies)
result.push_back(model_value_dependency(d));
}
app* mk_value(model_generator& mg, expr_ref_vector const& values) override {
SASSERT(values.size() == m_dependencies.size());
expr_ref val = m_owner.snode_to_value(m_snode, values);
if (!val)
val = m_owner.m_seq.str.mk_empty(m_node->get_expr()->get_sort());
m_owner.m_trail.push_back(val);
m_owner.m_factory->add_trail(val);
TRACE(seq, tout << "nseq seq_snode_value_proc: " << mk_pp(m_node->get_expr(), m_owner.m) << " -> " << mk_pp(val, m_owner.m) << "\n";);
return to_app(val);
}
};
seq_model::seq_model(ast_manager& m, context& ctx, seq_util& seq,
seq_rewriter& rw, euf::sgraph& sg)
: m(m), m_ctx(ctx), m_seq(seq), m_rewriter(rw), m_sg(sg), m_trail(m)
{}
void seq_model::init(model_generator& mg, seq::nielsen_graph& nielsen) {
TRACE(seq, nielsen.display(tout << nielsen.to_dot() << "\n"));
m_var_values.reset();
m_var_regex.reset();
m_trail.reset();
m_mg = &mg;
m_factory = alloc(seq_factory, m, m_seq.get_family_id(), mg.get_model());
mg.register_factory(m_factory);
register_existing_values(nielsen);
seq::nielsen_node* sat_node = nielsen.sat_node();
SASSERT(sat_node); // in case we report sat, this has to point to a satisfied Nielsen node!
collect_var_regex_constraints(sat_node);
// solve integer constraints from the sat_path FIRST so that
// m_int_model is available when snode_to_value evaluates power exponents
// VERIFY(nielsen.solve_sat_path_raw(m_int_model));
// extract variable assignments from the satisfying leaf's substitution path
extract_assignments(nielsen.sat_path());
}
model_value_proc* seq_model::mk_value(enode* n, model_generator& mg) {
app* e = n->get_expr();
if (!m_seq.is_seq(e) && !m_seq.is_re(e) && !m_seq.str.is_nth_u(e))
return nullptr;
// For regex-sorted enodes, return the expression itself as a model value.
// Regexes are interpreted as themselves in the model.
if (m_seq.is_re(e)) {
m_trail.push_back(e);
return alloc(expr_wrapper_proc, e);
}
// For nth_u (underspecified nth), return a fresh value of the element sort.
if (m_seq.str.is_nth_u(e)) {
sort* srt = e->get_sort();
expr* val = m_factory->get_fresh_value(srt);
if (val) {
m_trail.push_back(val);
return alloc(expr_wrapper_proc, to_app(val));
}
return nullptr;
}
// look up snode for this expression
euf::snode* sn = m_sg.find(e);
IF_VERBOSE(2, {
verbose_stream() << "nseq mk_value: expr=" << mk_bounded_pp(e, m, 2);
if (sn) verbose_stream() << " snode[" << sn->id() << "] kind=" << (int)sn->kind();
else verbose_stream() << " snode=null";
verbose_stream() << "\n";
});
expr_ref val(m);
if (sn)
return alloc(seq_snode_value_proc, *this, n, sn);
if (!val) {
// no assignment found — default to empty string
val = m_seq.str.mk_empty(e->get_sort());
}
if (val) {
m_trail.push_back(val);
m_factory->add_trail(val);
return alloc(expr_wrapper_proc, to_app(val));
}
return alloc(expr_wrapper_proc, to_app(m_seq.str.mk_empty(e->get_sort())));
}
void seq_model::finalize(model_generator& mg) {
m_var_values.reset();
m_var_regex.reset();
m_trail.reset();
m_mg = nullptr;
m_factory = nullptr;
}
void seq_model::extract_assignments(ptr_vector<seq::nielsen_edge> const& sat_path) {
IF_VERBOSE(1, verbose_stream() << "nseq extract_assignments: path length=" << sat_path.size() << "\n";);
// compose substitutions root-to-leaf.
// bindings[i] = (var_snode, current_value_snode).
// When a new substitution (s.m_var -> s.m_replacement) is applied,
// substitute s.m_var in all existing values, then record the new binding.
vector<std::pair<euf::snode*, euf::snode*>> bindings;
for (seq::nielsen_edge* e : sat_path) {
for (seq::nielsen_subst const& s : e->subst()) {
if (!s.m_var)
continue;
IF_VERBOSE(1, {
verbose_stream() << " subst: snode[" << s.m_var->id() << "]";
if (s.m_var->get_expr()) verbose_stream() << "=" << mk_bounded_pp(s.m_var->get_expr(), m, 2);
verbose_stream() << " -> snode[" << (s.m_replacement ? (int)s.m_replacement->id() : -1) << "]";
if (s.m_replacement && s.m_replacement->get_expr()) verbose_stream() << "=" << mk_bounded_pp(s.m_replacement->get_expr(), m, 2);
verbose_stream() << "\n";
});
for (auto& b : bindings)
b.second = m_sg.subst(b.second, s.m_var, s.m_replacement);
bindings.push_back({s.m_var, s.m_replacement});
}
}
IF_VERBOSE(1, verbose_stream() << "nseq extract_assignments: " << bindings.size() << " bindings\n";);
for (auto const &[var, replacement] : bindings) {
SASSERT(var);
unsigned id = var->first()->id();
if (m_var_values.contains(id))
continue;
expr_ref_vector values(m);
// TODO - this doesn't work.
// we need a way to track that var depends on replacement
// so the value computed for var is based on replacement's value.
expr_ref val = snode_to_value(replacement, values); // TODO
IF_VERBOSE(1, {
verbose_stream() << " var snode[" << id << "]";
if (var->get_expr()) verbose_stream() << "=" << mk_bounded_pp(var->get_expr(), m, 2);
verbose_stream() << " -> ";
if (val) verbose_stream() << mk_bounded_pp(val, m, 3); else verbose_stream() << "(null)";
verbose_stream() << "\n";
});
if (val) {
m_trail.push_back(val);
m_var_values.insert(id, val);
}
}
}
expr_ref seq_model::snode_to_value(euf::snode* n, expr_ref_vector const& values) {
SASSERT(n);
if (n->is_empty()) {
sort* srt = n->get_sort();
if (!srt)
srt = m_seq.str.mk_string_sort();
return expr_ref(m_seq.str.mk_empty(srt), m);
}
// NSB review
// we have to carefully figure out what to do/redo here.
// model construction in z3 is designed to be hierarchical.
// during model initialization solvers register depenendencies between enodes for model construction.
// The dependencies should be acyclic to enable bottom-up model construction.
// Values for dependencies are accessed in the model_value_proc class.
// For strings/sequences we have a natural way to record dependencies.
// unit/character nodes depend on the elements they contain.
if (n->is_char() || n->is_unit()) {
expr *e = n->get_expr();
SASSERT(m_seq.str.is_unit(e));
e = to_app(e)->get_arg(0);
unsigned c;
expr *dval = values.get(0);
if (m_seq.is_const_char(dval, c))
return expr_ref(m_seq.str.mk_string(zstring(c)), m);
return expr_ref(m_seq.str.mk_unit(dval), m);
}
if (n->is_var())
return expr_ref(get_var_value(n), m);
if (n->is_concat()) {
return expr_ref(m_seq.str.mk_concat(values, n->get_sort()), m);
}
if (n->is_power()) {
SASSERT(n->num_args() == 2);
SASSERT(values.size() == 2);
// Evaluate the base and exponent to produce a concrete string.
// The base is a string snode; the exponent is an integer expression
// whose value comes from the sat_path integer model.
expr* base_val = values.get(0);
expr *exp_expr = values.get(1);
rational exp_val(0);
arith_util arith(m);
// Try to evaluate exponent: first check if it's a numeral,
// then try the int model from sat_path constraints,
// finally fall back to the proto_model from model_generator.
bool has_val = false;
if (exp_expr && arith.is_numeral(exp_expr, exp_val)) {
has_val = true;
// already concrete
}
if (!has_val) {
arith_value avalue(m);
avalue.init(&m_ctx);
avalue.get_value(exp_expr, exp_val);
}
if (exp_val.is_neg())
exp_val = rational(0);
// Build the repeated string: base^exp_val
if (exp_val == 0) {
sort* srt = n->get_sort();
SASSERT(srt);
return expr_ref(m_seq.str.mk_empty(srt), m);
}
if (exp_val.is_one())
return expr_ref(base_val, m);
// For small exponents, concatenate directly; for large ones,
// return mk_power to avoid enormous AST chains.
constexpr unsigned POWER_EXPAND_LIMIT = 100;
if (exp_val > POWER_EXPAND_LIMIT)
return expr_ref(m_seq.str.mk_power(base_val, arith.mk_int(exp_val)), m);
unsigned n_val = exp_val.get_unsigned();
expr_ref acc(base_val, m);
for (unsigned i = 1; i < n_val; ++i)
acc = m_seq.str.mk_concat(acc, base_val);
return acc;
}
// fallback: use the underlying expression
return expr_ref(n->get_expr(), m);
}
void seq_model::collect_dependencies(euf::snode* n, ptr_vector<enode>& deps) const {
if (n->is_char() || n->is_unit())
deps.push_back(m_ctx.get_enode(n->arg(0)->get_expr()));
else if (n->is_concat()) {
for (unsigned i = 0; i < n->num_args(); ++i)
deps.push_back(m_ctx.get_enode(n->arg(i)->get_expr()));
}
else if (n->is_power()) {
deps.push_back(m_ctx.get_enode(n->arg(0)->get_expr()));
deps.push_back(m_ctx.get_enode(n->arg(1)->get_expr()));
}
}
void seq_model::register_existing_values(seq::nielsen_graph& nielsen) {
seq::nielsen_node const* root = nielsen.root();
if (!root)
return;
for (auto const& eq : root->str_eqs()) {
if (eq.m_lhs && eq.m_lhs->get_expr())
m_factory->register_value(eq.m_lhs->get_expr());
if (eq.m_rhs && eq.m_rhs->get_expr())
m_factory->register_value(eq.m_rhs->get_expr());
}
}
expr* seq_model::get_var_value(euf::snode* var) {
SASSERT(var);
unsigned key = var->first()->id();
expr* val = nullptr;
if (m_var_values.find(key, val))
return val;
// unconstrained or regex-constrained: delegate to mk_fresh_value
val = mk_fresh_value(var);
if (val) {
m_trail.push_back(val);
m_var_values.insert(key, val);
}
return val;
}
expr* seq_model::mk_fresh_value(euf::snode* var) {
SASSERT(var->get_expr());
if (!m_seq.is_seq(var->get_expr()))
return nullptr;
auto srt = var->get_expr()->get_sort();
// check if this variable has regex constraints
euf::snode* re = nullptr;
unsigned key = var->first()->id();
if (m_var_regex.find(key, re) && re) {
expr* re_expr = re->get_expr();
SASSERT(re_expr);
arith_util arith(m);
arith_value avalue(m);
avalue.init(&m_ctx);
expr_ref len_expr(m_seq.str.mk_length(var->get_expr()), m);
rational len_val;
bool has_len = avalue.get_value(len_expr, len_val);
if (has_len && len_val.is_unsigned()) {
unsigned n = len_val.get_unsigned();
expr_ref loop(m_seq.re.mk_loop(m_seq.re.mk_full_char(re_expr->get_sort()), n, n), m);
re_expr = m_seq.re.mk_inter(re_expr, loop);
}
expr_ref witness(m);
// We checked non-emptiness during Nielsen already
lbool wr = m_rewriter.some_seq_in_re(re_expr, witness);
if (wr == l_true && witness) {
m_trail.push_back(witness);
m_factory->register_value(witness);
return witness;
}
IF_VERBOSE(1, verbose_stream() << "witness extraction failed for " << mk_pp(var->get_expr(), m)
<< " : " << wr << " with len " << (has_len ? len_val.to_string() : "unknown") << "\n" << mk_pp(re_expr, m) << "\n");
UNREACHABLE();
}
// No regex constraint: try to respect the assigned length for the variable.
// This prevents invalid models such as len(x)=1 with x="".
arith_util arith(m);
expr_ref len_expr(m_seq.str.mk_length(var->get_expr()), m);
rational len_val;
arith_value avalue(m);
avalue.init(&m_ctx);
bool has_len = avalue.get_value(len_expr, len_val);
if (has_len) {
if (!len_val.is_int() || len_val.is_neg())
len_val = rational(0);
if (len_val.is_zero())
return m_seq.str.mk_empty(srt);
constexpr unsigned MAX_CONCRETE_WITNESS = 1024;
if (len_val.is_unsigned() && len_val.get_unsigned() <= MAX_CONCRETE_WITNESS) {
unsigned n = len_val.get_unsigned();
zstring w;
for (unsigned i = 0; i < n; ++i)
w += zstring('0');
expr* witness = m_seq.str.mk_string(w);
m_factory->register_value(witness);
return witness;
}
expr* base = m_seq.str.mk_string("0");
expr* witness = m_seq.str.mk_power(base, arith.mk_int(len_val));
m_factory->register_value(witness);
return witness;
}
// no regex constraint or witness generation failed: use empty string
return m_seq.str.mk_empty(srt);
}
void seq_model::collect_var_regex_constraints(seq::nielsen_node const* sat_node) {
SASSERT(sat_node);
for (auto const& mem : sat_node->str_mems()) {
SASSERT(mem.m_str && mem.m_regex);
if (mem.is_trivial(sat_node))
continue; // empty string in nullable regex: already satisfied, no variable to constrain
VERIFY(mem.is_primitive()); // everything else should have been eliminated already
euf::snode* first = mem.m_str->first();
unsigned id = first->id();
euf::snode* existing = nullptr;
if (m_var_regex.find(id, existing) && existing) {
// intersect with existing constraint:
// build re.inter(existing, new_regex)
expr* e1 = existing->get_expr();
expr* e2 = mem.m_regex->get_expr();
if (e1 && e2) {
expr_ref inter(m_seq.re.mk_inter(e1, e2), m);
euf::snode* inter_sn = m_sg.mk(inter);
SASSERT(inter_sn);
m_var_regex.insert(id, inter_sn);
}
}
else
m_var_regex.insert(id, mem.m_regex);
}
}
bool seq_model::validate_regex(tracked_str_mem const& mem, ::proto_model& mdl) {
if (!mem.m_str || !mem.m_regex)
return true;
expr* s_expr = mem.m_str->get_expr();
expr* r_expr = mem.m_regex->get_expr();
if (!s_expr || !r_expr)
return true;
expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m);
if (mdl.is_false(in_re)) {
IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
<< mk_bounded_pp(s_expr, m, 3)
<< " in " << mk_bounded_pp(r_expr, m, 3) << "\n";);
return false;
}
return true;
}
}

134
src/smt/seq_model.h.draft Normal file
View file

@ -0,0 +1,134 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_model.h
Abstract:
Model construction for the Nielsen-based string solver (theory_nseq).
After the Nielsen graph search returns sat, this module extracts
variable-to-value assignments from the satisfying leaf node and
builds model_value_proc callbacks for the SMT model generator.
The workflow is:
1. init() — allocate seq_factory, register existing string literals,
and extract variable assignments from the satisfying Nielsen node.
2. mk_value(enode*) — return a model_value_proc that lazily builds
the concrete value for a given enode.
3. finalize() — clean up temporary state.
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "smt_context.h"
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/seq/seq_state.h" // tracked_str_mem
#include "model/seq_factory.h"
class proto_model;
namespace smt {
class enode;
class model_generator;
struct tracked_str_mem;
class model_value_proc;
class seq_snode_value_proc;
class seq_model {
friend class seq_snode_value_proc;
ast_manager& m;
context& m_ctx;
seq_util& m_seq;
seq_rewriter& m_rewriter;
euf::sgraph& m_sg;
// factory for generating fresh string/regex values
seq_factory* m_factory = nullptr;
// variable assignments extracted from the satisfying Nielsen node.
// maps snode id -> expr* (concrete value)
u_map<expr*> m_var_values;
// trail for GC protection of generated expressions
expr_ref_vector m_trail;
// integer variable model from sat_path constraints
model_ref m_int_model;
model_generator* m_mg = nullptr;
// per-variable regex constraints: maps snode id -> intersected regex snode.
// collected during init() from the state's str_mem list.
u_map<euf::snode*> m_var_regex;
public:
seq_model(ast_manager& m, context& ctx, seq_util& seq,
seq_rewriter& rw, euf::sgraph& sg);
// Phase 1: initialize model construction.
// Allocates seq_factory, registers it with mg, collects
// existing string literals, and extracts variable assignments
// from the satisfying Nielsen leaf node.
void init(model_generator& mg, seq::nielsen_graph& nielsen);
// Phase 2: build a model_value_proc for the given enode.
// Returns nullptr if the enode is not a sequence/string sort.
model_value_proc* mk_value(enode* n, model_generator& mg);
// Phase 3: clean up temporary model construction state.
void finalize(model_generator& mg);
// Validate that model assignments satisfy all regex membership
// constraints from the state. Checks positive and negative
// memberships. Returns true if all constraints pass.
bool validate_regex(tracked_str_mem const& mem, ::proto_model& mdl);
private:
// extract variable assignments from the sat path (root-to-leaf edges).
// Composes substitutions along the path to compute final var values.
void extract_assignments(ptr_vector<seq::nielsen_edge> const& sat_path);
// recursively substitute known variable assignments into an snode tree.
// Returns a concrete Z3 expression.
// Optionally uses pre-evaluated model values for
// enode dependencies (provided by model_generator).
expr_ref snode_to_value(euf::snode* n, expr_ref_vector const& values);
// Collect enode dependencies required to evaluate an snode value.
void collect_dependencies(euf::snode* n, ptr_vector<enode>& deps) const;
// register all string literals appearing in the constraint store
// with the factory to avoid collisions with fresh values.
void register_existing_values(seq::nielsen_graph& nielsen);
// look up or compute the value for an snode variable.
// If no assignment exists, delegates to mk_fresh_value.
expr* get_var_value(euf::snode* var);
// generate a fresh value for a variable, respecting regex
// membership constraints. If the variable has associated
// regex constraints (collected during init), generates a
// witness satisfying the intersection; otherwise falls back
// to a plain fresh value from the factory.
expr* mk_fresh_value(euf::snode* var);
// collect per-variable regex constraints from the state.
// For each positive str_mem, records the regex (or intersects
// with existing) into m_var_regex keyed by the string snode id.
void collect_var_regex_constraints(seq::nielsen_node const* sat_node);
};
}