mirror of
https://github.com/Z3Prover/z3
synced 2026-03-07 13:54:53 +00:00
first end-pass. Atomic
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
13f9fec339
commit
5aa3713d19
15 changed files with 6160 additions and 209 deletions
|
|
@ -355,6 +355,22 @@ namespace euf {
|
|||
if (n)
|
||||
return n;
|
||||
|
||||
// decompose non-empty string constants into character chains
|
||||
// so that Nielsen graph can do prefix matching on them
|
||||
zstring s;
|
||||
if (m_seq.str.is_string(e, s) && !s.empty()) {
|
||||
snode* result = mk_char(s[s.length() - 1]);
|
||||
for (unsigned i = s.length() - 1; i-- > 0; )
|
||||
result = mk_concat(mk_char(s[i]), result);
|
||||
// register the original string expression as an alias
|
||||
unsigned eid = e->get_id();
|
||||
m_expr2snode.reserve(eid + 1, nullptr);
|
||||
m_expr2snode[eid] = result;
|
||||
m_alias_trail.push_back(eid);
|
||||
mk_enode(e);
|
||||
return result;
|
||||
}
|
||||
|
||||
snode_kind k = classify(e);
|
||||
|
||||
if (!is_app(e))
|
||||
|
|
@ -400,6 +416,7 @@ namespace euf {
|
|||
|
||||
// Opens a new scope: records the current number of nodes and the current
// length of the alias trail as scope marks, bumps the scope counter, and
// pushes a scope on the underlying e-graph.
void sgraph::push() {
    auto const node_mark  = m_nodes.size();
    auto const alias_mark = m_alias_trail.size();
    m_scopes.push_back(node_mark);
    m_alias_trail_lim.push_back(alias_mark);
    m_num_scopes += 1;
    m_egraph.push();
}
|
||||
|
|
@ -420,6 +437,15 @@ namespace euf {
|
|||
}
|
||||
m_nodes.shrink(old_sz);
|
||||
m_scopes.shrink(new_lvl);
|
||||
// undo alias entries (string constant decompositions)
|
||||
unsigned alias_old = m_alias_trail_lim[new_lvl];
|
||||
for (unsigned i = m_alias_trail.size(); i-- > alias_old; ) {
|
||||
unsigned eid = m_alias_trail[i];
|
||||
if (eid < m_expr2snode.size())
|
||||
m_expr2snode[eid] = nullptr;
|
||||
}
|
||||
m_alias_trail.shrink(alias_old);
|
||||
m_alias_trail_lim.shrink(new_lvl);
|
||||
m_num_scopes = new_lvl;
|
||||
m_egraph.pop(num_scopes);
|
||||
}
|
||||
|
|
@ -520,6 +546,25 @@ namespace euf {
|
|||
expr* ch = nullptr;
|
||||
if (m_seq.str.is_unit(elem_expr, ch))
|
||||
elem_expr = ch;
|
||||
|
||||
// If elem is a regex predicate (e.g., re.allchar from compute_minterms),
|
||||
// extract a representative character for the derivative.
|
||||
sort* seq_sort = nullptr, *ele_sort = nullptr;
|
||||
if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) {
|
||||
if (ele_sort != elem_expr->get_sort()) {
|
||||
expr* lo = nullptr, *hi = nullptr;
|
||||
if (m_seq.re.is_full_char(elem_expr)) {
|
||||
// re.allchar represents the entire alphabet; computing a derivative
|
||||
// w.r.t. a single character would be imprecise and could incorrectly
|
||||
// report fail. Return nullptr to prevent incorrect pruning.
|
||||
return nullptr;
|
||||
}
|
||||
else if (m_seq.re.is_range(elem_expr, lo, hi) && lo)
|
||||
elem_expr = lo;
|
||||
else
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
|
||||
if (!result)
|
||||
return nullptr;
|
||||
|
|
|
|||
|
|
@ -97,6 +97,10 @@ namespace euf {
|
|||
// maps expression id to snode
|
||||
ptr_vector<snode> m_expr2snode;
|
||||
|
||||
// trail of alias entries (string constant → decomposed snode) for pop
|
||||
unsigned_vector m_alias_trail; // expression ids
|
||||
unsigned_vector m_alias_trail_lim; // scope boundaries
|
||||
|
||||
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
|
||||
snode_kind classify(expr* e) const;
|
||||
void compute_metadata(snode* n);
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ Module Name:
|
|||
|
||||
Abstract:
|
||||
|
||||
Implementation of nseq_model.
|
||||
Implementation of nseq_model: model construction for the
|
||||
Nielsen-based string solver.
|
||||
|
||||
Author:
|
||||
|
||||
|
|
@ -15,3 +16,292 @@ Author:
|
|||
|
||||
--*/
|
||||
#include "smt/nseq_model.h"
|
||||
#include "smt/theory_nseq.h"
|
||||
#include "smt/nseq_regex.h"
|
||||
#include "smt/nseq_state.h"
|
||||
#include "smt/smt_context.h"
|
||||
#include "smt/smt_model_generator.h"
|
||||
#include "smt/proto_model/proto_model.h"
|
||||
#include "ast/ast_pp.h"
|
||||
|
||||
namespace smt {
|
||||
|
||||
// Stores references to the owning theory and the shared helpers used during
// model construction; the expression trail is bound to the AST manager.
nseq_model::nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
                       seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex)
    : m_th(th),
      m(m),
      m_seq(seq),
      m_rewriter(rw),
      m_sg(sg),
      m_regex(regex),
      m_trail(m)
{}
|
||||
|
||||
// Phase 1 of model construction.
//  - clears state left over from a previous round,
//  - allocates a seq_factory and registers it with the model generator,
//  - registers the expressions of the root's string equalities with the factory,
//  - collects per-variable regex constraints from `state`,
//  - if the root node reports itself satisfied, records variable assignments
//    from its string equalities.
void nseq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state) {
    m_var_values.reset();
    m_var_regex.reset();
    m_trail.reset();

    m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
    mg.register_factory(m_factory);

    register_existing_values(nielsen);
    collect_var_regex_constraints(state);

    seq::nielsen_node const* root = nielsen.root();
    if (!root || !root->is_satisfied())
        return;
    extract_assignments(root);
}
|
||||
|
||||
// Phase 2: build a model_value_proc for the expression of enode `n`.
//
// Returns nullptr when the expression is neither a sequence, a regex, nor an
// underspecified nth (nth_u) term, or when no fresh value can be produced for
// an nth_u term. `mg` is not used by this function.
//
// Every expression handed to an expr_wrapper_proc is first pinned in m_trail
// so that it stays alive for the duration of model construction.
model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
    app* e = n->get_expr();
    bool const is_regex = m_seq.is_re(e);
    bool const is_nth_u = m_seq.str.is_nth_u(e);
    if (!m_seq.is_seq(e) && !is_regex && !is_nth_u)
        return nullptr;

    // Regexes are interpreted as themselves in the model.
    if (is_regex) {
        m_trail.push_back(e);
        return alloc(expr_wrapper_proc, e);
    }

    // nth_u (underspecified nth): use a fresh value of the term's own sort.
    if (is_nth_u) {
        expr* fresh = m_factory->get_fresh_value(e->get_sort());
        if (!fresh)
            return nullptr;
        m_trail.push_back(fresh);
        return alloc(expr_wrapper_proc, to_app(fresh));
    }

    // Sequence term: evaluate the associated snode if there is one.
    expr_ref val(m);
    if (euf::snode* sn = m_sg.find(e))
        val = snode_to_value(sn);

    // No assignment found: ask the factory for a fresh value.
    if (!val)
        val = m_factory->get_fresh_value(e->get_sort());

    if (val) {
        m_trail.push_back(val);
        m_factory->add_trail(val);
        return alloc(expr_wrapper_proc, to_app(val));
    }

    // Last resort: the empty sequence of the term's sort. It is pinned in
    // m_trail like every other value returned from this function; previously
    // this expression was handed out without any reference keeping it alive.
    expr_ref empty(m_seq.str.mk_empty(e->get_sort()), m);
    m_trail.push_back(empty);
    return alloc(expr_wrapper_proc, to_app(empty));
}
|
||||
|
||||
// Phase 3: drop all temporary model-construction state. The factory pointer
// is only cleared here, not freed. `mg` is not used.
void nseq_model::finalize(model_generator& mg) {
    // per-variable tables
    m_var_values.reset();
    m_var_regex.reset();
    // pinned expressions
    m_trail.reset();
    // forget the factory
    m_factory = nullptr;
}
|
||||
|
||||
// Walks the string equalities of `node` and, for every side that is a
// variable without a recorded value, stores the value computed from the
// opposite side. The left-hand side is considered before the right-hand side.
void nseq_model::extract_assignments(seq::nielsen_node const* node) {
    if (!node)
        return;

    // Record `var := value(other)` unless var is not a variable, already has
    // a value, or no value can be computed for `other`.
    auto bind = [this](euf::snode* var, euf::snode* other) {
        if (!var->is_var() || m_var_values.contains(var->id()))
            return;
        expr_ref value = snode_to_value(other);
        if (!value)
            return;
        m_trail.push_back(value);
        m_var_values.insert(var->id(), value);
    };

    for (auto const& eq : node->str_eqs()) {
        if (eq.m_lhs && eq.m_rhs) {
            bind(eq.m_lhs, eq.m_rhs);
            bind(eq.m_rhs, eq.m_lhs);
        }
    }
}
|
||||
|
||||
// Converts an snode tree into an expression, substituting variable values.
//
// Returns a null expr_ref when `n` is null or when no expression can be
// produced for it.
//  - empty:     an empty sequence of the node's own sequence sort (string
//               sort when the node carries no sequence-sorted expression)
//  - char/unit: the node's underlying expression
//  - variable:  the value from get_var_value
//  - concat:    concatenation of both argument values; if only one side
//               yields a value, that side alone is returned
//  - otherwise: the node's underlying expression
expr_ref nseq_model::snode_to_value(euf::snode* n) {
    if (!n)
        return expr_ref(m);

    if (n->is_empty()) {
        // Use the sort of the node's expression rather than hard-wiring the
        // string sort, so empty sequences over other element sorts get a
        // value of the matching sort.
        sort* srt = m_seq.str.mk_string_sort();
        expr* ne = n->get_expr();
        if (ne && m_seq.is_seq(ne))
            srt = ne->get_sort();
        return expr_ref(m_seq.str.mk_empty(srt), m);
    }

    if (n->is_char() || n->is_unit()) {
        expr* e = n->get_expr();
        return e ? expr_ref(e, m) : expr_ref(m);
    }

    if (n->is_var())
        return expr_ref(get_var_value(n), m);

    if (n->is_concat()) {
        expr_ref lhs = snode_to_value(n->arg(0));
        expr_ref rhs = snode_to_value(n->arg(1));
        if (lhs && rhs)
            return expr_ref(m_seq.str.mk_concat(lhs, rhs), m);
        if (lhs) return lhs;
        if (rhs) return rhs;
        return expr_ref(m);
    }

    // fallback: use the underlying expression
    expr* e = n->get_expr();
    return e ? expr_ref(e, m) : expr_ref(m);
}
|
||||
|
||||
// Builds a candidate witness string for `regex`.
//
//  - null regex:     empty string
//  - depth > 1000:   fresh factory value (recursion bound)
//  - nullable regex: empty string
//  - otherwise:      the first collected first-character whose derivative is
//                    available and is not the fail regex, followed by a
//                    witness for that derivative
//  - fallback:       fresh factory value, or the empty string when the
//                    factory yields nothing. The fallback may not satisfy
//                    the regex.
//
// Candidates whose derivative is null or the fail regex are skipped. A null
// derivative used to recurse into the null-regex case and end the witness
// with the empty string, and a fail derivative appended a fallback tail to a
// character that cannot start any word of the language.
expr_ref nseq_model::generate_regex_witness(euf::snode* regex, unsigned depth) {
    sort* const str_sort = m_seq.str.mk_string_sort();
    auto empty_string = [&]() {
        return expr_ref(m_seq.str.mk_empty(str_sort), m);
    };
    auto fresh_or_empty = [&]() {
        expr* fresh = m_factory->get_fresh_value(str_sort);
        return fresh ? expr_ref(fresh, m) : empty_string();
    };

    if (!regex)
        return empty_string();

    // depth bound to prevent stack overflow on deep regexes
    if (depth > 1000)
        return fresh_or_empty();

    // nullable regex: empty string is a valid witness
    if (m_regex.is_nullable(regex))
        return empty_string();

    // collect first-position characters and commit to the first viable one
    euf::snode_vector chars;
    m_regex.collect_first_chars(regex, chars);

    for (euf::snode* c : chars) {
        if (!c || !c->get_expr())
            continue;
        euf::snode* deriv = m_regex.derivative(regex, c);
        if (!deriv || deriv->is_fail())
            continue;
        expr_ref tail = generate_regex_witness(deriv, depth + 1);
        if (tail)
            return expr_ref(m_seq.str.mk_concat(c->get_expr(), tail.get()), m);
    }

    // fallback: fresh value from the factory (may not satisfy the regex, but
    // avoids the empty string, which cannot satisfy a non-nullable regex)
    return fresh_or_empty();
}
|
||||
|
||||
// Registers the expressions on both sides of every string equality of the
// root node with the factory. Sides without a node or without an expression
// are skipped. Does nothing when the graph has no root.
void nseq_model::register_existing_values(seq::nielsen_graph& nielsen) {
    seq::nielsen_node const* root = nielsen.root();
    if (!root)
        return;

    auto register_side = [this](euf::snode* side) {
        if (side && side->get_expr())
            m_factory->register_value(side->get_expr());
    };

    for (auto const& eq : root->str_eqs()) {
        register_side(eq.m_lhs);
        register_side(eq.m_rhs);
    }
}
|
||||
|
||||
// Returns the value recorded for variable snode `var`. When none is
// recorded, asks mk_fresh_value for one, pins it in the trail and caches it
// under the variable's id. May return nullptr if no fresh value is produced.
expr* nseq_model::get_var_value(euf::snode* var) {
    auto const key = var->id();

    expr* cached = nullptr;
    if (m_var_values.find(key, cached))
        return cached;

    // unconstrained or regex-constrained: delegate to mk_fresh_value
    expr* fresh = mk_fresh_value(var);
    if (!fresh)
        return nullptr;

    m_trail.push_back(fresh);
    m_var_values.insert(key, fresh);
    return fresh;
}
|
||||
|
||||
// Produces a value for variable snode `var`.
// If a regex constraint was collected for the variable and a witness can be
// generated for it, the witness is pinned, registered with the factory and
// returned. Otherwise a fresh factory value of the variable's sort is
// returned (string sort when the variable carries no expression).
expr* nseq_model::mk_fresh_value(euf::snode* var) {
    euf::snode* constraint = nullptr;
    bool const constrained = m_var_regex.find(var->id(), constraint) && constraint;

    if (constrained) {
        expr_ref w = generate_regex_witness(constraint);
        if (w) {
            m_trail.push_back(w);
            m_factory->register_value(w);
            return w.get();
        }
    }

    // no regex constraint or witness generation failed: plain fresh value
    sort* srt = m_seq.str.mk_string_sort();
    if (expr* ve = var->get_expr())
        srt = ve->get_sort();
    return m_factory->get_fresh_value(srt);
}
|
||||
|
||||
// Fills m_var_regex from the memberships in `state`.
// Only memberships whose string side is a variable snode are considered.
// The first regex seen for a variable is stored as is; each further regex is
// combined with the stored one through re.inter, provided both have
// expressions and the sgraph returns a node for the intersection.
void nseq_model::collect_var_regex_constraints(nseq_state const& state) {
    for (auto const& mem : state.str_mems()) {
        euf::snode* str = mem.m_str;
        euf::snode* re  = mem.m_regex;
        if (!str || !re || !str->is_var())
            continue;

        unsigned key = str->id();
        euf::snode* prev = nullptr;
        bool const seen = m_var_regex.find(key, prev) && prev;
        if (!seen) {
            m_var_regex.insert(key, re);
            continue;
        }

        // intersect with the existing constraint: re.inter(prev, re)
        expr* prev_e = prev->get_expr();
        expr* new_e  = re->get_expr();
        if (!prev_e || !new_e)
            continue;

        expr_ref inter(m_seq.re.mk_inter(prev_e, new_e), m);
        if (euf::snode* inter_sn = m_sg.mk(inter))
            m_var_regex.insert(key, inter_sn);
    }
}
|
||||
|
||||
// Checks the regex memberships of `state` against model `mdl`.
// A positive membership is reported as violated when the model says the
// membership atom is false; a negative membership is reported as violated
// when the atom evaluates to true. Entries lacking a node or an expression
// on either side are skipped. Returns true iff nothing was reported.
bool nseq_model::validate_regex(nseq_state const& state, ::proto_model& mdl) {
    bool all_hold = true;

    // positive memberships: str in regex
    for (auto const& pos : state.str_mems()) {
        if (!pos.m_str || !pos.m_regex)
            continue;
        expr* str_e = pos.m_str->get_expr();
        expr* re_e  = pos.m_regex->get_expr();
        if (!str_e || !re_e)
            continue;

        expr_ref atom(m_seq.re.mk_in_re(str_e, re_e), m);
        if (!mdl.is_false(atom))
            continue;

        IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
                   << mk_bounded_pp(str_e, m, 3)
                   << " in " << mk_bounded_pp(re_e, m, 3) << "\n";);
        all_hold = false;
    }

    // negative memberships: str not in regex
    for (auto const& neg : state.neg_mems()) {
        if (!neg.m_str || !neg.m_regex)
            continue;
        expr* str_e = neg.m_str->get_expr();
        expr* re_e  = neg.m_regex->get_expr();
        if (!str_e || !re_e)
            continue;

        expr_ref atom(m_seq.re.mk_in_re(str_e, re_e), m);
        expr_ref verdict(m);
        mdl.eval(atom, verdict, true);
        if (!(verdict && m.is_true(verdict)))
            continue;

        IF_VERBOSE(0, verbose_stream() << "nseq model: negative membership violated: "
                   << mk_bounded_pp(str_e, m, 3)
                   << " not in " << mk_bounded_pp(re_e, m, 3) << "\n";);
        all_hold = false;
    }

    return all_hold;
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,18 @@ Module Name:
|
|||
|
||||
Abstract:
|
||||
|
||||
Model generation from solved Nielsen graph.
|
||||
Model construction for the Nielsen-based string solver (theory_nseq).
|
||||
|
||||
After the Nielsen graph search returns sat, this module extracts
|
||||
variable-to-value assignments from the satisfying leaf node and
|
||||
builds model_value_proc callbacks for the SMT model generator.
|
||||
|
||||
The workflow is:
|
||||
1. init() — allocate seq_factory, register existing string literals,
|
||||
and extract variable assignments from the satisfying Nielsen node.
|
||||
2. mk_value(enode*) — return a model_value_proc that lazily builds
|
||||
the concrete value for a given enode.
|
||||
3. finalize() — clean up temporary state.
|
||||
|
||||
Author:
|
||||
|
||||
|
|
@ -16,57 +27,99 @@ Author:
|
|||
--*/
|
||||
#pragma once
|
||||
|
||||
#include "ast/ast.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "util/zstring.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "smt/smt_types.h"
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "model/seq_factory.h"
|
||||
|
||||
class proto_model;
|
||||
|
||||
namespace smt {
|
||||
|
||||
class theory_nseq;
|
||||
class nseq_regex;
|
||||
class nseq_state;
|
||||
class model_value_proc;
|
||||
|
||||
class nseq_model {
|
||||
ast_manager& m;
|
||||
seq_util m_seq;
|
||||
euf::sgraph& m_sg;
|
||||
unsigned m_fresh_counter = 0;
|
||||
theory_nseq& m_th;
|
||||
ast_manager& m;
|
||||
seq_util& m_seq;
|
||||
seq_rewriter& m_rewriter;
|
||||
euf::sgraph& m_sg;
|
||||
nseq_regex& m_regex;
|
||||
|
||||
// factory for generating fresh string/regex values
|
||||
seq_factory* m_factory = nullptr;
|
||||
|
||||
// variable assignments extracted from the satisfying Nielsen node.
|
||||
// maps snode id -> expr* (concrete value)
|
||||
u_map<expr*> m_var_values;
|
||||
|
||||
// trail for GC protection of generated expressions
|
||||
expr_ref_vector m_trail;
|
||||
|
||||
// per-variable regex constraints: maps snode id -> intersected regex snode.
|
||||
// collected during init() from the state's str_mem list.
|
||||
u_map<euf::snode*> m_var_regex;
|
||||
|
||||
public:
|
||||
nseq_model(ast_manager& m, euf::sgraph& sg) : m(m), m_seq(m), m_sg(sg) {}
|
||||
nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
|
||||
seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex);
|
||||
|
||||
// generate a fresh string value (used when a variable is unconstrained)
|
||||
expr_ref mk_fresh_value() {
|
||||
std::string name = "s!" + std::to_string(m_fresh_counter++);
|
||||
zstring zs(name.c_str());
|
||||
return expr_ref(m_seq.str.mk_string(zs), m);
|
||||
}
|
||||
// Phase 1: initialize model construction.
|
||||
// Allocates seq_factory, registers it with mg, collects
|
||||
// existing string literals, and extracts variable assignments
|
||||
// from the satisfying Nielsen leaf node.
|
||||
void init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state);
|
||||
|
||||
// extract variable assignments from a satisfied leaf node
|
||||
// Returns true if all variables got a valid assignment
|
||||
bool extract_assignments(seq::nielsen_node* node,
|
||||
std::vector<std::pair<euf::snode*, expr*>>& assignment) {
|
||||
if (!node)
|
||||
return false;
|
||||
for (auto const& eq : node->str_eqs()) {
|
||||
if (!eq.m_lhs || !eq.m_rhs)
|
||||
continue;
|
||||
if (eq.m_lhs->is_var() && eq.m_rhs->get_expr()) {
|
||||
assignment.emplace_back(eq.m_lhs, eq.m_rhs->get_expr());
|
||||
}
|
||||
else if (eq.m_rhs->is_var() && eq.m_lhs->get_expr()) {
|
||||
assignment.emplace_back(eq.m_rhs, eq.m_lhs->get_expr());
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Phase 2: build a model_value_proc for the given enode.
|
||||
// Returns nullptr if the enode is not a sequence/string sort.
|
||||
model_value_proc* mk_value(enode* n, model_generator& mg);
|
||||
|
||||
// validate that a regex membership constraint is satisfied by the assignment
|
||||
bool validate_regex(seq::str_mem const& mem,
|
||||
obj_map<euf::snode, expr*> const& assignment) {
|
||||
// stub: assume valid for now
|
||||
return true;
|
||||
}
|
||||
// Phase 3: clean up temporary model construction state.
|
||||
void finalize(model_generator& mg);
|
||||
|
||||
// Validate that model assignments satisfy all regex membership
|
||||
// constraints from the state. Checks positive and negative
|
||||
// memberships. Returns true if all constraints pass.
|
||||
bool validate_regex(nseq_state const& state, ::proto_model& mdl);
|
||||
|
||||
private:
|
||||
// extract variable assignments from a satisfying Nielsen node.
|
||||
// Walks str_eqs looking for x = value patterns and records them.
|
||||
void extract_assignments(seq::nielsen_node const* node);
|
||||
|
||||
// recursively substitute known variable assignments into an snode tree.
|
||||
// Returns a concrete Z3 expression.
|
||||
expr_ref snode_to_value(euf::snode* n);
|
||||
|
||||
// generate a concrete witness string for a regex.
|
||||
// Uses nullable check and first-char collection to build
|
||||
// a minimal satisfying string. depth bounds recursion.
|
||||
expr_ref generate_regex_witness(euf::snode* regex, unsigned depth = 0);
|
||||
|
||||
// register all string literals appearing in the constraint store
|
||||
// with the factory to avoid collisions with fresh values.
|
||||
void register_existing_values(seq::nielsen_graph& nielsen);
|
||||
|
||||
// look up or compute the value for an snode variable.
|
||||
// If no assignment exists, delegates to mk_fresh_value.
|
||||
expr* get_var_value(euf::snode* var);
|
||||
|
||||
// generate a fresh value for a variable, respecting regex
|
||||
// membership constraints. If the variable has associated
|
||||
// regex constraints (collected during init), generates a
|
||||
// witness satisfying the intersection; otherwise falls back
|
||||
// to a plain fresh value from the factory.
|
||||
expr* mk_fresh_value(euf::snode* var);
|
||||
|
||||
// collect per-variable regex constraints from the state.
|
||||
// For each positive str_mem, records the regex (or intersects
|
||||
// with existing) into m_var_regex keyed by the string snode id.
|
||||
void collect_var_regex_constraints(nseq_state const& state);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ Module Name:
|
|||
|
||||
Abstract:
|
||||
|
||||
Implementation of nseq_regex.
|
||||
Lazy regex membership processing for the Nielsen-based string solver.
|
||||
|
||||
Author:
|
||||
|
||||
|
|
@ -15,3 +15,394 @@ Author:
|
|||
|
||||
--*/
|
||||
#include "smt/nseq_regex.h"
|
||||
|
||||
namespace smt {
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Regex emptiness checking (structural analysis)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Structural check whether `re` denotes the empty language.
// A `true` answer is derived from the shape of the regex; `false` is also
// returned whenever the structure does not allow a decision here.
bool nseq_regex::is_empty_regex(euf::snode* re) const {
    if (!re)
        return false;

    // the empty-language constant itself
    if (re->is_fail())
        return true;

    // kinds that are never empty
    bool const never_empty = re->is_star() || re->is_to_re() ||
                             re->is_full_char() || re->is_full_seq();
    if (never_empty)
        return false;

    // a nullable loop accepts the empty word
    if (re->is_loop() && re->is_nullable())
        return false;

    seq_util& seq = m_sg.get_seq_util();
    expr* e = re->get_expr();
    if (!e)
        return false;

    expr* a = nullptr, *b = nullptr;

    // union: empty iff both children are empty
    if (seq.re.is_union(e, a, b)) {
        SASSERT(re->num_args() == 2);
        return is_empty_regex(re->arg(0)) && is_empty_regex(re->arg(1));
    }

    // concat: empty if either child is empty
    if (seq.re.is_concat(e, a, b)) {
        SASSERT(re->num_args() == 2);
        return is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1));
    }

    // intersection: empty if either child is empty; otherwise fall through
    if (seq.re.is_intersection(e, a, b)) {
        SASSERT(re->num_args() == 2);
        if (is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1)))
            return true;
    }

    // complement of the full sequence language is empty
    if (re->is_complement() && re->num_args() == 1 && re->arg(0)->is_full_seq())
        return true;

    // loop over an empty body: empty unless the loop is nullable
    if (re->is_loop() && re->num_args() >= 1 && is_empty_regex(re->arg(0)))
        return !re->is_nullable();

    return false;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Cycle detection
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// True iff extract_cycle finds a cycle entry for `mem`.
bool nseq_regex::detect_cycle(seq::str_mem const& mem) const {
    euf::snode* const repeated = extract_cycle(mem);
    return repeated != nullptr;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Ground prefix consumption
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Consumes leading concrete characters of mem.m_str, replacing mem.m_regex
// by its derivative for each consumed character. `mem` is updated in place.
//
// Returns
//   conflict   when a derivative is the fail regex, or the string becomes
//              empty while the remaining regex is not nullable;
//   satisfied  when the string becomes empty and the regex is nullable;
//   ok         otherwise (including when either side of `mem` is null).
nseq_regex::simplify_status nseq_regex::simplify_ground_prefix(seq::str_mem& mem) {
    if (!mem.m_str || !mem.m_regex)
        return simplify_status::ok;

    for (;;) {
        if (!mem.m_str || mem.m_str->is_empty())
            break;
        euf::snode* head = mem.m_str->first();
        if (!head || !head->is_char())
            break;
        euf::snode* next_re = m_sg.brzozowski_deriv(mem.m_regex, head);
        if (!next_re)
            break;
        if (next_re->is_fail())
            return simplify_status::conflict;
        mem.m_str = m_sg.drop_first(mem.m_str);
        mem.m_regex = next_re;
    }

    // final state: only decisive when the whole string was consumed
    if (!mem.m_str || !mem.m_str->is_empty())
        return simplify_status::ok;
    return mem.m_regex->is_nullable() ? simplify_status::satisfied
                                      : simplify_status::conflict;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Ground suffix consumption (best-effort)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Best-effort suffix simplification. No reverse derivatives are computed:
// only a fully ground string is handled, and it is handled by consuming it
// from the front through simplify_ground_prefix. In every other case the
// membership is left untouched and `ok` is returned.
nseq_regex::simplify_status nseq_regex::simplify_ground_suffix(seq::str_mem& mem) {
    bool const fully_ground = mem.m_str && mem.m_regex && mem.m_str->is_ground();
    if (fully_ground)
        return simplify_ground_prefix(mem);
    return simplify_status::ok;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Trivial checks
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Quick classification of a membership constraint.
// Returns -1 for a conflict, 1 when trivially satisfied, 0 when undecided
// (also when either side of `mem` is null).
int nseq_regex::check_trivial(seq::str_mem const& mem) const {
    if (!mem.m_str || !mem.m_regex)
        return 0;
    // empty regex: always a conflict
    if (is_empty_regex(mem.m_regex))
        return -1;
    // full regex: always satisfied
    if (is_full_regex(mem.m_regex))
        return 1;
    // only the empty string can still be decided here
    if (!mem.m_str->is_empty())
        return 0;
    return mem.m_regex->is_nullable() ? 1 : -1;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Minterm computation with filtering
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Appends to `minterms` the minterms computed by the sgraph for `regex`,
// dropping null entries and entries that are the fail regex.
// Note: minterms are regex character-class expressions, not concrete
// characters; callers should compute derivatives with concrete or fresh
// characters rather than with the minterms themselves.
void nseq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) {
    if (!regex)
        return;

    euf::snode_vector raw;
    m_sg.compute_minterms(regex, raw);

    for (euf::snode* candidate : raw) {
        bool const keep = candidate && !candidate->is_fail();
        if (keep)
            minterms.push_back(candidate);
    }
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Collect first characters
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Collects representative characters that may start a word of `re` and
// appends them to `chars` without duplicates (compared by snode pointer).
//
//  - to_re(s):   the first character of s, when it is a char snode or when
//                the first element is a non-empty string literal
//  - full char:  'a' as representative
//  - re.range:   the lower bound, when it is a character constant, a unit
//                of a character constant, or a string literal of length 1
//  - fail / full sequence: nothing
//  - otherwise:  the union of the results for all arguments. This is an
//                over-approximation: every argument is visited regardless
//                of the operator.
//
// The de-duplicating insert that used to be repeated in every branch is now
// a single local helper, and an unused zstring local in the range branch is
// gone. The range branch additionally recognises sequence-sorted bounds;
// NOTE(review): this relies on re.range bounds being length-1 sequences, as
// in the SMT-LIB strings theory - confirm against seq_util.
void nseq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
    if (!re)
        return;

    seq_util& seq = m_sg.get_seq_util();

    // append ch unless it is null or already present
    auto add_unique = [&chars](euf::snode* ch) {
        if (!ch)
            return;
        for (euf::snode* c : chars)
            if (c == ch)
                return;
        chars.push_back(ch);
    };

    // to_re(s): extract the first character of the string body
    if (re->is_to_re()) {
        euf::snode* body = re->arg(0);
        if (!body || body->is_empty())
            return;
        euf::snode* first = body->first();
        if (!first)
            return;
        if (first->is_char()) {
            add_unique(first);
            return;
        }
        // string literal that was not decomposed into characters
        zstring s;
        if (first->get_expr() &&
            seq.str.is_string(first->get_expr(), s) && s.length() > 0)
            add_unique(m_sg.mk_char(s[0]));
        return;
    }

    // full character set: use 'a' as representative
    if (re->is_full_char()) {
        add_unique(m_sg.mk_char('a'));
        return;
    }

    // re.range(lo, hi): use lo as representative
    if (expr* e = re->get_expr()) {
        expr* lo = nullptr, *hi = nullptr;
        if (seq.re.is_range(e, lo, hi) && lo) {
            unsigned ch_val = 'a';
            expr* lo_ch = nullptr;
            zstring lo_str;
            if (seq.is_const_char(lo, ch_val))
                add_unique(m_sg.mk_char(ch_val));
            else if (seq.str.is_unit(lo, lo_ch) && seq.is_const_char(lo_ch, ch_val))
                add_unique(m_sg.mk_char(ch_val));
            else if (seq.str.is_string(lo, lo_str) && lo_str.length() == 1)
                add_unique(m_sg.mk_char(lo_str[0]));
            return;
        }
    }

    if (re->is_fail() || re->is_full_seq())
        return;

    // recurse into children (union, concat, star, loop, ...)
    for (unsigned i = 0; i < re->num_args(); ++i)
        collect_first_chars(re->arg(i), chars);
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Membership processing
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Processes one membership constraint.
//
// Returns false when the constraint is found to be unsatisfiable and true
// otherwise. When the constraint is neither decided nor dropped, a
// simplified constraint is appended to `out_mems`.
//
// A null string side after prefix consumption is tolerated: the function
// returns true without dereferencing it, matching the guard at entry.
// simplify_ground_prefix itself allows m_str to become null after
// drop_first, so the unconditional dereference here was inconsistent.
bool nseq_regex::process_str_mem(seq::str_mem const& mem,
                                 vector<seq::str_mem>& out_mems) {
    if (!mem.m_str || !mem.m_regex)
        return true;

    // empty string: decided by nullability of the regex
    if (mem.m_str->is_empty())
        return mem.m_regex->is_nullable();

    // consume ground prefix: derive the regex by each leading concrete char
    seq::str_mem working = mem;
    simplify_status st = simplify_ground_prefix(working);
    if (st == simplify_status::conflict)
        return false;
    if (st == simplify_status::satisfied)
        return true;

    // nothing left to inspect: treat like a membership without a string side
    if (!working.m_str)
        return true;

    // after ground prefix consumption, if the front is still a concrete
    // character we can take one more step (not expected after
    // simplify_ground_prefix, but guarded defensively)
    euf::snode* first = working.m_str->first();
    if (first && first->is_char()) {
        seq::str_mem derived = derive(working, first);
        if (is_empty_regex(derived.m_regex))
            return false;
        out_mems.push_back(derived);
        return true;
    }

    // string starts with a non-ground element (variable or unit): hand the
    // simplified constraint back for expansion via character-split modifiers
    out_mems.push_back(working);
    return true;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// History recording
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Returns a copy of `mem` whose history has `history_re` prepended.
// The history is a chain built from regex concatenation used as a cons cell:
// re.concat(history_re, old_history), newest entry first. When there is no
// previous history, or either expression is missing, `history_re` itself
// becomes the history.
seq::str_mem nseq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) {
    euf::snode* chain_head = history_re;

    bool const both_present = mem.m_history && history_re;
    expr* latest = both_present ? history_re->get_expr() : nullptr;
    expr* older  = both_present ? mem.m_history->get_expr() : nullptr;

    if (latest && older) {
        seq_util& seq = m_sg.get_seq_util();
        expr_ref chain(seq.re.mk_concat(latest, older), m_sg.get_manager());
        chain_head = m_sg.mk(chain);
    }

    return seq::str_mem(mem.m_str, mem.m_regex, chain_head, mem.m_id, mem.m_dep);
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Cycle detection
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Searches the history chain of `mem` for an entry equal to the current
// regex and returns that entry, or nullptr when there is none.
//
// The chain is walked for at most 1000 steps. A history node that is a
// regex concat is split into snapshot (arg 0) and remaining chain (arg 1);
// any other node is a terminal snapshot. Equality is checked on snode
// pointers first and on the underlying expression pointers second.
euf::snode* nseq_regex::extract_cycle(seq::str_mem const& mem) const {
    euf::snode* const target = mem.m_regex;
    if (!target || !mem.m_history)
        return nullptr;

    seq_util& seq = m_sg.get_seq_util();
    unsigned remaining = 1000;

    for (euf::snode* node = mem.m_history; node && remaining > 0; --remaining) {
        bool const is_cons = node->is_concat() && node->get_expr() &&
                             seq.re.is_concat(node->get_expr());
        euf::snode* snapshot = is_cons ? node->arg(0) : node;
        euf::snode* rest     = is_cons ? node->arg(1) : nullptr;

        // pointer equality (fast, covers normalized regexes)
        if (snapshot == target)
            return snapshot;

        // expression-level equality as fallback
        expr* snap_e   = snapshot->get_expr();
        expr* target_e = target->get_expr();
        if (snap_e && target_e && snap_e == target_e)
            return snapshot;

        node = rest;
    }
    return nullptr;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Stabilizer from cycle
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Build the stabilizer for a detected cycle: the Kleene star of cycle_regex,
// registered in the sgraph. current_regex is only checked for null.
// Returns nullptr when either argument is null or cycle_regex carries no
// expression.
euf::snode* nseq_regex::stabilizer_from_cycle(euf::snode* cycle_regex,
                                              euf::snode* current_regex) {
    expr* body = (cycle_regex && current_regex) ? cycle_regex->get_expr() : nullptr;
    if (!body)
        return nullptr;

    // Wrap the cycle body in re.* and intern the result.
    seq_util& seq = m_sg.get_seq_util();
    expr_ref starred(seq.re.mk_star(body), m_sg.get_manager());
    return m_sg.mk(starred);
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Stabilizer-based subsumption
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Decide whether mem can be dropped because its derivation history cycles
// back to the current regex. Only the case where the cycle entry is the very
// same snode as mem.m_regex is accepted; a general regex-inclusion check is
// not implemented.
bool nseq_regex::try_subsume(seq::str_mem const& mem) {
    euf::snode* const repeated = extract_cycle(mem);
    if (repeated == nullptr)
        return false;

    // A stabilizer must be constructible for the cycle; its value is not
    // inspected further.
    euf::snode* const stabilizer = stabilizer_from_cycle(repeated, mem.m_regex);
    if (stabilizer == nullptr)
        return false;

    // R is contained in R*, so a cycle entry identical to the current regex
    // is covered by the stabilizer.
    return repeated == mem.m_regex;
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,20 @@ Module Name:
|
|||
|
||||
Abstract:
|
||||
|
||||
Regex membership handling using Brzozowski derivatives.
|
||||
Processes str_mem constraints after character consumption.
|
||||
Lazy regex membership processing for the Nielsen-based string solver.
|
||||
|
||||
Provides Brzozowski derivative computation, ground prefix/suffix
|
||||
consumption, cycle detection in derivation histories, and
|
||||
stabilizer-based subsumption for regex membership constraints.
|
||||
|
||||
Ports the following ZIPT StrMem operations:
|
||||
- SimplifyCharRegex / SimplifyDir (ground prefix/suffix consumption)
|
||||
- ExtractCycle / StabilizerFromCycle (cycle detection and widening)
|
||||
- TrySubsume (stabilizer-based subsumption)
|
||||
|
||||
The class wraps sgraph operations (brzozowski_deriv, compute_minterms,
|
||||
drop_first, etc.) and provides a higher-level interface for
|
||||
nielsen_graph and theory_nseq.
|
||||
|
||||
Author:
|
||||
|
||||
|
|
@ -28,39 +40,146 @@ namespace smt {
|
|||
public:
|
||||
nseq_regex(euf::sgraph& sg) : m_sg(sg) {} // stores sg in m_sg; performs no other setup
|
||||
|
||||
// check if a regex snode represents the empty language
|
||||
bool is_empty_regex(euf::snode* re) const {
|
||||
return re && re->is_fail();
|
||||
euf::sgraph& sg() { return m_sg; }
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Basic regex predicates
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// check if regex is the empty language (∅ / re.empty).
|
||||
// performs structural analysis beyond is_fail() to detect
|
||||
// derived emptiness (e.g., union of empties, concat with empty).
|
||||
bool is_empty_regex(euf::snode* re) const;
|
||||
|
||||
// check if regex is the full language (Σ* / re.all)
|
||||
bool is_full_regex(euf::snode* re) const {
    // a null snode is never the full language
    if (re == nullptr)
        return false;
    return re->is_full_seq();
}
|
||||
|
||||
// compute derivative of regex re with respect to char elem and
|
||||
// return a new str_mem for the resulting constraint
|
||||
// check if regex accepts the empty string
|
||||
bool is_nullable(euf::snode* re) const {
    // a null snode is reported as not nullable
    if (re == nullptr)
        return false;
    return re->is_nullable();
}
|
||||
|
||||
// check if regex is ground (no string variables)
|
||||
bool is_ground(euf::snode* re) const {
    // a null snode is reported as not ground
    if (re == nullptr)
        return false;
    return re->is_ground();
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Derivative computation
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// compute Brzozowski derivative of regex w.r.t. character element.
|
||||
// returns nullptr on failure.
|
||||
euf::snode* derivative(euf::snode* re, euf::snode* elem) {
    // thin forwarder to the sgraph's derivative routine
    euf::snode* const result = m_sg.brzozowski_deriv(re, elem);
    return result;
}
|
||||
|
||||
// compute derivative of a str_mem constraint: advance past one character.
|
||||
// the string side is shortened by drop_first and the regex is derived.
|
||||
seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
    // regex side: derivative w.r.t. elem
    euf::snode* const next_regex = m_sg.brzozowski_deriv(mem.m_regex, elem);
    // string side: drop the leading element
    euf::snode* const remaining = m_sg.drop_first(mem.m_str);
    // history, id and dependencies are carried over unchanged
    seq::str_mem advanced(remaining, next_regex, mem.m_history, mem.m_id, mem.m_dep);
    return advanced;
}
|
||||
|
||||
// process a regex membership constraint after one character has been consumed
|
||||
// returns false if the resulting regex is empty (conflict)
|
||||
bool process_str_mem(seq::str_mem const& mem,
|
||||
vector<seq::str_mem>& out_mems) {
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
return true;
|
||||
// if regex does not accept the empty string and the string side is empty, conflict
|
||||
if (mem.m_str->is_empty()) {
|
||||
return mem.m_regex->is_nullable();
|
||||
}
|
||||
// compute minterms for the regex
|
||||
euf::snode_vector minterms;
|
||||
m_sg.compute_minterms(mem.m_regex, minterms);
|
||||
for (euf::snode* ch : minterms) {
|
||||
seq::str_mem new_mem = derive(mem, ch);
|
||||
if (!is_empty_regex(new_mem.m_regex))
|
||||
out_mems.push_back(new_mem);
|
||||
}
|
||||
return true;
|
||||
// -----------------------------------------------------------------
|
||||
// Ground prefix/suffix consumption
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
enum class simplify_status { ok, conflict, satisfied };
|
||||
|
||||
// consume ground characters from the front of mem.m_str by computing
|
||||
// Brzozowski derivatives against mem.m_regex.
|
||||
// stops when:
|
||||
// - the string front is not a concrete character (ok)
|
||||
// - a derivative produces ∅ (conflict)
|
||||
// - the string becomes empty and regex is nullable (satisfied)
|
||||
// - the string becomes empty and regex is not nullable (conflict)
|
||||
// modifies mem in-place.
|
||||
simplify_status simplify_ground_prefix(seq::str_mem& mem);
|
||||
|
||||
// consume ground characters from the back of mem.m_str by computing
|
||||
// reverse derivatives. modifies mem in-place.
|
||||
// (reverse derivatives require regex reversal; this is a best-effort
|
||||
// simplification that handles the common case of trailing constants.)
|
||||
simplify_status simplify_ground_suffix(seq::str_mem& mem);
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Trivial checks
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// quick check for trivially sat/unsat membership.
|
||||
// returns 1 if satisfied (empty string in nullable regex, or full regex)
|
||||
// returns -1 if conflicting (empty string in non-nullable, or ∅ regex)
|
||||
// returns 0 if undetermined
|
||||
int check_trivial(seq::str_mem const& mem) const;
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Minterm and character computation
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// compute minterms (character class partition) from regex
|
||||
void compute_minterms(euf::snode* re, euf::snode_vector& minterms) {
    // delegate to the sgraph; results are written into `minterms`
    euf::sgraph& graph = m_sg;
    graph.compute_minterms(re, minterms);
}
|
||||
|
||||
// compute minterms for character splitting, filtering out empty
|
||||
// (fail) minterms. Minterms are regex character-class expressions
|
||||
// forming a partition of the alphabet; callers use them to drive
|
||||
// fresh-variable creation in character-split modifiers.
|
||||
void get_minterms(euf::snode* regex, euf::snode_vector& minterms);
|
||||
|
||||
// collect concrete first-position characters from a regex.
|
||||
// extracts characters reachable from to_re leaves and simple ranges.
|
||||
void collect_first_chars(euf::snode* re, euf::snode_vector& chars);
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Membership processing
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// process a str_mem constraint by consuming ground characters from
|
||||
// the string front via Brzozowski derivatives. If the entire ground
|
||||
// prefix is consumed and the constraint is neither satisfied nor
|
||||
// conflicting, the (simplified) constraint is pushed to out_mems
|
||||
// for the Nielsen graph to expand via character-split modifiers.
|
||||
// returns false if the constraint is immediately conflicting
|
||||
// (empty string in non-nullable regex, or derivative yields ∅).
|
||||
bool process_str_mem(seq::str_mem const& mem,
|
||||
vector<seq::str_mem>& out_mems);
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Cycle detection and stabilizers
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// record current regex in the derivation history of a str_mem.
|
||||
// the history is a chain of regex snapshots (newest first) used for cycle detection.
|
||||
// returns the updated str_mem.
|
||||
seq::str_mem record_history(seq::str_mem const& mem, euf::snode* history_re);
|
||||
|
||||
// check if the derivation history of mem contains a cycle, i.e.,
|
||||
// the current regex (mem.m_regex) also occurs as an entry of the history chain.
|
||||
// if found, returns the cycle entry point regex; nullptr otherwise.
|
||||
euf::snode* extract_cycle(seq::str_mem const& mem) const;
|
||||
|
||||
// check if the derivation history exhibits a cycle.
|
||||
// returns true when the current regex matches a previously seen regex
|
||||
// in the history chain. used to trigger stabilizer introduction.
|
||||
bool detect_cycle(seq::str_mem const& mem) const;
|
||||
|
||||
// compute a Kleene star stabilizer from a cycle.
|
||||
// given the regex at the cycle point and the current regex,
|
||||
// builds r* that over-approximates any number of cycle iterations.
|
||||
// returns nullptr if no stabilizer can be computed.
|
||||
euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
|
||||
euf::snode* current_regex);
|
||||
|
||||
// try to subsume a str_mem constraint using stabilizer-based
|
||||
// reasoning: if extract_cycle finds a cycle, check whether
|
||||
// the current regex is already covered by the stabilizer.
|
||||
// returns true if the constraint can be dropped.
|
||||
bool try_subsume(seq::str_mem const& mem);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,15 +21,48 @@ Author:
|
|||
#include "util/vector.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include "smt/smt_literal.h"
|
||||
|
||||
namespace smt {
|
||||
|
||||
class enode;
|
||||
|
||||
// source info for a string equality (the two enodes whose merge caused it)
|
||||
struct eq_source {
|
||||
enode* m_n1;
|
||||
enode* m_n2;
|
||||
};
|
||||
|
||||
// source info for a regex membership (the literal that asserted it)
|
||||
// Provenance record for one positive regex membership constraint.
struct mem_source {
|
||||
literal m_lit; // the literal that asserted the membership
|
||||
};
|
||||
|
||||
// source info for a string disequality
|
||||
struct diseq_source {
|
||||
enode* m_n1;
|
||||
enode* m_n2;
|
||||
};
|
||||
|
||||
// negative regex membership: ¬(str in regex)
|
||||
struct neg_mem_entry {
|
||||
euf::snode* m_str;
|
||||
euf::snode* m_regex;
|
||||
literal m_lit;
|
||||
};
|
||||
|
||||
class nseq_state {
|
||||
euf::sgraph& m_sg;
|
||||
vector<seq::str_eq> m_str_eqs;
|
||||
vector<seq::str_mem> m_str_mems;
|
||||
vector<eq_source> m_eq_sources;
|
||||
vector<mem_source> m_mem_sources;
|
||||
vector<diseq_source> m_diseqs;
|
||||
vector<neg_mem_entry> m_neg_mems;
|
||||
unsigned_vector m_str_eq_lim;
|
||||
unsigned_vector m_str_mem_lim;
|
||||
unsigned_vector m_diseq_lim;
|
||||
unsigned_vector m_neg_mem_lim;
|
||||
unsigned m_next_mem_id = 0;
|
||||
|
||||
public:
|
||||
|
|
@ -38,37 +71,68 @@ namespace smt {
|
|||
void push() {
|
||||
m_str_eq_lim.push_back(m_str_eqs.size());
|
||||
m_str_mem_lim.push_back(m_str_mems.size());
|
||||
m_diseq_lim.push_back(m_diseqs.size());
|
||||
m_neg_mem_lim.push_back(m_neg_mems.size());
|
||||
}
|
||||
|
||||
void pop(unsigned n) {
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
m_str_eqs.shrink(m_str_eq_lim.back());
|
||||
m_eq_sources.shrink(m_str_eq_lim.back());
|
||||
m_str_eq_lim.pop_back();
|
||||
m_str_mems.shrink(m_str_mem_lim.back());
|
||||
m_mem_sources.shrink(m_str_mem_lim.back());
|
||||
m_str_mem_lim.pop_back();
|
||||
m_diseqs.shrink(m_diseq_lim.back());
|
||||
m_diseq_lim.pop_back();
|
||||
m_neg_mems.shrink(m_neg_mem_lim.back());
|
||||
m_neg_mem_lim.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
void add_str_eq(euf::snode* lhs, euf::snode* rhs) {
|
||||
void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) {
|
||||
seq::dep_tracker dep;
|
||||
m_str_eqs.push_back(seq::str_eq(lhs, rhs, dep));
|
||||
m_eq_sources.push_back({n1, n2});
|
||||
}
|
||||
|
||||
void add_str_mem(euf::snode* str, euf::snode* regex) {
|
||||
void add_str_mem(euf::snode* str, euf::snode* regex, literal lit) {
|
||||
seq::dep_tracker dep;
|
||||
m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep));
|
||||
m_mem_sources.push_back({lit});
|
||||
}
|
||||
|
||||
void add_diseq(enode* n1, enode* n2) {
    // record the pair of enodes of a string disequality
    diseq_source entry{n1, n2};
    m_diseqs.push_back(entry);
}
|
||||
|
||||
void add_neg_mem(euf::snode* str, euf::snode* regex, literal lit) {
    // record a negated membership together with its literal
    neg_mem_entry entry{str, regex, lit};
    m_neg_mems.push_back(entry);
}
|
||||
|
||||
vector<seq::str_eq> const& str_eqs() const { return m_str_eqs; }
|
||||
vector<seq::str_mem> const& str_mems() const { return m_str_mems; }
|
||||
vector<diseq_source> const& diseqs() const { return m_diseqs; }
|
||||
vector<neg_mem_entry> const& neg_mems() const { return m_neg_mems; }
|
||||
|
||||
bool empty() const { return m_str_eqs.empty() && m_str_mems.empty(); }
|
||||
eq_source const& get_eq_source(unsigned i) const { return m_eq_sources[i]; }
|
||||
mem_source const& get_mem_source(unsigned i) const { return m_mem_sources[i]; }
|
||||
diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; }
|
||||
neg_mem_entry const& get_neg_mem(unsigned i) const { return m_neg_mems[i]; }
|
||||
|
||||
bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_neg_mems.empty() && m_diseqs.empty(); }
|
||||
|
||||
void reset() {
    // constraint stores and their source records
    m_str_eqs.reset();
    m_eq_sources.reset();
    m_str_mems.reset();
    m_mem_sources.reset();
    m_diseqs.reset();
    m_neg_mems.reset();

    // scope limits
    m_str_eq_lim.reset();
    m_str_mem_lim.reset();
    m_diseq_lim.reset();
    m_neg_mem_lim.reset();
}
|
||||
};
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -183,22 +183,27 @@ Abstract:
|
|||
detection during character substitution are not ported.
|
||||
|
||||
Modifier hierarchy (Constraints/Modifier/):
|
||||
- All ~15 Modifier subclasses driving graph expansion are not ported:
|
||||
VarNielsenModifier, ConstNielsenModifier, DirectedNielsenModifier,
|
||||
EqSplitModifier, RegexVarSplitModifier, RegexCharSplitModifier,
|
||||
StarIntrModifier, PowerSplitModifier, GPowerIntrModifier,
|
||||
NumCmpModifier, NumUnwindingModifier, PowerEpsilonModifier,
|
||||
DecomposeModifier, CombinedModifier, DetModifier.
|
||||
- The modifier pattern (each Modifier produces one or more child nodes by
|
||||
applying substitutions + side conditions to the parent node) is not ported.
|
||||
- 13 Modifier subclasses driving graph expansion are ported as
|
||||
apply_* methods in generate_extensions, matching ZIPT's TypeOrder
|
||||
priority: DetModifier(1), PowerEpsilonModifier(2), NumCmpModifier(3),
|
||||
ConstNumUnwindingModifier(4), EqSplitModifier(5), StarIntrModifier(6),
|
||||
GPowerIntrModifier(7), ConstNielsenModifier(8), RegexCharSplitModifier(9),
|
||||
RegexVarSplitModifier(10), PowerSplitModifier(11), VarNielsenModifier(12),
|
||||
VarNumUnwindingModifier(13).
|
||||
- NOT PORTED: DirectedNielsenModifier, DecomposeModifier, CombinedModifier.
|
||||
- NumCmp, ConstNumUnwinding, VarNumUnwinding are approximated (no PDD
|
||||
integer polynomial infrastructure; power tokens are replaced with ε
|
||||
or peeled with fresh variables instead of exact exponent arithmetic).
|
||||
|
||||
Search procedure:
|
||||
- NielsenNode.GraphExpansion(): the recursive search with iterative deepening
|
||||
(depth-bounded DFS with SAT/UNSAT/CYCLIC return codes) is not ported.
|
||||
- NielsenNode.SimplifyAndInit(): the simplification-and-initialization pass
|
||||
run at node creation is not ported.
|
||||
- NielsenGraph.Check(): the top-level entry point with iterative deepening,
|
||||
inner solver setup and subsumption-node lookup is not ported.
|
||||
- NielsenGraph.Check() / NielsenNode.GraphExpansion(): ported as
|
||||
nielsen_graph::solve() (iterative deepening, 6 rounds starting at
|
||||
depth 10, doubling) and search_dfs() (depth-bounded DFS with
|
||||
eval_idx cycle detection and node status tracking). The inner solver
|
||||
setup and subsumption-node lookup within Check() are not ported.
|
||||
- NielsenNode.SimplifyAndInit(): ported as
|
||||
nielsen_node::simplify_and_init() with prefix matching, symbol clash,
|
||||
empty propagation, and Brzozowski derivative consumption.
|
||||
- NielsenGraph.FindExisting(): the subsumption cache lookup over
|
||||
subsumptionCandidates is not ported.
|
||||
|
||||
|
|
@ -231,6 +236,7 @@ Author:
|
|||
#include "util/vector.h"
|
||||
#include "util/uint_set.h"
|
||||
#include "ast/ast.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
|
||||
|
|
@ -281,6 +287,9 @@ namespace seq {
|
|||
bool is_superset(dep_tracker const& other) const;
|
||||
bool empty() const;
|
||||
|
||||
// collect indices of all set bits into 'indices'
|
||||
void get_set_bits(unsigned_vector& indices) const;
|
||||
|
||||
bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; }
|
||||
bool operator!=(dep_tracker const& other) const { return !(*this == other); }
|
||||
};
|
||||
|
|
@ -353,6 +362,24 @@ namespace seq {
|
|||
}
|
||||
};
|
||||
|
||||
// kind of length constraint determines propagation strategy
|
||||
// Tag stored in length_constraint::m_kind; one enumerator per origin of a
// length constraint.
enum class length_kind {
|
||||
nonneg, // len(x) >= 0: unconditional axiom
|
||||
eq, // len(lhs) = len(rhs): conditional on string equality
|
||||
bound // Parikh bound: conditional on regex membership
|
||||
};
|
||||
|
||||
// arithmetic length constraint derived from string equations
|
||||
struct length_constraint {
|
||||
expr_ref m_expr; // arithmetic expression (e.g., len(x) + len(y) = len(a) + 1)
|
||||
dep_tracker m_dep; // tracks which input constraints contributed
|
||||
length_kind m_kind; // determines propagation strategy
|
||||
|
||||
length_constraint(ast_manager& m): m_expr(m), m_kind(length_kind::nonneg) {}
|
||||
length_constraint(expr* e, dep_tracker const& dep, length_kind kind, ast_manager& m):
|
||||
m_expr(e, m), m_dep(dep), m_kind(kind) {}
|
||||
};
|
||||
|
||||
// edge in the Nielsen graph connecting two nodes
|
||||
// mirrors ZIPT's NielsenEdge
|
||||
class nielsen_edge {
|
||||
|
|
@ -469,6 +496,39 @@ namespace seq {
|
|||
|
||||
// true if other's constraint set is a subset of this node's
|
||||
bool is_subsumed_by(nielsen_node const& other) const;
|
||||
|
||||
// true if any constraint has opaque (s_other) terms that
|
||||
// the Nielsen graph cannot decompose
|
||||
bool has_opaque_terms() const;
|
||||
};
|
||||
|
||||
// search statistics collected during Nielsen graph solving
|
||||
// Counters collected while solving with the Nielsen graph. Every counter
// starts at zero and reset() returns all of them to zero.
struct nielsen_stats {
    // overall search counters
    unsigned m_num_solve_calls = 0;
    unsigned m_num_dfs_nodes = 0;
    unsigned m_num_sat = 0;
    unsigned m_num_unsat = 0;
    unsigned m_num_unknown = 0;
    unsigned m_num_simplify_conflict = 0;
    unsigned m_num_subsumptions = 0;
    unsigned m_num_extensions = 0;
    unsigned m_num_fresh_vars = 0;
    unsigned m_max_depth = 0;
    // modifier application counts
    unsigned m_mod_det = 0;
    unsigned m_mod_power_epsilon = 0;
    unsigned m_mod_num_cmp = 0;
    unsigned m_mod_const_num_unwinding = 0;
    unsigned m_mod_eq_split = 0;
    unsigned m_mod_star_intr = 0;
    unsigned m_mod_gpower_intr = 0;
    unsigned m_mod_const_nielsen = 0;
    unsigned m_mod_regex_char_split = 0;
    unsigned m_mod_regex_var_split = 0;
    unsigned m_mod_power_split = 0;
    unsigned m_mod_var_nielsen = 0;
    unsigned m_mod_var_num_unwinding = 0;

    // Assign a value-initialised object instead of memset(this, ...): the
    // defaults above stay the single source of truth, no <cstring> is needed,
    // and the reset remains correct if a non-trivial member is ever added.
    void reset() { *this = nielsen_stats(); }
};
|
||||
|
||||
// the overall Nielsen transformation graph
|
||||
|
|
@ -482,6 +542,10 @@ namespace seq {
|
|||
unsigned m_run_idx = 0;
|
||||
unsigned m_depth_bound = 0;
|
||||
unsigned m_next_mem_id = 0;
|
||||
unsigned m_fresh_cnt = 0;
|
||||
unsigned m_num_input_eqs = 0;
|
||||
unsigned m_num_input_mems = 0;
|
||||
nielsen_stats m_stats;
|
||||
|
||||
public:
|
||||
nielsen_graph(euf::sgraph& sg);
|
||||
|
|
@ -519,6 +583,10 @@ namespace seq {
|
|||
// generate next unique regex membership id
|
||||
unsigned next_mem_id() { return m_next_mem_id++; }
|
||||
|
||||
// number of input constraints (for dep_tracker bit mapping)
|
||||
unsigned num_input_eqs() const { return m_num_input_eqs; }
|
||||
unsigned num_input_mems() const { return m_num_input_mems; }
|
||||
|
||||
// display for debugging
|
||||
std::ostream& display(std::ostream& out) const;
|
||||
|
||||
|
|
@ -541,8 +609,111 @@ namespace seq {
|
|||
// collect dependency information from conflicting constraints
|
||||
void collect_conflict_deps(dep_tracker& deps) const;
|
||||
|
||||
// explain a conflict: partition the set bits into str_eq indices
|
||||
// (bits 0..num_eqs-1) and str_mem indices (bits num_eqs..num_eqs+num_mems-1).
|
||||
// Must be called after solve() returns unsat.
|
||||
void explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const;
|
||||
|
||||
// accumulated search statistics
|
||||
nielsen_stats const& stats() const { return m_stats; }
|
||||
void reset_stats() { m_stats.reset(); }
|
||||
|
||||
// generate arithmetic length constraints from the root node's string
|
||||
// equalities and regex memberships. For each non-trivial equation lhs = rhs,
|
||||
// produces len(lhs) = len(rhs) by expanding concatenations into sums.
|
||||
// For each regex membership str in regex, produces Parikh interval
|
||||
// constraints: len(str) >= min_len and len(str) <= max_len.
|
||||
// Also generates len(x) >= 0 for each variable appearing in the equations.
|
||||
void generate_length_constraints(vector<length_constraint>& constraints);
|
||||
|
||||
private:
|
||||
search_result search_dfs(nielsen_node* node, unsigned depth);
|
||||
|
||||
// create a fresh variable with a unique name
|
||||
euf::snode* mk_fresh_var();
|
||||
|
||||
// deterministic modifier: var = ε, same-head cancel
|
||||
bool apply_det_modifier(nielsen_node* node);
|
||||
|
||||
// const nielsen modifier: char vs var (2 branches per case)
|
||||
bool apply_const_nielsen(nielsen_node* node);
|
||||
|
||||
// variable Nielsen modifier: var vs var, all progress (3 branches)
|
||||
bool apply_var_nielsen(nielsen_node* node);
|
||||
|
||||
// eq split modifier: var vs var (3 branches)
|
||||
bool apply_eq_split(nielsen_node* node);
|
||||
|
||||
// apply regex character split modifier to a node.
|
||||
// for a str_mem constraint x·s ∈ R where x is a variable:
|
||||
// (1) x → c·z for each char c accepted by R at first position
|
||||
// (2) x → ε (x is empty)
|
||||
// returns true if children were generated.
|
||||
bool apply_regex_char_split(nielsen_node* node);
|
||||
|
||||
// power epsilon modifier: for a power token u^n in an equation,
|
||||
// branch: (1) base u = ε, (2) power is empty (n = 0 semantics).
|
||||
// mirrors ZIPT's PowerEpsilonModifier
|
||||
bool apply_power_epsilon(nielsen_node* node);
|
||||
|
||||
// numeric comparison modifier: for equations involving power tokens
|
||||
// u^m and u^n with the same base, branch on m < n vs n <= m.
|
||||
// mirrors ZIPT's NumCmpModifier
|
||||
bool apply_num_cmp(nielsen_node* node);
|
||||
|
||||
// constant numeric unwinding: for a power token u^n vs a constant
|
||||
// (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
|
||||
// mirrors ZIPT's ConstNumUnwindingModifier
|
||||
bool apply_const_num_unwinding(nielsen_node* node);
|
||||
|
||||
// star introduction: for a str_mem x·s ∈ R where a cycle is detected
|
||||
// (backedge exists), introduce stabilizer: x ∈ base* with x split.
|
||||
// mirrors ZIPT's StarIntrModifier
|
||||
bool apply_star_intr(nielsen_node* node);
|
||||
|
||||
// generalized power introduction: for a variable x matched against
|
||||
// a ground repeated pattern, introduce x = base^n · prefix(base)
|
||||
// with fresh power variable n and side constraint n >= 0.
|
||||
// mirrors ZIPT's GPowerIntrModifier
|
||||
bool apply_gpower_intr(nielsen_node* node);
|
||||
|
||||
// regex variable split: for str_mem x·s ∈ R where x is a variable,
|
||||
// split using minterms: x → ε, or x → c·x' for each minterm c.
|
||||
// More general than regex_char_split, uses minterm partitioning.
|
||||
// mirrors ZIPT's RegexVarSplitModifier
|
||||
bool apply_regex_var_split(nielsen_node* node);
|
||||
|
||||
// power split: for a variable x facing a power token u^n,
|
||||
// branch: x = u^m · prefix(u) with m < n, or x = u^n · x.
|
||||
// mirrors ZIPT's PowerSplitModifier
|
||||
bool apply_power_split(nielsen_node* node);
|
||||
|
||||
// variable numeric unwinding: for a power token u^n vs a variable,
|
||||
// branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
|
||||
// mirrors ZIPT's VarNumUnwindingModifier
|
||||
bool apply_var_num_unwinding(nielsen_node* node);
|
||||
|
||||
// collect concrete first-position characters from a regex snode
|
||||
void collect_first_chars(euf::snode* re, euf::snode_vector& chars);
|
||||
|
||||
// find the first power token in any str_eq at this node
|
||||
euf::snode* find_power_token(nielsen_node* node) const;
|
||||
|
||||
// find a power token facing a constant (char) head
|
||||
bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
|
||||
|
||||
// find a power token facing a variable head
|
||||
bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
|
||||
|
||||
// build an arithmetic expression representing the length of an snode tree.
|
||||
// concatenations are expanded to sums, chars to 1, empty to 0,
|
||||
// variables to (str.len var_expr).
|
||||
expr_ref compute_length_expr(euf::snode* n);
|
||||
|
||||
// compute Parikh length interval [min_len, max_len] for a regex snode.
|
||||
// uses seq_util::rex min_length/max_length on the underlying expression.
|
||||
// max_len == UINT_MAX means unbounded.
|
||||
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,10 @@ Author:
|
|||
--*/
|
||||
#include "smt/theory_nseq.h"
|
||||
#include "smt/smt_context.h"
|
||||
#include "smt/smt_justification.h"
|
||||
#include "smt/proto_model/proto_model.h"
|
||||
#include "ast/array_decl_plugin.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "util/statistics.h"
|
||||
|
||||
namespace smt {
|
||||
|
|
@ -26,43 +30,102 @@ namespace smt {
|
|||
m_seq(ctx.get_manager()),
|
||||
m_autil(ctx.get_manager()),
|
||||
m_rewriter(ctx.get_manager()),
|
||||
m_arith_value(ctx.get_manager()),
|
||||
m_egraph(ctx.get_manager()),
|
||||
m_sgraph(ctx.get_manager(), m_egraph),
|
||||
m_nielsen(m_sgraph),
|
||||
m_state(m_sgraph)
|
||||
m_state(m_sgraph),
|
||||
m_regex(m_sgraph),
|
||||
m_model(*this, ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex)
|
||||
{}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Initialization
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void theory_nseq::init() {
|
||||
m_arith_value.init(&get_context());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Internalization
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Internalize a boolean atom owned by this theory.
//
// str.in_re atoms get a bool_var (no enode for the predicate itself) so that
// assign_eh fires when the SAT solver assigns them; only the string argument
// is given an enode, a theory variable and an snode. This follows theory_seq
// and avoids having to internalize the regex argument. Every other atom is
// forwarded to internalize_term.
//
// Always returns true for str.in_re atoms; otherwise returns whatever
// internalize_term returns.
bool theory_nseq::internalize_atom(app* atom, bool /*gate_ctx*/) {
    if (!m_seq.str.is_in_re(atom))
        return internalize_term(atom);

    context& ctx = get_context();

    // the string side needs an enode and a theory variable
    expr* str_arg = atom->get_arg(0);
    mk_var(ensure_enode(str_arg));

    // register the predicate as a boolean variable owned by this theory
    if (!ctx.b_internalized(atom)) {
        bool_var bv = ctx.mk_bool_var(atom);
        ctx.set_var_theory(bv, get_id());
        ctx.mark_as_relevant(bv);
    }

    // make sure the string side also has an snode
    get_snode(str_arg);
    return true;
}
|
||||
|
||||
// Attach a theory variable to enode n when its expression is a sequence, a
// regex or an nth_u term. Returns the existing variable if n already has
// one, and null_theory_var for any other expression.
theory_var theory_nseq::mk_var(enode* n) {
    expr* e = n->get_expr();
    bool const handled = m_seq.is_seq(e) || m_seq.is_re(e) || m_seq.str.is_nth_u(e);
    if (!handled)
        return null_theory_var;

    // reuse a variable that is already attached
    if (is_attached_to_var(n))
        return n->get_th_var(get_id());

    theory_var const fresh = theory::mk_var(n);
    context& ctx = get_context();
    ctx.attach_th_var(n, this, fresh);
    ctx.mark_as_relevant(n);
    return fresh;
}
|
||||
|
||||
// Internalize a term of the sequence theory.
//
// Every argument is given an enode and, where mk_var accepts it, a theory
// variable. A term that already has an enode only needs its theory variable.
// Otherwise a Boolean term gets a bool_var owned by this theory, an enode is
// created (or fetched), and the term is registered in the private sgraph.
// map / mapi / foldl / foldli applications are also recorded in m_ho_terms so
// they can be unfolded lazily. Always returns true.
bool theory_nseq::internalize_term(app* term) {
    context& ctx = get_context();
    ast_manager& m = get_manager();

    // ensure ALL children are internalized (following theory_seq pattern)
    for (expr* arg : *term)
        mk_var(ensure_enode(arg));

    if (ctx.e_internalized(term)) {
        mk_var(ctx.get_enode(term));
        return true;
    }

    if (m.is_bool(term)) {
        bool_var bv = ctx.mk_bool_var(term);
        ctx.set_var_theory(bv, get_id());
        ctx.mark_as_relevant(bv);
    }

    enode* en;
    if (ctx.e_internalized(term)) {
        en = ctx.get_enode(term);
    }
    else {
        en = ctx.mk_enode(term, false, m.is_bool(term), true);
    }
    mk_var(en);

    // register in our private sgraph
    get_snode(term);

    // track higher-order terms for lazy unfolding
    expr* ho_f = nullptr, *ho_s = nullptr, *ho_b = nullptr, *ho_i = nullptr;
    if (m_seq.str.is_map(term, ho_f, ho_s) ||
        m_seq.str.is_mapi(term, ho_f, ho_i, ho_s) ||
        m_seq.str.is_foldl(term, ho_f, ho_b, ho_s) ||
        m_seq.str.is_foldli(term, ho_f, ho_i, ho_b, ho_s)) {
        m_ho_terms.push_back(term);
        ensure_length_var(ho_s);
    }

    return true;
}
|
||||
|
||||
|
|
@ -73,16 +136,73 @@ namespace smt {
|
|||
// Called when the core merges the classes of v1 and v2.
//
// Regex equalities are not handled and are only counted as unhandled.
// Sequence equalities are recorded exactly once in m_state, together with the
// two enodes that justify them, and an eq_prop item is queued. The queued
// index is the position the equality occupies in m_state.str_eqs().
void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) {
    enode* n1 = get_enode(v1);
    enode* n2 = get_enode(v2);
    expr* e1 = n1->get_expr();
    expr* e2 = n2->get_expr();

    if (m_seq.is_re(e1)) {
        ++m_num_unhandled_bool;
        return;
    }
    if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2))
        return;

    euf::snode* s1 = get_snode(e1);
    euf::snode* s2 = get_snode(e2);
    if (s1 && s2) {
        // take the index before adding so it names the new entry
        unsigned idx = m_state.str_eqs().size();
        m_state.add_str_eq(s1, s2, n1, n2);
        m_prop_queue.push_back({prop_item::eq_prop, idx});
    }
}
|
||||
|
||||
// Called when the core learns v1 != v2.
//
// Regex disequalities cannot be verified by nseq (language non-equivalence)
// and are only counted as unhandled. Sequence disequalities are recorded in
// m_state and a diseq_prop item is queued for them.
void theory_nseq::new_diseq_eh(theory_var v1, theory_var v2) {
    enode* n1 = get_enode(v1);
    enode* n2 = get_enode(v2);
    expr* e1 = n1->get_expr();
    expr* e2 = n2->get_expr();

    if (m_seq.is_re(e1)) {
        // regex disequality: nseq cannot verify language non-equivalence
        ++m_num_unhandled_bool;
        return;
    }
    if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2))
        return;

    // take the index before adding so it names the new entry
    unsigned idx = m_state.diseqs().size();
    m_state.add_diseq(n1, n2);
    m_prop_queue.push_back({prop_item::diseq_prop, idx});
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Boolean assignment notification
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Notification that Boolean variable v was assigned is_true.
//
// Only str.in_re atoms are processed: the membership is stored in m_state as
// a positive or negative membership (depending on the assigned polarity) and
// the matching propagation item is queued. Any other atom from the sequence
// family is merely counted as unhandled.
void theory_nseq::assign_eh(bool_var v, bool is_true) {
    context& ctx = get_context();
    expr* atom = ctx.bool_var2expr(v);
    expr* s = nullptr;
    expr* re = nullptr;

    if (!m_seq.str.is_in_re(atom, s, re)) {
        // prefixof, contains, ... are not handled by nseq
        bool const seq_family_atom =
            is_app(atom) && to_app(atom)->get_family_id() == m_seq.get_family_id();
        if (seq_family_atom)
            ++m_num_unhandled_bool;
        return;
    }

    euf::snode* str_node = get_snode(s);
    euf::snode* re_node = get_snode(re);
    if (str_node == nullptr || re_node == nullptr)
        return;

    // the literal is signed exactly when the atom was assigned false
    literal lit(v, !is_true);
    if (is_true) {
        unsigned pos = m_state.str_mems().size();
        m_state.add_str_mem(str_node, re_node, lit);
        m_prop_queue.push_back({prop_item::pos_mem_prop, pos});
    }
    else {
        unsigned pos = m_state.neg_mems().size();
        m_state.add_neg_mem(str_node, re_node, lit);
        m_prop_queue.push_back({prop_item::neg_mem_prop, pos});
    }

    TRACE(seq, tout << "nseq assign_eh: " << (is_true ? "" : "¬")
                    << "str.in_re "
                    << mk_bounded_pp(s, get_manager(), 3) << " in "
                    << mk_bounded_pp(re, get_manager(), 3) << "\n";);
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -93,12 +213,139 @@ namespace smt {
|
|||
theory::push_scope_eh();
|
||||
m_state.push();
|
||||
m_sgraph.push();
|
||||
m_prop_lim.push_back(m_prop_queue.size());
|
||||
m_ho_lim.push_back(m_ho_terms.size());
|
||||
m_unhandled_bool_lim.push_back(m_num_unhandled_bool);
|
||||
}
|
||||
|
||||
void theory_nseq::pop_scope_eh(unsigned num_scopes) {
|
||||
theory::pop_scope_eh(num_scopes);
|
||||
m_state.pop(num_scopes);
|
||||
m_sgraph.pop(num_scopes);
|
||||
unsigned new_sz = m_prop_lim[m_prop_lim.size() - num_scopes];
|
||||
m_prop_queue.shrink(new_sz);
|
||||
m_prop_lim.shrink(m_prop_lim.size() - num_scopes);
|
||||
if (m_prop_qhead > m_prop_queue.size())
|
||||
m_prop_qhead = m_prop_queue.size();
|
||||
unsigned ho_sz = m_ho_lim[m_ho_lim.size() - num_scopes];
|
||||
m_ho_terms.shrink(ho_sz);
|
||||
m_ho_lim.shrink(m_ho_lim.size() - num_scopes);
|
||||
m_num_unhandled_bool = m_unhandled_bool_lim[m_unhandled_bool_lim.size() - num_scopes];
|
||||
m_unhandled_bool_lim.shrink(m_unhandled_bool_lim.size() - num_scopes);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Propagation: eager eq/diseq/literal dispatch
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool theory_nseq::can_propagate() {
|
||||
return m_prop_qhead < m_prop_queue.size();
|
||||
}
|
||||
|
||||
void theory_nseq::propagate() {
|
||||
context& ctx = get_context();
|
||||
while (m_prop_qhead < m_prop_queue.size() && !ctx.inconsistent()) {
|
||||
prop_item const& item = m_prop_queue[m_prop_qhead++];
|
||||
switch (item.m_kind) {
|
||||
case prop_item::eq_prop:
|
||||
propagate_eq(item.m_idx);
|
||||
break;
|
||||
case prop_item::diseq_prop:
|
||||
propagate_diseq(item.m_idx);
|
||||
break;
|
||||
case prop_item::pos_mem_prop:
|
||||
propagate_pos_mem(item.m_idx);
|
||||
break;
|
||||
case prop_item::neg_mem_prop:
|
||||
propagate_neg_mem(item.m_idx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void theory_nseq::propagate_eq(unsigned idx) {
|
||||
// When s1 = s2 is learned, ensure len(s1) and len(s2) are
|
||||
// internalized so congruence closure propagates len(s1) = len(s2).
|
||||
eq_source const& src = m_state.get_eq_source(idx);
|
||||
ensure_length_var(src.m_n1->get_expr());
|
||||
ensure_length_var(src.m_n2->get_expr());
|
||||
}
|
||||
|
||||
void theory_nseq::propagate_diseq(unsigned idx) {
|
||||
// Disequalities are recorded for use during final_check.
|
||||
// No eager propagation beyond recording.
|
||||
TRACE(seq,
|
||||
auto const& d = m_state.get_diseq(idx);
|
||||
tout << "nseq diseq: "
|
||||
<< mk_bounded_pp(d.m_n1->get_expr(), get_manager(), 3)
|
||||
<< " != "
|
||||
<< mk_bounded_pp(d.m_n2->get_expr(), get_manager(), 3) << "\n";);
|
||||
}
|
||||
|
||||
void theory_nseq::propagate_pos_mem(unsigned idx) {
|
||||
auto const& mem = m_state.str_mems()[idx];
|
||||
auto const& src = m_state.get_mem_source(idx);
|
||||
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
return;
|
||||
|
||||
// regex is ∅ → conflict
|
||||
if (m_regex.is_empty_regex(mem.m_regex)) {
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
lits.push_back(src.m_lit);
|
||||
set_conflict(eqs, lits);
|
||||
return;
|
||||
}
|
||||
|
||||
// empty string in non-nullable regex → conflict
|
||||
if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) {
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
lits.push_back(src.m_lit);
|
||||
set_conflict(eqs, lits);
|
||||
return;
|
||||
}
|
||||
|
||||
// ensure length term exists for the string argument
|
||||
expr* s_expr = mem.m_str->get_expr();
|
||||
if (s_expr)
|
||||
ensure_length_var(s_expr);
|
||||
}
|
||||
|
||||
void theory_nseq::propagate_neg_mem(unsigned idx) {
|
||||
auto const& entry = m_state.get_neg_mem(idx);
|
||||
|
||||
if (!entry.m_str || !entry.m_regex)
|
||||
return;
|
||||
|
||||
// ¬(s in Σ*) is always false → conflict
|
||||
if (m_regex.is_full_regex(entry.m_regex)) {
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
lits.push_back(entry.m_lit);
|
||||
set_conflict(eqs, lits);
|
||||
return;
|
||||
}
|
||||
|
||||
// ¬(ε in R) where R is nullable → conflict
|
||||
if (entry.m_str->is_empty() && entry.m_regex->is_nullable()) {
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
lits.push_back(entry.m_lit);
|
||||
set_conflict(eqs, lits);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Internalize the term len(e) unless it already has an enode.
// Does nothing for null or non-sequence expressions.
void theory_nseq::ensure_length_var(expr* e) {
    if (e == nullptr)
        return;
    if (!m_seq.is_seq(e))
        return;

    ast_manager& m = get_manager();
    context& ctx = get_context();
    expr_ref len_term(m_seq.str.mk_length(e), m);
    if (ctx.e_internalized(len_term))
        return;
    ctx.internalize(len_term, false);
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -107,30 +354,162 @@ namespace smt {
|
|||
|
||||
void theory_nseq::populate_nielsen_graph() {
|
||||
m_nielsen.reset();
|
||||
seq::nielsen_node* root = m_nielsen.mk_node();
|
||||
m_nielsen.set_root(root);
|
||||
for (auto const& eq : m_state.str_eqs())
|
||||
root->add_str_eq(eq);
|
||||
for (auto const& mem : m_state.str_mems())
|
||||
root->add_str_mem(mem);
|
||||
m_nielsen_to_state_mem.reset();
|
||||
|
||||
// transfer string equalities from state to nielsen graph root
|
||||
for (auto const& eq : m_state.str_eqs()) {
|
||||
m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs);
|
||||
}
|
||||
|
||||
// transfer regex memberships, pre-processing through nseq_regex
|
||||
// to consume ground prefixes via Brzozowski derivatives
|
||||
for (unsigned state_idx = 0; state_idx < m_state.str_mems().size(); ++state_idx) {
|
||||
auto const& mem = m_state.str_mems()[state_idx];
|
||||
int triv = m_regex.check_trivial(mem);
|
||||
if (triv > 0)
|
||||
continue; // trivially satisfied, skip
|
||||
if (triv < 0) {
|
||||
// trivially unsat: add anyway so solve() detects conflict
|
||||
m_nielsen.add_str_mem(mem.m_str, mem.m_regex);
|
||||
m_nielsen_to_state_mem.push_back(state_idx);
|
||||
continue;
|
||||
}
|
||||
// pre-process: consume ground prefix characters
|
||||
vector<seq::str_mem> processed;
|
||||
if (!m_regex.process_str_mem(mem, processed)) {
|
||||
// conflict during ground prefix consumption
|
||||
m_nielsen.add_str_mem(mem.m_str, mem.m_regex);
|
||||
m_nielsen_to_state_mem.push_back(state_idx);
|
||||
continue;
|
||||
}
|
||||
for (auto const& pm : processed) {
|
||||
m_nielsen.add_str_mem(pm.m_str, pm.m_regex);
|
||||
m_nielsen_to_state_mem.push_back(state_idx);
|
||||
}
|
||||
}
|
||||
|
||||
TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, "
|
||||
<< m_state.str_mems().size() << " mems -> nielsen root with "
|
||||
<< m_nielsen.num_input_eqs() << " eqs, "
|
||||
<< m_nielsen.num_input_mems() << " mems\n";);
|
||||
}
|
||||
|
||||
// Final check for the nseq theory.
//
// Order of the steps:
//   1. assert len(x) >= 0 for all string variables (continue if any is new);
//   2. give up if unhandled Boolean string predicates were assigned;
//   3. done if there are neither constraints nor higher-order terms;
//   4. unfold higher-order terms (continue on progress);
//   5. done if there are still no constraints;
//   6. build the Nielsen graph and assert derived length constraints
//      (continue if any is new);
//   7. run the Nielsen search and translate its verdict.
final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) {
    // asserted even when there are no string equations/memberships
    if (assert_nonneg_for_all_vars())
        return FC_CONTINUE;

    // prefixof, contains, etc. are not modelled: sat cannot be claimed
    if (has_unhandled_preds())
        return FC_GIVEUP;

    if (m_state.empty() && m_ho_terms.empty())
        return FC_DONE;

    // unfold higher-order terms when sequence structure is known
    if (unfold_ho_terms())
        return FC_CONTINUE;

    if (m_state.empty())
        return FC_DONE;

    populate_nielsen_graph();
    ++m_num_nodes_explored;

    // assert length constraints derived from string equalities
    if (assert_length_constraints())
        return FC_CONTINUE;

    ++m_num_final_checks;

    switch (m_nielsen.solve()) {
    case seq::nielsen_graph::search_result::sat: {
        // Only the positive constraints were solved. Negative memberships
        // and disequalities are not verified, so sat is claimed only when
        // there are none of them.
        bool const nothing_unverified =
            m_state.neg_mems().empty() && m_state.diseqs().empty();
        return nothing_unverified ? FC_DONE : FC_GIVEUP;
    }
    case seq::nielsen_graph::search_result::unsat:
        explain_nielsen_conflict();
        return FC_CONTINUE;
    default:
        return FC_GIVEUP;
    }
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Conflict explanation
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Translate the set bits of a dependency tracker into a justification.
// Bits below the number of Nielsen input equalities name state equalities;
// the remaining bits name Nielsen memberships, mapped back to state
// memberships through m_nielsen_to_state_mem. An equality is emitted only if
// its two enodes currently share a root, a literal only if it is assigned
// true. Results are appended to eqs and lits.
void theory_nseq::deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits) {
    context& ctx = get_context();
    unsigned_vector set_bits;
    deps.get_set_bits(set_bits);
    unsigned const eq_count = m_nielsen.num_input_eqs();

    for (unsigned bit : set_bits) {
        if (bit < eq_count) {
            eq_source const& src = m_state.get_eq_source(bit);
            if (src.m_n1->get_root() == src.m_n2->get_root())
                eqs.push_back({src.m_n1, src.m_n2});
            continue;
        }
        unsigned const mem_pos = bit - eq_count;
        if (mem_pos >= m_nielsen_to_state_mem.size())
            continue;
        unsigned const state_pos = m_nielsen_to_state_mem[mem_pos];
        mem_source const& src = m_state.get_mem_source(state_pos);
        if (ctx.get_assignment(src.m_lit) == l_true)
            lits.push_back(src.m_lit);
    }
}
|
||||
|
||||
// Raise a conflict justified by the constraints named in deps and count it.
void theory_nseq::add_conflict_clause(seq::dep_tracker const& deps) {
    literal_vector reason_lits;
    enode_pair_vector reason_eqs;
    deps_to_lits(deps, reason_eqs, reason_lits);
    ++m_num_conflicts;
    set_conflict(reason_eqs, reason_lits);
}
|
||||
|
||||
void theory_nseq::explain_nielsen_conflict() {
|
||||
seq::dep_tracker deps;
|
||||
m_nielsen.collect_conflict_deps(deps);
|
||||
add_conflict_clause(deps);
|
||||
}
|
||||
|
||||
// Report a theory conflict to the context, justified by the given enode
// equalities and literals.
void theory_nseq::set_conflict(enode_pair_vector const& eqs, literal_vector const& lits) {
    context& ctx = get_context();
    TRACE(seq, tout << "nseq conflict: " << eqs.size() << " eqs, " << lits.size() << " lits\n";);
    ext_theory_conflict_justification reason(
        get_id(), ctx,
        lits.size(), lits.data(),
        eqs.size(), eqs.data(),
        0, nullptr);
    ctx.set_conflict(ctx.mk_justification(reason));
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Model generation
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void theory_nseq::init_model(model_generator& /*mg*/) {
|
||||
// stub – no model assignment for now
|
||||
void theory_nseq::init_model(model_generator& mg) {
|
||||
m_model.init(mg, m_nielsen, m_state);
|
||||
}
|
||||
|
||||
// Value construction for n is delegated to the model helper.
model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) {
    model_value_proc* proc = m_model.mk_value(n, mg);
    return proc;
}
|
||||
|
||||
// Model finalization is delegated to the model helper.
void theory_nseq::finalize_model(model_generator& mg) {
    m_model.finalize(mg);
}
|
||||
|
||||
// Ask the model helper to validate the regex constraints of the current
// state against the proto model.
void theory_nseq::validate_model(proto_model& mdl) {
    m_model.validate_regex(m_state, mdl);
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -139,14 +518,47 @@ namespace smt {
|
|||
|
||||
// Publish the theory's own counters followed by the Nielsen graph's search
// counters and its per-modifier breakdown.
void theory_nseq::collect_statistics(::statistics& st) const {
    // counters kept by the theory
    st.update("nseq conflicts",       m_num_conflicts);
    st.update("nseq nodes explored",  m_num_nodes_explored);
    st.update("nseq depth increases", m_num_depth_increases);
    st.update("nseq final checks",    m_num_final_checks);
    st.update("nseq length axioms",   m_num_length_axioms);

    auto const& search = m_nielsen.stats();

    // Nielsen graph search metrics
    st.update("nseq solve calls",    search.m_num_solve_calls);
    st.update("nseq dfs nodes",      search.m_num_dfs_nodes);
    st.update("nseq sat",            search.m_num_sat);
    st.update("nseq unsat",          search.m_num_unsat);
    st.update("nseq unknown",        search.m_num_unknown);
    st.update("nseq simplify clash", search.m_num_simplify_conflict);
    st.update("nseq subsumptions",   search.m_num_subsumptions);
    st.update("nseq extensions",     search.m_num_extensions);
    st.update("nseq fresh vars",     search.m_num_fresh_vars);
    st.update("nseq max depth",      search.m_max_depth);

    // how often each modifier was applied
    st.update("nseq mod det",              search.m_mod_det);
    st.update("nseq mod power epsilon",    search.m_mod_power_epsilon);
    st.update("nseq mod num cmp",          search.m_mod_num_cmp);
    st.update("nseq mod const num unwind", search.m_mod_const_num_unwinding);
    st.update("nseq mod eq split",         search.m_mod_eq_split);
    st.update("nseq mod star intr",        search.m_mod_star_intr);
    st.update("nseq mod gpower intr",      search.m_mod_gpower_intr);
    st.update("nseq mod const nielsen",    search.m_mod_const_nielsen);
    st.update("nseq mod regex char",       search.m_mod_regex_char_split);
    st.update("nseq mod regex var",        search.m_mod_regex_var_split);
    st.update("nseq mod power split",      search.m_mod_power_split);
    st.update("nseq mod var nielsen",      search.m_mod_var_nielsen);
    st.update("nseq mod var num unwind",   search.m_mod_var_num_unwinding);

    st.update("nseq ho unfolds", m_num_ho_unfolds);
}
|
||||
|
||||
// Print a one-line-per-item summary of the theory state: the number of
// recorded constraints of each kind, the propagation queue position and the
// number of tracked higher-order terms. Each item is printed once.
void theory_nseq::display(std::ostream& out) const {
    out << "theory_nseq\n";
    out << " str_eqs: " << m_state.str_eqs().size() << "\n";
    out << " str_mems: " << m_state.str_mems().size() << "\n";
    out << " diseqs: " << m_state.diseqs().size() << "\n";
    out << " neg_mems: " << m_state.neg_mems().size() << "\n";
    out << " prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n";
    out << " ho_terms: " << m_ho_terms.size() << "\n";
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -157,6 +569,129 @@ namespace smt {
|
|||
return alloc(theory_nseq, *ctx);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Higher-order term unfolding (seq.map, seq.foldl, etc.)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool theory_nseq::unfold_ho_terms() {
|
||||
if (m_ho_terms.empty())
|
||||
return false;
|
||||
|
||||
context& ctx = get_context();
|
||||
ast_manager& m = get_manager();
|
||||
bool progress = false;
|
||||
|
||||
unsigned sz = m_ho_terms.size();
|
||||
for (unsigned i = 0; i < sz; ++i) {
|
||||
app* term = m_ho_terms[i];
|
||||
expr* f = nullptr, *s = nullptr, *b = nullptr, *idx = nullptr;
|
||||
|
||||
if (!m_seq.str.is_map(term, f, s) &&
|
||||
!m_seq.str.is_mapi(term, f, idx, s) &&
|
||||
!m_seq.str.is_foldl(term, f, b, s) &&
|
||||
!m_seq.str.is_foldli(term, f, idx, b, s))
|
||||
continue;
|
||||
|
||||
if (!ctx.e_internalized(s))
|
||||
continue;
|
||||
|
||||
// Find a structural representative in s's equivalence class
|
||||
enode* s_root = ctx.get_enode(s)->get_root();
|
||||
expr* repr = nullptr;
|
||||
enode* curr = s_root;
|
||||
do {
|
||||
expr* e = curr->get_expr();
|
||||
expr *a1, *a2;
|
||||
if (m_seq.str.is_empty(e) ||
|
||||
m_seq.str.is_unit(e, a1) ||
|
||||
m_seq.str.is_concat(e, a1, a2)) {
|
||||
repr = e;
|
||||
break;
|
||||
}
|
||||
curr = curr->get_next();
|
||||
} while (curr != s_root);
|
||||
|
||||
if (!repr)
|
||||
continue;
|
||||
|
||||
// Build ho_term with structural seq arg, then rewrite
|
||||
expr_ref ho_repr(m);
|
||||
if (m_seq.str.is_map(term))
|
||||
ho_repr = m_seq.str.mk_map(f, repr);
|
||||
else if (m_seq.str.is_mapi(term))
|
||||
ho_repr = m_seq.str.mk_mapi(f, idx, repr);
|
||||
else if (m_seq.str.is_foldl(term))
|
||||
ho_repr = m_seq.str.mk_foldl(f, b, repr);
|
||||
else
|
||||
ho_repr = m_seq.str.mk_foldli(f, idx, b, repr);
|
||||
|
||||
expr_ref rewritten(m);
|
||||
br_status st = m_rewriter.mk_app_core(
|
||||
to_app(ho_repr)->get_decl(),
|
||||
to_app(ho_repr)->get_num_args(),
|
||||
to_app(ho_repr)->get_args(),
|
||||
rewritten);
|
||||
|
||||
if (st == BR_FAILED)
|
||||
continue;
|
||||
|
||||
// Internalize both the structural ho_term and its rewrite
|
||||
if (!ctx.e_internalized(ho_repr))
|
||||
ctx.internalize(ho_repr, false);
|
||||
if (!ctx.e_internalized(rewritten))
|
||||
ctx.internalize(rewritten, false);
|
||||
|
||||
enode* ho_en = ctx.get_enode(ho_repr);
|
||||
enode* res_en = ctx.get_enode(rewritten);
|
||||
|
||||
if (ho_en->get_root() == res_en->get_root())
|
||||
continue;
|
||||
|
||||
// Assert tautological axiom: ho_repr = rewritten
|
||||
// Congruence closure merges map(f,s) with map(f,repr)
|
||||
// since s = repr in the E-graph.
|
||||
expr_ref eq_expr(m.mk_eq(ho_repr, rewritten), m);
|
||||
if (!ctx.b_internalized(eq_expr))
|
||||
ctx.internalize(eq_expr, true);
|
||||
literal eq_lit = ctx.get_literal(eq_expr);
|
||||
if (ctx.get_assignment(eq_lit) != l_true) {
|
||||
ctx.mk_th_axiom(get_id(), 1, &eq_lit);
|
||||
TRACE(seq, tout << "nseq ho unfold: "
|
||||
<< mk_bounded_pp(ho_repr, m, 3) << " = "
|
||||
<< mk_bounded_pp(rewritten, m, 3) << "\n";);
|
||||
++m_num_ho_unfolds;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
// For map/mapi: propagate length preservation
|
||||
for (unsigned i = 0; i < sz; ++i) {
|
||||
app* term = m_ho_terms[i];
|
||||
expr* f = nullptr, *s = nullptr, *idx = nullptr;
|
||||
bool is_map = m_seq.str.is_map(term, f, s);
|
||||
bool is_mapi = !is_map && m_seq.str.is_mapi(term, f, idx, s);
|
||||
if (!is_map && !is_mapi)
|
||||
continue;
|
||||
if (!m_seq.is_seq(term))
|
||||
continue;
|
||||
|
||||
// len(map(f, s)) = len(s)
|
||||
expr_ref len_map(m_seq.str.mk_length(term), m);
|
||||
expr_ref len_s(m_seq.str.mk_length(s), m);
|
||||
expr_ref len_eq(m.mk_eq(len_map, len_s), m);
|
||||
if (!ctx.b_internalized(len_eq))
|
||||
ctx.internalize(len_eq, true);
|
||||
literal len_lit = ctx.get_literal(len_eq);
|
||||
if (ctx.get_assignment(len_lit) != l_true) {
|
||||
ctx.mk_th_axiom(get_id(), 1, &len_lit);
|
||||
++m_num_length_axioms;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -168,4 +703,136 @@ namespace smt {
|
|||
return s;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Arithmetic value queries
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Fetch the value of e through get_value_equiv; succeeds only if a value was
// found and it is an integer.
bool theory_nseq::get_num_value(expr* e, rational& val) const {
    if (!m_arith_value.get_value_equiv(e, val))
        return false;
    return val.is_int();
}
|
||||
|
||||
// Fetch a lower bound for e; succeeds only for a non-strict integer bound.
bool theory_nseq::lower_bound(expr* e, rational& lo) const {
    bool strict = true;
    if (!m_arith_value.get_lo(e, lo, strict))
        return false;
    if (strict)
        return false;
    return lo.is_int();
}
|
||||
|
||||
// Fetch an upper bound for e; succeeds only for a non-strict integer bound.
bool theory_nseq::upper_bound(expr* e, rational& hi) const {
    bool strict = true;
    if (!m_arith_value.get_up(e, hi, strict))
        return false;
    if (strict)
        return false;
    return hi.is_int();
}
|
||||
|
||||
// Compute the length of e into val by walking its concatenation structure:
// units count 1, empty sequences 0, string constants their length, and any
// other leaf contributes the current arithmetic value of len(leaf).
// Fails if such a value is unavailable or negative; otherwise returns whether
// the accumulated total is an integer.
bool theory_nseq::get_length(expr* e, rational& val) {
    ast_manager& m = get_manager();
    rational leaf_len;
    expr* left = nullptr;
    expr* right = nullptr;
    ptr_vector<expr> worklist;
    worklist.push_back(e);
    val.reset();
    zstring str;

    while (!worklist.empty()) {
        expr* cur = worklist.back();
        worklist.pop_back();

        if (m_seq.str.is_concat(cur, left, right)) {
            worklist.push_back(left);
            worklist.push_back(right);
            continue;
        }
        if (m_seq.str.is_unit(cur)) {
            val += rational(1);
            continue;
        }
        if (m_seq.str.is_empty(cur))
            continue;
        if (m_seq.str.is_string(cur, str)) {
            val += rational(str.length());
            continue;
        }

        // opaque leaf: fall back to the arithmetic value of its length
        expr_ref len(m_seq.str.mk_length(cur), m);
        if (!m_arith_value.get_value(len, leaf_len) || leaf_len.is_neg())
            return false;
        val += leaf_len;
    }
    return val.is_int();
}
|
||||
|
||||
// Assert lit as a unit theory axiom, mark it relevant, and count it.
void theory_nseq::add_length_axiom(literal lit) {
    context& ctx = get_context();
    ctx.mark_as_relevant(lit);
    literal const unit[1] = { lit };
    ctx.mk_th_axiom(get_id(), 1, unit);
    ++m_num_length_axioms;
}
|
||||
|
||||
// Make the length constraint lc (already internalized as lit) known to the
// context. Non-negativity constraints are unconditional and asserted as
// axioms; every other kind is assigned with a justification built from the
// constraint's dependency tracker. Always returns true.
bool theory_nseq::propagate_length_lemma(literal lit, seq::length_constraint const& lc) {
    context& ctx = get_context();
    ast_manager& m = get_manager();

    bool const unconditional = (lc.m_kind == seq::length_kind::nonneg);
    if (unconditional) {
        add_length_axiom(lit);
        return true;
    }

    // conditional constraint: justify it by the constraints it depends on
    enode_pair_vector reason_eqs;
    literal_vector reason_lits;
    deps_to_lits(lc.m_dep, reason_eqs, reason_lits);

    ctx.mark_as_relevant(lit);
    ext_theory_propagation_justification reason(
        get_id(), ctx,
        reason_lits.size(), reason_lits.data(),
        reason_eqs.size(), reason_eqs.data(),
        lit);
    justification* js = ctx.mk_justification(reason);
    ctx.assign(lit, js);

    TRACE(seq, tout << "nseq length propagation: " << mk_pp(lc.m_expr, m)
                    << " (" << reason_eqs.size() << " eqs, " << reason_lits.size() << " lits)\n";);
    ++m_num_length_axioms;
    return true;
}
|
||||
|
||||
bool theory_nseq::assert_nonneg_for_all_vars() {
|
||||
ast_manager& m = get_manager();
|
||||
context& ctx = get_context();
|
||||
arith_util arith(m);
|
||||
bool new_axiom = false;
|
||||
unsigned nv = get_num_vars();
|
||||
for (unsigned v = 0; v < nv; ++v) {
|
||||
expr* e = get_enode(v)->get_expr();
|
||||
if (!m_seq.is_seq(e))
|
||||
continue;
|
||||
expr_ref len_var(m_seq.str.mk_length(e), m);
|
||||
expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m);
|
||||
if (!ctx.b_internalized(ge_zero))
|
||||
ctx.internalize(ge_zero, true);
|
||||
literal lit = ctx.get_literal(ge_zero);
|
||||
if (ctx.get_assignment(lit) != l_true) {
|
||||
add_length_axiom(lit);
|
||||
new_axiom = true;
|
||||
}
|
||||
}
|
||||
return new_axiom;
|
||||
}
|
||||
|
||||
bool theory_nseq::assert_length_constraints() {
|
||||
ast_manager& m = get_manager();
|
||||
context& ctx = get_context();
|
||||
vector<seq::length_constraint> constraints;
|
||||
m_nielsen.generate_length_constraints(constraints);
|
||||
|
||||
bool new_axiom = false;
|
||||
for (auto const& lc : constraints) {
|
||||
expr* e = lc.m_expr;
|
||||
if (!ctx.b_internalized(e))
|
||||
ctx.internalize(e, true);
|
||||
literal lit = ctx.get_literal(e);
|
||||
if (ctx.get_assignment(lit) != l_true) {
|
||||
TRACE(seq, tout << "nseq length lemma: " << mk_pp(e, m) << "\n";);
|
||||
propagate_length_lemma(lit, lc);
|
||||
new_axiom = true;
|
||||
}
|
||||
}
|
||||
return new_axiom;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,42 +35,99 @@ namespace smt {
|
|||
seq_util m_seq;
|
||||
arith_util m_autil;
|
||||
seq_rewriter m_rewriter;
|
||||
arith_value m_arith_value;
|
||||
euf::egraph m_egraph; // private egraph (not shared with smt context)
|
||||
euf::sgraph m_sgraph; // private sgraph
|
||||
seq::nielsen_graph m_nielsen;
|
||||
nseq_state m_state;
|
||||
nseq_regex m_regex; // regex membership pre-processing
|
||||
nseq_model m_model; // model construction helper
|
||||
|
||||
// propagation queue
|
||||
// One queued propagation: the kind of constraint that was recorded and the
// index it was stored at when it was queued.
struct prop_item {
    enum kind_t {
        eq_prop,
        diseq_prop,
        pos_mem_prop,
        neg_mem_prop
    };
    kind_t   m_kind;
    unsigned m_idx;
};
|
||||
svector<prop_item> m_prop_queue;
|
||||
unsigned m_prop_qhead = 0;
|
||||
unsigned_vector m_prop_lim; // saved queue sizes for push/pop
|
||||
|
||||
// statistics
|
||||
unsigned m_num_conflicts = 0;
|
||||
unsigned m_num_nodes_explored = 0;
|
||||
unsigned m_num_depth_increases = 0;
|
||||
unsigned m_num_final_checks = 0;
|
||||
unsigned m_num_length_axioms = 0;
|
||||
|
||||
// map from context expression to private sgraph snode
|
||||
obj_map<expr, euf::snode*> m_expr2snode;
|
||||
|
||||
// mapping from nielsen mem index to state mem index
|
||||
// (populated during populate_nielsen_graph, used in deps_to_lits)
|
||||
unsigned_vector m_nielsen_to_state_mem;
|
||||
|
||||
// higher-order terms (seq.map, seq.mapi, seq.foldl, seq.foldli)
|
||||
ptr_vector<app> m_ho_terms;
|
||||
unsigned_vector m_ho_lim; // push/pop limits for m_ho_terms
|
||||
unsigned m_num_ho_unfolds = 0;
|
||||
|
||||
// unhandled boolean string predicates (prefixof, suffixof, contains, etc.)
|
||||
unsigned m_num_unhandled_bool = 0;
|
||||
unsigned_vector m_unhandled_bool_lim;
|
||||
|
||||
// true once at least one unhandled Boolean string predicate was counted
bool has_unhandled_preds() const {
    return m_num_unhandled_bool != 0;
}
|
||||
|
||||
// required virtual methods
|
||||
bool internalize_atom(app* a, bool gate_ctx) override;
|
||||
bool internalize_term(app* term) override;
|
||||
theory_var mk_var(enode* n) override;
|
||||
void new_eq_eh(theory_var v1, theory_var v2) override;
|
||||
void new_diseq_eh(theory_var v1, theory_var v2) override;
|
||||
theory* mk_fresh(context* ctx) override;
|
||||
void display(std::ostream& out) const override;
|
||||
|
||||
// optional overrides
|
||||
bool can_propagate() override { return false; }
|
||||
void propagate() override {}
|
||||
bool can_propagate() override;
|
||||
void propagate() override;
|
||||
void init() override;
|
||||
void assign_eh(bool_var v, bool is_true) override;
|
||||
final_check_status final_check_eh(unsigned) override;
|
||||
void push_scope_eh() override;
|
||||
void pop_scope_eh(unsigned num_scopes) override;
|
||||
void init_model(model_generator& mg) override;
|
||||
model_value_proc* mk_value(enode* n, model_generator& mg) override;
|
||||
void finalize_model(model_generator& mg) override;
|
||||
void validate_model(proto_model& mdl) override;
|
||||
void collect_statistics(::statistics& st) const override;
|
||||
|
||||
// name under which this theory identifies itself
char const* get_name() const override {
    return "nseq";
}
|
||||
|
||||
// private helpers
|
||||
void populate_nielsen_graph();
|
||||
void explain_nielsen_conflict();
|
||||
void deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits);
|
||||
void add_conflict_clause(seq::dep_tracker const& deps);
|
||||
void set_conflict(enode_pair_vector const& eqs, literal_vector const& lits);
|
||||
euf::snode* get_snode(expr* e);
|
||||
|
||||
// propagation dispatch helpers
|
||||
void propagate_eq(unsigned idx);
|
||||
void propagate_diseq(unsigned idx);
|
||||
void propagate_pos_mem(unsigned idx);
|
||||
void propagate_neg_mem(unsigned idx);
|
||||
void ensure_length_var(expr* e);
|
||||
|
||||
// higher-order term unfolding
|
||||
bool unfold_ho_terms();
|
||||
|
||||
// arithmetic value queries for length reasoning
|
||||
bool get_num_value(expr* e, rational& val) const;
|
||||
bool lower_bound(expr* e, rational& lo) const;
|
||||
bool upper_bound(expr* e, rational& hi) const;
|
||||
bool get_length(expr* e, rational& val);
|
||||
void add_length_axiom(literal lit);
|
||||
bool propagate_length_lemma(literal lit, seq::length_constraint const& lc);
|
||||
bool assert_nonneg_for_all_vars();
|
||||
bool assert_length_constraints();
|
||||
|
||||
public:
|
||||
theory_nseq(context& ctx);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -133,6 +133,7 @@ add_executable(test-z3
|
|||
sls_seq_plugin.cpp
|
||||
seq_nielsen.cpp
|
||||
nseq_basic.cpp
|
||||
nseq_regex.cpp
|
||||
small_object_allocator.cpp
|
||||
smt2print_parse.cpp
|
||||
smt_context.cpp
|
||||
|
|
|
|||
|
|
@ -288,6 +288,7 @@ int main(int argc, char ** argv) {
|
|||
TST(sls_seq_plugin);
|
||||
TST(seq_nielsen);
|
||||
TST(nseq_basic);
|
||||
TST(nseq_regex);
|
||||
TST(ho_matcher);
|
||||
TST(finite_set);
|
||||
TST(finite_set_rewriter);
|
||||
|
|
|
|||
|
|
@ -100,10 +100,119 @@ static void test_nseq_node_satisfied() {
|
|||
std::cout << " ok\n";
|
||||
}
|
||||
|
||||
// Test 5: symbol clash conflict ("a" = "b" is unsat)
|
||||
static void test_nseq_symbol_clash() {
|
||||
std::cout << "test_nseq_symbol_clash\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::nielsen_graph ng(sg);
|
||||
|
||||
euf::snode* a = sg.mk_char('a');
|
||||
euf::snode* b = sg.mk_char('b');
|
||||
ng.add_str_eq(a, b);
|
||||
|
||||
auto r = ng.solve();
|
||||
SASSERT(r == seq::nielsen_graph::search_result::unsat);
|
||||
|
||||
// verify conflict explanation returns the equality index
|
||||
unsigned_vector eq_idx, mem_idx;
|
||||
ng.explain_conflict(eq_idx, mem_idx);
|
||||
SASSERT(eq_idx.size() == 1);
|
||||
SASSERT(eq_idx[0] == 0);
|
||||
SASSERT(mem_idx.empty());
|
||||
std::cout << " ok: symbol clash detected as unsat\n";
|
||||
}
|
||||
|
||||
// Test 6: variable equality x = x is sat
|
||||
static void test_nseq_var_eq_self() {
|
||||
std::cout << "test_nseq_var_eq_self\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::nielsen_graph ng(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
ng.add_str_eq(x, x);
|
||||
|
||||
auto r = ng.solve();
|
||||
SASSERT(r == seq::nielsen_graph::search_result::sat);
|
||||
std::cout << " ok: x = x solved as sat\n";
|
||||
}
|
||||
|
||||
// Test 7: x·a = x·b is unsat (prefix match then clash)
|
||||
static void test_nseq_prefix_clash() {
|
||||
std::cout << "test_nseq_prefix_clash\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::nielsen_graph ng(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* a = sg.mk_char('a');
|
||||
euf::snode* b = sg.mk_char('b');
|
||||
euf::snode* xa = sg.mk_concat(x, a);
|
||||
euf::snode* xb = sg.mk_concat(x, b);
|
||||
|
||||
ng.add_str_eq(xa, xb);
|
||||
auto r = ng.solve();
|
||||
SASSERT(r == seq::nielsen_graph::search_result::unsat);
|
||||
std::cout << " ok: x·a = x·b detected as unsat\n";
|
||||
}
|
||||
|
||||
// Test 8: a·x = a·y has solutions (not unsat)
|
||||
static void test_nseq_const_nielsen_solvable() {
|
||||
std::cout << "test_nseq_const_nielsen_solvable\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::nielsen_graph ng(sg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* a = sg.mk_char('a');
|
||||
euf::snode* ax = sg.mk_concat(a, x);
|
||||
euf::snode* ay = sg.mk_concat(a, y);
|
||||
|
||||
ng.add_str_eq(ax, ay);
|
||||
auto r = ng.solve();
|
||||
// a·x = a·y simplifies to x = y which is satisfiable (x = y = ε)
|
||||
SASSERT(r == seq::nielsen_graph::search_result::sat);
|
||||
std::cout << " ok: a·x = a·y solved as sat\n";
|
||||
}
|
||||
|
||||
// Test 9: length mismatch - "ab" = "a" is unsat
|
||||
static void test_nseq_length_mismatch() {
|
||||
std::cout << "test_nseq_length_mismatch\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq::nielsen_graph ng(sg);
|
||||
|
||||
euf::snode* a = sg.mk_char('a');
|
||||
euf::snode* b = sg.mk_char('b');
|
||||
euf::snode* ab = sg.mk_concat(a, b);
|
||||
|
||||
ng.add_str_eq(ab, a);
|
||||
auto r = ng.solve();
|
||||
SASSERT(r == seq::nielsen_graph::search_result::unsat);
|
||||
std::cout << " ok: ab = a detected as unsat\n";
|
||||
}
|
||||
|
||||
// Entry point for the nseq_basic test group: runs every test case in a
// fixed order and prints a summary line once all of them have returned.
// NOTE(review): the individual cases check their results with SASSERT; if
// that macro is disabled in release builds the checks are skipped — confirm,
// and consider an always-on assertion.
void tst_nseq_basic() {
    using test_case = void (*)();
    const test_case cases[] = {
        test_nseq_instantiation,
        test_nseq_param_validation,
        test_nseq_simplification,
        test_nseq_node_satisfied,
        test_nseq_symbol_clash,
        test_nseq_var_eq_self,
        test_nseq_prefix_clash,
        test_nseq_const_nielsen_solvable,
        test_nseq_length_mismatch,
    };
    for (test_case run_case : cases)
        run_case();
    std::cout << "nseq_basic: all tests passed\n";
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue