3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 13:54:53 +00:00

first end-pass. Atomic

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2026-03-04 02:05:26 -08:00
parent 13f9fec339
commit 5aa3713d19
15 changed files with 6160 additions and 209 deletions

View file

@ -355,6 +355,22 @@ namespace euf {
if (n)
return n;
// decompose non-empty string constants into character chains
// so that Nielsen graph can do prefix matching on them
zstring s;
if (m_seq.str.is_string(e, s) && !s.empty()) {
snode* result = mk_char(s[s.length() - 1]);
for (unsigned i = s.length() - 1; i-- > 0; )
result = mk_concat(mk_char(s[i]), result);
// register the original string expression as an alias
unsigned eid = e->get_id();
m_expr2snode.reserve(eid + 1, nullptr);
m_expr2snode[eid] = result;
m_alias_trail.push_back(eid);
mk_enode(e);
return result;
}
snode_kind k = classify(e);
if (!is_app(e))
@ -400,6 +416,7 @@ namespace euf {
void sgraph::push() {
m_scopes.push_back(m_nodes.size());
m_alias_trail_lim.push_back(m_alias_trail.size());
++m_num_scopes;
m_egraph.push();
}
@ -420,6 +437,15 @@ namespace euf {
}
m_nodes.shrink(old_sz);
m_scopes.shrink(new_lvl);
// undo alias entries (string constant decompositions)
unsigned alias_old = m_alias_trail_lim[new_lvl];
for (unsigned i = m_alias_trail.size(); i-- > alias_old; ) {
unsigned eid = m_alias_trail[i];
if (eid < m_expr2snode.size())
m_expr2snode[eid] = nullptr;
}
m_alias_trail.shrink(alias_old);
m_alias_trail_lim.shrink(new_lvl);
m_num_scopes = new_lvl;
m_egraph.pop(num_scopes);
}
@ -520,6 +546,25 @@ namespace euf {
expr* ch = nullptr;
if (m_seq.str.is_unit(elem_expr, ch))
elem_expr = ch;
// If elem is a regex predicate (e.g., re.allchar from compute_minterms),
// extract a representative character for the derivative.
sort* seq_sort = nullptr, *ele_sort = nullptr;
if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) {
if (ele_sort != elem_expr->get_sort()) {
expr* lo = nullptr, *hi = nullptr;
if (m_seq.re.is_full_char(elem_expr)) {
// re.allchar represents the entire alphabet; computing a derivative
// w.r.t. a single character would be imprecise and could incorrectly
// report fail. Return nullptr to prevent incorrect pruning.
return nullptr;
}
else if (m_seq.re.is_range(elem_expr, lo, hi) && lo)
elem_expr = lo;
else
return nullptr;
}
}
expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
if (!result)
return nullptr;

View file

@ -97,6 +97,10 @@ namespace euf {
// maps expression id to snode
ptr_vector<snode> m_expr2snode;
// trail of alias entries (string constant → decomposed snode) for pop
unsigned_vector m_alias_trail; // expression ids
unsigned_vector m_alias_trail_lim; // scope boundaries
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
snode_kind classify(expr* e) const;
void compute_metadata(snode* n);

View file

@ -7,7 +7,8 @@ Module Name:
Abstract:
Implementation of nseq_model.
Implementation of nseq_model: model construction for the
Nielsen-based string solver.
Author:
@ -15,3 +16,292 @@ Author:
--*/
#include "smt/nseq_model.h"
#include "smt/theory_nseq.h"
#include "smt/nseq_regex.h"
#include "smt/nseq_state.h"
#include "smt/smt_context.h"
#include "smt/smt_model_generator.h"
#include "smt/proto_model/proto_model.h"
#include "ast/ast_pp.h"
namespace smt {
// Bind the model builder to the owning theory and to the helper objects it
// shares with it. Nothing is allocated here; the trail is created empty and
// attached to the ast_manager.
nseq_model::nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
                       seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex)
    : m_th(th),
      m(m),
      m_seq(seq),
      m_rewriter(rw),
      m_sg(sg),
      m_regex(regex),
      m_trail(m)
{
}
// Start a model construction round.
//   mg      - model generator; receives the freshly allocated seq_factory
//   nielsen - Nielsen graph whose root supplies equations and assignments
//   state   - solver state providing the regex membership constraints
void nseq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state) {
    // discard whatever a previous round left behind
    m_var_values.reset();
    m_var_regex.reset();
    m_trail.reset();

    seq_factory* factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
    m_factory = factory;
    mg.register_factory(factory);

    register_existing_values(nielsen);
    collect_var_regex_constraints(state);

    // assignments are only read off the root when it reports itself satisfied
    seq::nielsen_node const* root = nielsen.root();
    if (!root || !root->is_satisfied())
        return;
    extract_assignments(root);
}
// Build the model value callback for enode n.
// Returns nullptr when n is neither sequence-sorted, regex-sorted nor an
// nth_u term, or when no fresh element value can be produced for nth_u.
// Every expression handed to expr_wrapper_proc is first pinned in m_trail,
// because the wrapper is given a raw app* only.
model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
    app* e = n->get_expr();
    if (!m_seq.is_seq(e) && !m_seq.is_re(e) && !m_seq.str.is_nth_u(e))
        return nullptr;

    // Regexes are interpreted as themselves in the model.
    if (m_seq.is_re(e)) {
        m_trail.push_back(e);
        return alloc(expr_wrapper_proc, e);
    }

    // nth_u (underspecified nth): any fresh value of the element sort will do.
    if (m_seq.str.is_nth_u(e)) {
        sort* srt = e->get_sort();
        expr* val = m_factory->get_fresh_value(srt);
        if (val) {
            m_trail.push_back(val);
            return alloc(expr_wrapper_proc, to_app(val));
        }
        return nullptr;
    }

    // Sequence term: evaluate its snode if the sgraph knows the expression.
    euf::snode* sn = m_sg.find(e);
    expr_ref val(m);
    if (sn)
        val = snode_to_value(sn);

    if (!val) {
        // no assignment found: generate a fresh value
        val = m_factory->get_fresh_value(e->get_sort());
    }

    if (val) {
        m_trail.push_back(val);
        m_factory->add_trail(val);
        return alloc(expr_wrapper_proc, to_app(val));
    }

    // Last resort: the empty sequence of e's sort. Pin it in the trail like
    // every other value so the callback never holds an unreferenced node.
    expr_ref empty(m_seq.str.mk_empty(e->get_sort()), m);
    m_trail.push_back(empty);
    return alloc(expr_wrapper_proc, to_app(empty.get()));
}
// End a model construction round: forget the factory pointer and clear the
// per-round maps and the expression trail. The mg argument is not used.
void nseq_model::finalize(model_generator& mg) {
    m_factory = nullptr;
    m_var_regex.reset();
    m_var_values.reset();
    m_trail.reset();
}
// Read variable assignments off the string equations of node.
// For each equation with both sides present, a side that is a variable and
// has no recorded value yet is bound to the value of the opposite side.
// The left side is tried first, then the right side.
void nseq_model::extract_assignments(seq::nielsen_node const* node) {
    if (!node)
        return;

    // bind `var` to the value of `other`, unless it is not a variable,
    // already has a value, or `other` evaluates to nothing
    auto bind = [this](euf::snode* var, euf::snode* other) {
        if (!var->is_var() || m_var_values.contains(var->id()))
            return;
        expr_ref val = snode_to_value(other);
        if (!val)
            return;
        m_trail.push_back(val);
        m_var_values.insert(var->id(), val);
    };

    for (auto const& eq : node->str_eqs()) {
        if (eq.m_lhs && eq.m_rhs) {
            bind(eq.m_lhs, eq.m_rhs);
            bind(eq.m_rhs, eq.m_lhs);
        }
    }
}
// Turn an snode into an expression, substituting variable values.
// Returns a null expr_ref when n is null or carries no usable expression.
//   empty      -> empty sequence of the node's own sort
//   char/unit  -> the node's expression
//   variable   -> its recorded or freshly generated value
//   concat     -> concatenation of both children's values; if only one child
//                 yields a value, that value alone is returned
//   otherwise  -> the node's underlying expression
expr_ref nseq_model::snode_to_value(euf::snode* n) {
    if (!n)
        return expr_ref(m);

    if (n->is_empty()) {
        // Use the sort of the node's expression when there is one, so that
        // sequences over other element sorts do not get an empty *string*.
        // The string sort is only the fallback for expression-less nodes.
        expr* e = n->get_expr();
        sort* srt = e ? e->get_sort() : m_seq.str.mk_string_sort();
        return expr_ref(m_seq.str.mk_empty(srt), m);
    }

    if (n->is_char() || n->is_unit()) {
        expr* e = n->get_expr();
        return e ? expr_ref(e, m) : expr_ref(m);
    }

    if (n->is_var())
        return expr_ref(get_var_value(n), m);

    if (n->is_concat()) {
        expr_ref lhs = snode_to_value(n->arg(0));
        expr_ref rhs = snode_to_value(n->arg(1));
        if (lhs && rhs)
            return expr_ref(m_seq.str.mk_concat(lhs, rhs), m);
        if (lhs) return lhs;
        if (rhs) return rhs;
        return expr_ref(m);
    }

    // fallback: use the underlying expression
    expr* e = n->get_expr();
    return e ? expr_ref(e, m) : expr_ref(m);
}
// Build a string intended to be accepted by regex.
//   regex - regex snode; nullptr yields the empty string
//   depth - current recursion depth; beyond 1000 a fresh value is returned
// Strategy: a nullable regex is witnessed by the empty string. Otherwise
// take the first candidate first-character whose derivative is available
// and not structurally empty, and prepend it to a witness of that derivative.
// If no candidate works, fall back to a fresh factory value, which may not
// satisfy the regex.
expr_ref nseq_model::generate_regex_witness(euf::snode* regex, unsigned depth) {
    sort* str_sort = m_seq.str.mk_string_sort();

    // fresh value from the factory, or the empty string if none is available
    auto fresh_or_empty = [&]() {
        expr* fresh = m_factory->get_fresh_value(str_sort);
        return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(str_sort), m);
    };

    if (!regex)
        return expr_ref(m_seq.str.mk_empty(str_sort), m);

    // depth bound to prevent stack overflow on deep regexes
    if (depth > 1000)
        return fresh_or_empty();

    // nullable regex: empty string is a valid witness
    if (m_regex.is_nullable(regex))
        return expr_ref(m_seq.str.mk_empty(str_sort), m);

    // collect first-position characters
    euf::snode_vector chars;
    m_regex.collect_first_chars(regex, chars);

    for (euf::snode* c : chars) {
        if (!c || !c->get_expr())
            continue;
        euf::snode* deriv = m_regex.derivative(regex, c);
        // A failed derivative (nullptr) must not be mistaken for "accepts
        // the empty string", and an empty derivative means no word of the
        // regex starts with c. Try the next candidate in both cases.
        if (!deriv || m_regex.is_empty_regex(deriv))
            continue;
        expr_ref tail = generate_regex_witness(deriv, depth + 1);
        if (tail)
            return expr_ref(m_seq.str.mk_concat(c->get_expr(), tail), m);
    }

    // fallback: fresh value from factory (may not satisfy the regex, but
    // avoids the empty string, which cannot satisfy a non-nullable regex)
    return fresh_or_empty();
}
// Hand the expressions on both sides of the root's string equations to the
// factory via register_value. Does nothing when the graph has no root.
void nseq_model::register_existing_values(seq::nielsen_graph& nielsen) {
    seq::nielsen_node const* root = nielsen.root();
    if (!root)
        return;

    // register one equation side, skipping missing nodes and expressions
    auto register_side = [this](euf::snode* side) {
        if (!side)
            return;
        if (expr* e = side->get_expr())
            m_factory->register_value(e);
    };

    for (auto const& eq : root->str_eqs()) {
        register_side(eq.m_lhs);
        register_side(eq.m_rhs);
    }
}
// Return the value recorded for var, creating one through mk_fresh_value on
// first use. A newly created value is pinned in m_trail and stored in
// m_var_values. May return nullptr if no value could be produced.
expr* nseq_model::get_var_value(euf::snode* var) {
    expr* known = nullptr;
    if (m_var_values.find(var->id(), known))
        return known;

    // unconstrained or regex-constrained variable
    expr* created = mk_fresh_value(var);
    if (!created)
        return nullptr;
    m_trail.push_back(created);
    m_var_values.insert(var->id(), created);
    return created;
}
// Produce a value for a variable that has no assignment.
// If a regex is recorded for the variable in m_var_regex, a witness for that
// regex is generated, pinned, registered with the factory and returned.
// Otherwise the factory supplies a fresh value of the variable's sort
// (the string sort when the variable has no expression).
expr* nseq_model::mk_fresh_value(euf::snode* var) {
    euf::snode* re = nullptr;
    bool const has_regex = m_var_regex.find(var->id(), re) && re;
    if (has_regex) {
        expr_ref witness = generate_regex_witness(re);
        if (witness) {
            m_trail.push_back(witness);
            m_factory->register_value(witness);
            return witness;
        }
    }

    // no regex constraint or witness generation failed: plain fresh value
    sort* srt = m_seq.str.mk_string_sort();
    if (expr* var_expr = var->get_expr())
        srt = var_expr->get_sort();
    return m_factory->get_fresh_value(srt);
}
// Populate m_var_regex from the memberships in state.str_mems().
// Only memberships whose string side is a variable snode are considered.
// The first regex seen for a variable is stored as is; later ones replace
// the entry with the snode of re.inter(existing, new), provided both
// expressions exist and the sgraph returns a node for the intersection.
void nseq_model::collect_var_regex_constraints(nseq_state const& state) {
    for (auto const& mem : state.str_mems()) {
        if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var())
            continue;

        unsigned const id = mem.m_str->id();
        euf::snode* previous = nullptr;
        bool const has_previous = m_var_regex.find(id, previous) && previous;
        if (!has_previous) {
            m_var_regex.insert(id, mem.m_regex);
            continue;
        }

        // intersect with the constraint recorded earlier
        expr* e1 = previous->get_expr();
        expr* e2 = mem.m_regex->get_expr();
        if (!e1 || !e2)
            continue;
        expr_ref inter(m_seq.re.mk_inter(e1, e2), m);
        if (euf::snode* inter_sn = m_sg.mk(inter))
            m_var_regex.insert(id, inter_sn);
    }
}
// Check the regex memberships of state against the model mdl.
// A positive membership fails when mdl.is_false holds for `str in regex`.
// A negative membership fails when the atom evaluates to true in mdl.
// Entries lacking a node or an expression on either side are skipped.
// Each violation is reported on the verbose stream at level 0.
// Returns true iff no violation was found.
bool nseq_model::validate_regex(nseq_state const& state, ::proto_model& mdl) {
    // fetch both expressions of a membership; false if anything is missing
    auto exprs_of = [](euf::snode* str, euf::snode* re, expr*& s_out, expr*& r_out) {
        if (!str || !re)
            return false;
        s_out = str->get_expr();
        r_out = re->get_expr();
        return s_out && r_out;
    };

    bool all_hold = true;

    // positive memberships: str ∈ regex
    for (auto const& mem : state.str_mems()) {
        expr* s_expr = nullptr;
        expr* r_expr = nullptr;
        if (!exprs_of(mem.m_str, mem.m_regex, s_expr, r_expr))
            continue;
        expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m);
        if (!mdl.is_false(in_re))
            continue;
        IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
                   << mk_bounded_pp(s_expr, m, 3)
                   << " in " << mk_bounded_pp(r_expr, m, 3) << "\n";);
        all_hold = false;
    }

    // negative memberships: str ∉ regex
    for (auto const& entry : state.neg_mems()) {
        expr* s_expr = nullptr;
        expr* r_expr = nullptr;
        if (!exprs_of(entry.m_str, entry.m_regex, s_expr, r_expr))
            continue;
        expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m);
        expr_ref val(m);
        mdl.eval(in_re, val, true);
        if (!val || !m.is_true(val))
            continue;
        IF_VERBOSE(0, verbose_stream() << "nseq model: negative membership violated: "
                   << mk_bounded_pp(s_expr, m, 3)
                   << " not in " << mk_bounded_pp(r_expr, m, 3) << "\n";);
        all_hold = false;
    }

    return all_hold;
}
}

View file

@ -7,7 +7,18 @@ Module Name:
Abstract:
Model generation from solved Nielsen graph.
Model construction for the Nielsen-based string solver (theory_nseq).
After the Nielsen graph search returns sat, this module extracts
variable-to-value assignments from the satisfying leaf node and
builds model_value_proc callbacks for the SMT model generator.
The workflow is:
1. init() allocate seq_factory, register existing string literals,
and extract variable assignments from the satisfying Nielsen node.
2. mk_value(enode*) return a model_value_proc that lazily builds
the concrete value for a given enode.
3. finalize() clean up temporary state.
Author:
@ -16,57 +27,99 @@ Author:
--*/
#pragma once
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
#include "util/zstring.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/smt_types.h"
#include "smt/seq/seq_nielsen.h"
#include <vector>
#include <utility>
#include "model/seq_factory.h"
class proto_model;
namespace smt {
class theory_nseq;
class nseq_regex;
class nseq_state;
class model_value_proc;
class nseq_model {
ast_manager& m;
seq_util m_seq;
euf::sgraph& m_sg;
unsigned m_fresh_counter = 0;
theory_nseq& m_th;
ast_manager& m;
seq_util& m_seq;
seq_rewriter& m_rewriter;
euf::sgraph& m_sg;
nseq_regex& m_regex;
// factory for generating fresh string/regex values
seq_factory* m_factory = nullptr;
// variable assignments extracted from the satisfying Nielsen node.
// maps snode id -> expr* (concrete value)
u_map<expr*> m_var_values;
// trail for GC protection of generated expressions
expr_ref_vector m_trail;
// per-variable regex constraints: maps snode id -> intersected regex snode.
// collected during init() from the state's str_mem list.
u_map<euf::snode*> m_var_regex;
public:
nseq_model(ast_manager& m, euf::sgraph& sg) : m(m), m_seq(m), m_sg(sg) {}
nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex);
// generate a fresh string value (used when a variable is unconstrained)
expr_ref mk_fresh_value() {
std::string name = "s!" + std::to_string(m_fresh_counter++);
zstring zs(name.c_str());
return expr_ref(m_seq.str.mk_string(zs), m);
}
// Phase 1: initialize model construction.
// Allocates seq_factory, registers it with mg, collects
// existing string literals, and extracts variable assignments
// from the satisfying Nielsen leaf node.
void init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state);
// extract variable assignments from a satisfied leaf node
// Returns true if all variables got a valid assignment
bool extract_assignments(seq::nielsen_node* node,
std::vector<std::pair<euf::snode*, expr*>>& assignment) {
if (!node)
return false;
for (auto const& eq : node->str_eqs()) {
if (!eq.m_lhs || !eq.m_rhs)
continue;
if (eq.m_lhs->is_var() && eq.m_rhs->get_expr()) {
assignment.emplace_back(eq.m_lhs, eq.m_rhs->get_expr());
}
else if (eq.m_rhs->is_var() && eq.m_lhs->get_expr()) {
assignment.emplace_back(eq.m_rhs, eq.m_lhs->get_expr());
}
}
return true;
}
// Phase 2: build a model_value_proc for the given enode.
// Returns nullptr if the enode is not a sequence/string sort.
model_value_proc* mk_value(enode* n, model_generator& mg);
// validate that a regex membership constraint is satisfied by the assignment
bool validate_regex(seq::str_mem const& mem,
obj_map<euf::snode, expr*> const& assignment) {
// stub: assume valid for now
return true;
}
// Phase 3: clean up temporary model construction state.
void finalize(model_generator& mg);
// Validate that model assignments satisfy all regex membership
// constraints from the state. Checks positive and negative
// memberships. Returns true if all constraints pass.
bool validate_regex(nseq_state const& state, ::proto_model& mdl);
private:
// extract variable assignments from a satisfying Nielsen node.
// Walks str_eqs looking for x = value patterns and records them.
void extract_assignments(seq::nielsen_node const* node);
// recursively substitute known variable assignments into an snode tree.
// Returns a concrete Z3 expression.
expr_ref snode_to_value(euf::snode* n);
// generate a concrete witness string for a regex.
// Uses nullable check and first-char collection to build
// a minimal satisfying string. depth bounds recursion.
expr_ref generate_regex_witness(euf::snode* regex, unsigned depth = 0);
// register all string literals appearing in the constraint store
// with the factory to avoid collisions with fresh values.
void register_existing_values(seq::nielsen_graph& nielsen);
// look up or compute the value for an snode variable.
// If no assignment exists, delegates to mk_fresh_value.
expr* get_var_value(euf::snode* var);
// generate a fresh value for a variable, respecting regex
// membership constraints. If the variable has associated
// regex constraints (collected during init), generates a
// witness satisfying the intersection; otherwise falls back
// to a plain fresh value from the factory.
expr* mk_fresh_value(euf::snode* var);
// collect per-variable regex constraints from the state.
// For each positive str_mem, records the regex (or intersects
// with existing) into m_var_regex keyed by the string snode id.
void collect_var_regex_constraints(nseq_state const& state);
};
}

View file

@ -7,7 +7,7 @@ Module Name:
Abstract:
Implementation of nseq_regex.
Lazy regex membership processing for the Nielsen-based string solver.
Author:
@ -15,3 +15,394 @@ Author:
--*/
#include "smt/nseq_regex.h"
namespace smt {
// -----------------------------------------------------------------------
// Regex emptiness checking (structural analysis)
// -----------------------------------------------------------------------
// Structural test for the empty language. Returns true only when emptiness
// follows from the shape of the regex; false means "not known to be empty"
// (including a null node or a node without expression).
bool nseq_regex::is_empty_regex(euf::snode* re) const {
    if (!re)
        return false;

    // the empty-language constant itself
    if (re->is_fail())
        return true;

    // node kinds that always accept something
    bool const never_empty = re->is_star() || re->is_to_re() ||
                             re->is_full_char() || re->is_full_seq();
    if (never_empty)
        return false;

    // a nullable loop accepts ε
    if (re->is_loop() && re->is_nullable())
        return false;

    seq_util& seq = m_sg.get_seq_util();
    expr* e = re->get_expr();
    if (!e)
        return false;

    expr* r1 = nullptr;
    expr* r2 = nullptr;

    // union: empty iff both operands are empty
    if (seq.re.is_union(e, r1, r2)) {
        SASSERT(re->num_args() == 2);
        return is_empty_regex(re->arg(0)) && is_empty_regex(re->arg(1));
    }

    // concatenation: empty if either operand is empty
    if (seq.re.is_concat(e, r1, r2)) {
        SASSERT(re->num_args() == 2);
        return is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1));
    }

    // intersection: an empty operand is sufficient but not necessary,
    // so keep checking the remaining rules when neither operand is empty
    if (seq.re.is_intersection(e, r1, r2)) {
        SASSERT(re->num_args() == 2);
        bool const operand_empty = is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1));
        if (operand_empty)
            return true;
    }

    // complement of the full language
    bool const complement_of_all =
        re->is_complement() && re->num_args() == 1 && re->arg(0)->is_full_seq();
    if (complement_of_all)
        return true;

    // loop over an empty body: empty unless the loop is nullable
    if (re->is_loop() && re->num_args() >= 1 && is_empty_regex(re->arg(0)))
        return !re->is_nullable();

    return false;
}
// -----------------------------------------------------------------------
// Cycle detection
// -----------------------------------------------------------------------
// True iff extract_cycle finds a history entry matching the current regex.
bool nseq_regex::detect_cycle(seq::str_mem const& mem) const {
    euf::snode* const cycle = extract_cycle(mem);
    return cycle != nullptr;
}
// -----------------------------------------------------------------------
// Ground prefix consumption
// -----------------------------------------------------------------------
// Consume leading concrete characters of mem.m_str, replacing mem.m_regex by
// its derivative for each consumed character. mem is updated in place.
// Returns:
//   conflict  - a derivative is the fail node, or the string was consumed
//               entirely and the remaining regex is not nullable
//   satisfied - the string was consumed entirely and the regex is nullable
//   ok        - otherwise (missing side, non-character front, or a
//               derivative that could not be computed)
nseq_regex::simplify_status nseq_regex::simplify_ground_prefix(seq::str_mem& mem) {
    if (!mem.m_str || !mem.m_regex)
        return simplify_status::ok;

    for (;;) {
        euf::snode* str = mem.m_str;
        if (!str || str->is_empty())
            break;
        euf::snode* head = str->first();
        if (!head || !head->is_char())
            break;
        euf::snode* next_re = m_sg.brzozowski_deriv(mem.m_regex, head);
        if (!next_re)
            break;
        if (next_re->is_fail())
            return simplify_status::conflict;
        mem.m_str = m_sg.drop_first(str);
        mem.m_regex = next_re;
    }

    // something is left on the string side: nothing more to decide here
    if (!mem.m_str || !mem.m_str->is_empty())
        return simplify_status::ok;

    // whole string consumed: the verdict is nullability of what remains
    return mem.m_regex->is_nullable() ? simplify_status::satisfied
                                      : simplify_status::conflict;
}
// -----------------------------------------------------------------------
// Ground suffix consumption (best-effort)
// -----------------------------------------------------------------------
// Best-effort suffix simplification. No reverse derivatives are computed:
// only a fully ground string is handled, and that case is delegated to
// simplify_ground_prefix, which consumes it from the front. Any other input
// (missing side or non-ground string) is left untouched with status ok.
nseq_regex::simplify_status nseq_regex::simplify_ground_suffix(seq::str_mem& mem) {
    bool const applicable = mem.m_str && mem.m_regex && mem.m_str->is_ground();
    if (!applicable)
        return simplify_status::ok;
    return simplify_ground_prefix(mem);
}
// -----------------------------------------------------------------------
// Trivial checks
// -----------------------------------------------------------------------
// Quick verdict on a membership constraint.
// Returns -1 for a conflict, 1 when trivially satisfied, 0 when undecided.
// Checks are applied in this order: empty regex, full regex, empty string.
int nseq_regex::check_trivial(seq::str_mem const& mem) const {
    euf::snode* str = mem.m_str;
    euf::snode* re  = mem.m_regex;
    if (!str || !re)
        return 0;

    // regex is ∅: nothing can be a member
    if (is_empty_regex(re))
        return -1;

    // regex is Σ*: everything is a member
    if (is_full_regex(re))
        return 1;

    // only the empty string can be decided without further work
    if (!str->is_empty())
        return 0;
    return re->is_nullable() ? 1 : -1;
}
// -----------------------------------------------------------------------
// Minterm computation with filtering
// -----------------------------------------------------------------------
// Append to minterms every minterm of regex that is neither null nor the
// fail node. The raw minterms come from sgraph::compute_minterms.
// The results are character-class regex expressions rather than concrete
// characters, so callers are expected to derive with concrete or fresh
// characters instead of with the minterms themselves.
void nseq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) {
    if (!regex)
        return;

    euf::snode_vector raw;
    m_sg.compute_minterms(regex, raw);

    for (euf::snode* mt : raw) {
        if (mt && !mt->is_fail())
            minterms.push_back(mt);
    }
}
// -----------------------------------------------------------------------
// Collect first characters
// -----------------------------------------------------------------------
// Append to chars candidate first characters for words of re. A character
// node is never added twice; existing entries of chars are kept.
//   to_re(s)   - the first character of s; if the first element of s is a
//                non-empty string literal, its first character
//   full char  - 'a' as representative
//   range      - the lower bound as representative, when it is a constant
//                character, a unit wrapping one, or a one-character literal
//   fail, full sequence - nothing
//   otherwise  - the union of the candidates of all children
void nseq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
    if (!re)
        return;

    // push ch unless the very same node is already present
    auto add_unique = [&chars](euf::snode* ch) {
        for (euf::snode* c : chars)
            if (c == ch)
                return;
        chars.push_back(ch);
    };

    seq_util& seq = m_sg.get_seq_util();

    // to_re(s): extract first character of the string body
    if (re->is_to_re()) {
        euf::snode* body = re->arg(0);
        if (body && !body->is_empty()) {
            euf::snode* first = body->first();
            if (first && first->is_char()) {
                add_unique(first);
            }
            // string literals are not character nodes: take their first char
            else if (first && first->get_expr()) {
                zstring s;
                if (seq.str.is_string(first->get_expr(), s) && s.length() > 0)
                    add_unique(m_sg.mk_char(s[0]));
            }
        }
        return;
    }

    // full character set (.): use 'a' as representative
    if (re->is_full_char()) {
        add_unique(m_sg.mk_char('a'));
        return;
    }

    // re.range(lo, hi): use lo as representative
    if (expr* e = re->get_expr()) {
        expr* lo = nullptr, *hi = nullptr;
        if (seq.re.is_range(e, lo, hi) && lo) {
            unsigned ch_val = 'a';
            expr* lo_ch = nullptr;
            zstring s;
            if (seq.is_const_char(lo, ch_val))
                add_unique(m_sg.mk_char(ch_val));
            // the bound may also be given as a unit sequence ...
            else if (seq.str.is_unit(lo, lo_ch) && seq.is_const_char(lo_ch, ch_val))
                add_unique(m_sg.mk_char(ch_val));
            // ... or as a one-character string literal
            else if (seq.str.is_string(lo, s) && s.length() == 1)
                add_unique(m_sg.mk_char(s[0]));
            return;
        }
    }

    if (re->is_fail() || re->is_full_seq())
        return;

    // recurse into children (handles union, concat, star, loop, etc.)
    for (unsigned i = 0; i < re->num_args(); ++i)
        collect_first_chars(re->arg(i), chars);
}
// -----------------------------------------------------------------------
// Membership processing
// -----------------------------------------------------------------------
// Simplify one membership constraint.
//   mem      - the constraint to process (not modified)
//   out_mems - receives at most one residual constraint
// Returns false on a detected conflict, true otherwise. When true is
// returned without pushing anything, the constraint is either vacuous
// (missing side) or already satisfied.
bool nseq_regex::process_str_mem(seq::str_mem const& mem,
                                 vector<seq::str_mem>& out_mems) {
    if (!mem.m_str || !mem.m_regex)
        return true;

    // empty string: check nullable
    if (mem.m_str->is_empty())
        return mem.m_regex->is_nullable();

    // consume ground prefix: derive regex by each leading concrete char
    seq::str_mem working = mem;
    simplify_status st = simplify_ground_prefix(working);
    if (st == simplify_status::conflict)
        return false;
    if (st == simplify_status::satisfied)
        return true;

    // simplify_ground_prefix stops in front of a concrete character only
    // when the derivative for it could not be computed. Retry once, but
    // accept the step only if a derivative is actually produced. Otherwise
    // the residual would carry a null regex with one character already
    // dropped, and code that skips null regexes would lose the constraint.
    euf::snode* first = working.m_str->first();
    if (first && first->is_char()) {
        seq::str_mem derived = derive(working, first);
        if (derived.m_regex) {
            if (is_empty_regex(derived.m_regex))
                return false;
            out_mems.push_back(derived);
            return true;
        }
        // no derivative: fall through and keep the constraint unchanged
    }

    // string starts with a non-ground element (variable or unit), or the
    // derivative is unavailable: return the simplified constraint for the
    // Nielsen graph to expand via character-split modifiers.
    out_mems.push_back(working);
    return true;
}
// -----------------------------------------------------------------------
// History recording
// -----------------------------------------------------------------------
// Return a copy of mem whose history has history_re prepended.
// The chain uses regex concatenation as a cons cell:
//     new_history = re.concat(history_re, old_history)
// with the newest entry first. If mem has no history yet, or either
// expression is missing, history_re alone becomes the history.
// NOTE(review): a null history_re results in a null history even when mem
// already had one; confirm callers never pass nullptr.
seq::str_mem nseq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) {
    euf::snode* chained = history_re;
    euf::snode* old_history = mem.m_history;

    if (old_history && history_re) {
        expr* latest = history_re->get_expr();
        expr* older  = old_history->get_expr();
        if (latest && older) {
            seq_util& seq = m_sg.get_seq_util();
            expr_ref chain(seq.re.mk_concat(latest, older), m_sg.get_manager());
            chained = m_sg.mk(chain);
        }
    }

    return seq::str_mem(mem.m_str, mem.m_regex, chained, mem.m_id, mem.m_dep);
}
// -----------------------------------------------------------------------
// Cycle detection
// -----------------------------------------------------------------------
// Search the history of mem for an entry equal to its current regex.
// The history is read as a chain of regex concatenations: arg(0) of each
// concat is one recorded regex, arg(1) the rest of the chain, and a
// non-concat node is the last entry. At most 1000 links are inspected.
// Returns the matching history entry, or nullptr if none is found or if
// mem has no regex or no history.
euf::snode* nseq_regex::extract_cycle(seq::str_mem const& mem) const {
    if (!mem.m_regex || !mem.m_history)
        return nullptr;

    euf::snode* const current = mem.m_regex;
    seq_util& seq = m_sg.get_seq_util();

    euf::snode* hist = mem.m_history;
    for (unsigned steps = 0; hist && steps < 1000; ++steps) {
        // split a cons cell into (entry, tail); a leaf is its own entry
        bool const is_cons = hist->is_concat() && hist->get_expr() &&
                             seq.re.is_concat(hist->get_expr());
        euf::snode* entry = is_cons ? hist->arg(0) : hist;
        euf::snode* tail  = is_cons ? hist->arg(1) : nullptr;

        // same node: fast path
        if (entry == current)
            return entry;

        // same underlying expression: fallback
        expr* entry_expr   = entry->get_expr();
        expr* current_expr = current->get_expr();
        if (entry_expr && current_expr && entry_expr == current_expr)
            return entry;

        hist = tail;
    }
    return nullptr;
}
// -----------------------------------------------------------------------
// Stabilizer from cycle
// -----------------------------------------------------------------------
// Build the stabilizer for a detected cycle: the Kleene star of cycle_regex,
// created through the sgraph. current_regex is only checked for null.
// Returns nullptr if either argument is null or cycle_regex has no
// expression.
euf::snode* nseq_regex::stabilizer_from_cycle(euf::snode* cycle_regex,
                                              euf::snode* current_regex) {
    if (!cycle_regex || !current_regex)
        return nullptr;

    expr* body = cycle_regex->get_expr();
    if (!body)
        return nullptr;

    // cycle_regex* via the sgraph's expression factory
    seq_util& seq = m_sg.get_seq_util();
    expr_ref starred(seq.re.mk_star(body), m_sg.get_manager());
    return m_sg.mk(starred);
}
// -----------------------------------------------------------------------
// Stabilizer-based subsumption
// -----------------------------------------------------------------------
// Decide whether mem is subsumed thanks to a cycle in its history.
// Returns true only when a cycle entry exists, a stabilizer can be built
// for it, and the cycle entry is the very node of the current regex.
// In that case R ⊆ R* holds trivially. Any other containment would need a
// regex inclusion procedure and is answered with false.
bool nseq_regex::try_subsume(seq::str_mem const& mem) {
    euf::snode* cycle = extract_cycle(mem);
    if (!cycle)
        return false;

    // the stabilizer must exist; beyond that it is not inspected here
    if (!stabilizer_from_cycle(cycle, mem.m_regex))
        return false;

    // only the pointer-equality case is handled
    return cycle == mem.m_regex;
}
}

View file

@ -7,8 +7,20 @@ Module Name:
Abstract:
Regex membership handling using Brzozowski derivatives.
Processes str_mem constraints after character consumption.
Lazy regex membership processing for the Nielsen-based string solver.
Provides Brzozowski derivative computation, ground prefix/suffix
consumption, cycle detection in derivation histories, and
stabilizer-based subsumption for regex membership constraints.
Ports the following ZIPT StrMem operations:
- SimplifyCharRegex / SimplifyDir (ground prefix/suffix consumption)
- ExtractCycle / StabilizerFromCycle (cycle detection and widening)
- TrySubsume (stabilizer-based subsumption)
The class wraps sgraph operations (brzozowski_deriv, compute_minterms,
drop_first, etc.) and provides a higher-level interface for
nielsen_graph and theory_nseq.
Author:
@ -28,39 +40,146 @@ namespace smt {
public:
nseq_regex(euf::sgraph& sg) : m_sg(sg) {}
// check if a regex snode represents the empty language
bool is_empty_regex(euf::snode* re) const {
return re && re->is_fail();
euf::sgraph& sg() { return m_sg; }
// -----------------------------------------------------------------
// Basic regex predicates
// -----------------------------------------------------------------
// check if regex is the empty language (∅ / re.empty).
// performs structural analysis beyond is_fail() to detect
// derived emptiness (e.g., union of empties, concat with empty).
bool is_empty_regex(euf::snode* re) const;
// check if regex is the full language (Σ* / re.all)
bool is_full_regex(euf::snode* re) const {
return re && re->is_full_seq();
}
// compute derivative of regex re with respect to char elem and
// return a new str_mem for the resulting constraint
// check if regex accepts the empty string
bool is_nullable(euf::snode* re) const {
return re && re->is_nullable();
}
// check if regex is ground (no string variables)
bool is_ground(euf::snode* re) const {
return re && re->is_ground();
}
// -----------------------------------------------------------------
// Derivative computation
// -----------------------------------------------------------------
// compute Brzozowski derivative of regex w.r.t. character element.
// returns nullptr on failure.
euf::snode* derivative(euf::snode* re, euf::snode* elem) {
return m_sg.brzozowski_deriv(re, elem);
}
// compute derivative of a str_mem constraint: advance past one character.
// the string side is shortened by drop_first and the regex is derived.
seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem);
euf::snode* new_str = m_sg.drop_first(mem.m_str);
return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep);
}
// process a regex membership constraint after one character has been consumed
// returns false if the resulting regex is empty (conflict)
bool process_str_mem(seq::str_mem const& mem,
vector<seq::str_mem>& out_mems) {
if (!mem.m_str || !mem.m_regex)
return true;
// if regex does not accept the empty string and the string side is empty, conflict
if (mem.m_str->is_empty()) {
return mem.m_regex->is_nullable();
}
// compute minterms for the regex
euf::snode_vector minterms;
m_sg.compute_minterms(mem.m_regex, minterms);
for (euf::snode* ch : minterms) {
seq::str_mem new_mem = derive(mem, ch);
if (!is_empty_regex(new_mem.m_regex))
out_mems.push_back(new_mem);
}
return true;
// -----------------------------------------------------------------
// Ground prefix/suffix consumption
// -----------------------------------------------------------------
enum class simplify_status { ok, conflict, satisfied };
// consume ground characters from the front of mem.m_str by computing
// Brzozowski derivatives against mem.m_regex.
// stops when:
// - the string front is not a concrete character (ok)
// - a derivative produces ∅ (conflict)
// - the string becomes empty and regex is nullable (satisfied)
// - the string becomes empty and regex is not nullable (conflict)
// modifies mem in-place.
simplify_status simplify_ground_prefix(seq::str_mem& mem);
// consume ground characters from the back of mem.m_str by computing
// reverse derivatives. modifies mem in-place.
// (reverse derivatives require regex reversal; this is a best-effort
// simplification that handles the common case of trailing constants.)
simplify_status simplify_ground_suffix(seq::str_mem& mem);
// -----------------------------------------------------------------
// Trivial checks
// -----------------------------------------------------------------
// quick check for trivially sat/unsat membership.
// returns 1 if satisfied (empty string in nullable regex, or full regex)
// returns -1 if conflicting (empty string in non-nullable, or ∅ regex)
// returns 0 if undetermined
int check_trivial(seq::str_mem const& mem) const;
// -----------------------------------------------------------------
// Minterm and character computation
// -----------------------------------------------------------------
// compute minterms (character class partition) from regex.
// Thin forwarder: passes `re` and the output vector `minterms` straight to
// the sgraph's compute_minterms; nothing is filtered or post-processed here.
void compute_minterms(euf::snode* re, euf::snode_vector& minterms) {
m_sg.compute_minterms(re, minterms);
}
// compute minterms for character splitting, filtering out empty
// (fail) minterms. Minterms are regex character-class expressions
// forming a partition of the alphabet; callers use them to drive
// fresh-variable creation in character-split modifiers.
void get_minterms(euf::snode* regex, euf::snode_vector& minterms);
// collect concrete first-position characters from a regex.
// extracts characters reachable from to_re leaves and simple ranges.
void collect_first_chars(euf::snode* re, euf::snode_vector& chars);
// -----------------------------------------------------------------
// Membership processing
// -----------------------------------------------------------------
// process a str_mem constraint by consuming ground characters from
// the string front via Brzozowski derivatives. If the entire ground
// prefix is consumed and the constraint is neither satisfied nor
// conflicting, the (simplified) constraint is pushed to out_mems
// for the Nielsen graph to expand via character-split modifiers.
// returns false if the constraint is immediately conflicting
// (empty string in non-nullable regex, or derivative yields ∅).
bool process_str_mem(seq::str_mem const& mem,
vector<seq::str_mem>& out_mems);
// -----------------------------------------------------------------
// Cycle detection and stabilizers
// -----------------------------------------------------------------
// record current regex in the derivation history of a str_mem.
// the history tracks a chain of (regex, id) pairs for cycle detection.
// returns the updated str_mem.
seq::str_mem record_history(seq::str_mem const& mem, euf::snode* history_re);
// check if the derivation history of mem contains a cycle, i.e.,
// the same regex id appears twice in the history chain.
// if found, returns the cycle entry point regex; nullptr otherwise.
euf::snode* extract_cycle(seq::str_mem const& mem) const;
// check if the derivation history exhibits a cycle.
// returns true when the current regex matches a previously seen regex
// in the history chain. used to trigger stabilizer introduction.
bool detect_cycle(seq::str_mem const& mem) const;
// compute a Kleene star stabilizer from a cycle.
// given the regex at the cycle point and the current regex,
// builds r* that over-approximates any number of cycle iterations.
// returns nullptr if no stabilizer can be computed.
euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
euf::snode* current_regex);
// try to subsume a str_mem constraint using stabilizer-based
// reasoning: if extract_cycle finds a cycle, check whether
// the current regex is already covered by the stabilizer.
// returns true if the constraint can be dropped.
bool try_subsume(seq::str_mem const& mem);
};
}

View file

@ -21,15 +21,48 @@ Author:
#include "util/vector.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/smt_literal.h"
namespace smt {
class enode;
// source info for a string equality (the two enodes whose merge caused it).
// Both members default to nullptr so a default-constructed record never
// carries indeterminate pointers; aggregate initialization {n1, n2} still works.
struct eq_source {
    enode* m_n1 = nullptr;
    enode* m_n2 = nullptr;
};
// source info for a regex membership (the literal that asserted it)
struct mem_source {
// literal stored alongside the membership when it is added to the state
literal m_lit;
};
// source info for a string disequality: the two enodes asserted distinct.
// Both members default to nullptr so a default-constructed record never
// carries indeterminate pointers; aggregate initialization {n1, n2} still works.
struct diseq_source {
    enode* m_n1 = nullptr;
    enode* m_n2 = nullptr;
};
// negative regex membership: ¬(str in regex).
// The snode pointers default to nullptr so a default-constructed entry never
// carries indeterminate pointers; aggregate initialization
// {str, regex, lit} still works.
struct neg_mem_entry {
    euf::snode* m_str = nullptr;    // string side of the membership
    euf::snode* m_regex = nullptr;  // regex side of the membership
    literal m_lit;                  // literal stored with the entry
};
class nseq_state {
euf::sgraph& m_sg;
vector<seq::str_eq> m_str_eqs;
vector<seq::str_mem> m_str_mems;
vector<eq_source> m_eq_sources;
vector<mem_source> m_mem_sources;
vector<diseq_source> m_diseqs;
vector<neg_mem_entry> m_neg_mems;
unsigned_vector m_str_eq_lim;
unsigned_vector m_str_mem_lim;
unsigned_vector m_diseq_lim;
unsigned_vector m_neg_mem_lim;
unsigned m_next_mem_id = 0;
public:
@ -38,37 +71,68 @@ namespace smt {
// open a backtracking scope: remember the current size of every
// constraint list so pop() can shrink back to it
void push() {
    m_neg_mem_lim.push_back(m_neg_mems.size());
    m_diseq_lim.push_back(m_diseqs.size());
    m_str_mem_lim.push_back(m_str_mems.size());
    m_str_eq_lim.push_back(m_str_eqs.size());
}
// close `n` scopes, one at a time: each list is shrunk to the size that
// was recorded by the matching push(), and the recorded size is discarded.
// The source vectors are kept in lock-step with their constraint vectors.
void pop(unsigned n) {
    while (n-- > 0) {
        unsigned const eq_sz = m_str_eq_lim.back();
        m_str_eq_lim.pop_back();
        m_str_eqs.shrink(eq_sz);
        m_eq_sources.shrink(eq_sz);

        unsigned const mem_sz = m_str_mem_lim.back();
        m_str_mem_lim.pop_back();
        m_str_mems.shrink(mem_sz);
        m_mem_sources.shrink(mem_sz);

        unsigned const diseq_sz = m_diseq_lim.back();
        m_diseq_lim.pop_back();
        m_diseqs.shrink(diseq_sz);

        unsigned const neg_sz = m_neg_mem_lim.back();
        m_neg_mem_lim.pop_back();
        m_neg_mems.shrink(neg_sz);
    }
}
void add_str_eq(euf::snode* lhs, euf::snode* rhs) {
// record the string equality lhs = rhs together with the two enodes it
// came from; the equality and its source share the same index
void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) {
    seq::dep_tracker dep;
    seq::str_eq eq(lhs, rhs, dep);
    m_str_eqs.push_back(eq);
    m_eq_sources.push_back(eq_source{n1, n2});
}
void add_str_mem(euf::snode* str, euf::snode* regex) {
// record the membership `str in regex` together with the literal `lit`;
// the membership gets the next id from m_next_mem_id and shares its index
// with the stored source
void add_str_mem(euf::snode* str, euf::snode* regex, literal lit) {
    seq::dep_tracker dep;
    unsigned const id = m_next_mem_id++;
    seq::str_mem mem(str, regex, nullptr, id, dep);
    m_str_mems.push_back(mem);
    m_mem_sources.push_back(mem_source{lit});
}
// record a disequality between the two enodes
void add_diseq(enode* n1, enode* n2) {
    diseq_source entry{n1, n2};
    m_diseqs.push_back(entry);
}
// record a negative membership ¬(str in regex) with its literal
void add_neg_mem(euf::snode* str, euf::snode* regex, literal lit) {
    neg_mem_entry entry{str, regex, lit};
    m_neg_mems.push_back(entry);
}
// read-only views of the four constraint lists held by this state
vector<seq::str_eq> const& str_eqs() const { return m_str_eqs; }
vector<seq::str_mem> const& str_mems() const { return m_str_mems; }
vector<diseq_source> const& diseqs() const { return m_diseqs; }
vector<neg_mem_entry> const& neg_mems() const { return m_neg_mems; }
bool empty() const { return m_str_eqs.empty() && m_str_mems.empty(); }
// indexed access to stored sources/entries; `i` is used as a direct index
// into the corresponding vector (no bounds check is visible here)
eq_source const& get_eq_source(unsigned i) const { return m_eq_sources[i]; }
mem_source const& get_mem_source(unsigned i) const { return m_mem_sources[i]; }
diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; }
neg_mem_entry const& get_neg_mem(unsigned i) const { return m_neg_mems[i]; }
// true only when all four constraint lists (eqs, mems, neg mems, diseqs) are empty
bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_neg_mems.empty() && m_diseqs.empty(); }
// drop every recorded constraint, every source record and every scope
// marker. m_next_mem_id is left untouched.
void reset() {
    // equalities
    m_str_eqs.reset();
    m_eq_sources.reset();
    m_str_eq_lim.reset();
    // positive memberships
    m_str_mems.reset();
    m_mem_sources.reset();
    m_str_mem_lim.reset();
    // disequalities
    m_diseqs.reset();
    m_diseq_lim.reset();
    // negative memberships
    m_neg_mems.reset();
    m_neg_mem_lim.reset();
}
};

File diff suppressed because it is too large Load diff

View file

@ -183,22 +183,27 @@ Abstract:
detection during character substitution are not ported.
Modifier hierarchy (Constraints/Modifier/):
- All ~15 Modifier subclasses driving graph expansion are not ported:
VarNielsenModifier, ConstNielsenModifier, DirectedNielsenModifier,
EqSplitModifier, RegexVarSplitModifier, RegexCharSplitModifier,
StarIntrModifier, PowerSplitModifier, GPowerIntrModifier,
NumCmpModifier, NumUnwindingModifier, PowerEpsilonModifier,
DecomposeModifier, CombinedModifier, DetModifier.
- The modifier pattern (each Modifier produces one or more child nodes by
applying substitutions + side conditions to the parent node) is not ported.
- 13 Modifier subclasses driving graph expansion are ported as
apply_* methods in generate_extensions, matching ZIPT's TypeOrder
priority: DetModifier(1), PowerEpsilonModifier(2), NumCmpModifier(3),
ConstNumUnwindingModifier(4), EqSplitModifier(5), StarIntrModifier(6),
GPowerIntrModifier(7), ConstNielsenModifier(8), RegexCharSplitModifier(9),
RegexVarSplitModifier(10), PowerSplitModifier(11), VarNielsenModifier(12),
VarNumUnwindingModifier(13).
- NOT PORTED: DirectedNielsenModifier, DecomposeModifier, CombinedModifier.
- NumCmp, ConstNumUnwinding, VarNumUnwinding are approximated (no PDD
integer polynomial infrastructure; power tokens are replaced with ε
or peeled with fresh variables instead of exact exponent arithmetic).
Search procedure:
- NielsenNode.GraphExpansion(): the recursive search with iterative deepening
(depth-bounded DFS with SAT/UNSAT/CYCLIC return codes) is not ported.
- NielsenNode.SimplifyAndInit(): the simplification-and-initialization pass
run at node creation is not ported.
- NielsenGraph.Check(): the top-level entry point with iterative deepening,
inner solver setup and subsumption-node lookup is not ported.
- NielsenGraph.Check() / NielsenNode.GraphExpansion(): ported as
nielsen_graph::solve() (iterative deepening, 6 rounds starting at
depth 10, doubling) and search_dfs() (depth-bounded DFS with
eval_idx cycle detection and node status tracking). The inner solver
setup and subsumption-node lookup within Check() are not ported.
- NielsenNode.SimplifyAndInit(): ported as
nielsen_node::simplify_and_init() with prefix matching, symbol clash,
empty propagation, and Brzozowski derivative consumption.
- NielsenGraph.FindExisting(): the subsumption cache lookup over
subsumptionCandidates is not ported.
@ -231,6 +236,7 @@ Author:
#include "util/vector.h"
#include "util/uint_set.h"
#include "ast/ast.h"
#include "ast/arith_decl_plugin.h"
#include "ast/seq_decl_plugin.h"
#include "ast/euf/euf_sgraph.h"
@ -281,6 +287,9 @@ namespace seq {
bool is_superset(dep_tracker const& other) const;
bool empty() const;
// collect indices of all set bits into 'indices'
void get_set_bits(unsigned_vector& indices) const;
bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; }
bool operator!=(dep_tracker const& other) const { return !(*this == other); }
};
@ -353,6 +362,24 @@ namespace seq {
}
};
// kind of length constraint determines propagation strategy
enum class length_kind {
nonneg, // len(x) >= 0: unconditional axiom
eq, // len(lhs) = len(rhs): conditional on string equality
bound // Parikh bound: conditional on regex membership
};
// arithmetic length constraint derived from string equations
struct length_constraint {
    // arithmetic expression (e.g., len(x) + len(y) = len(a) + 1)
    expr_ref m_expr;
    // tracks which input constraints contributed
    dep_tracker m_dep;
    // determines propagation strategy; nonneg unless a kind is supplied
    length_kind m_kind = length_kind::nonneg;

    // empty constraint bound to manager `m`
    length_constraint(ast_manager& m)
        : m_expr(m) {}

    // constraint `e` with dependencies `dep` and propagation kind `kind`
    length_constraint(expr* e, dep_tracker const& dep, length_kind kind, ast_manager& m)
        : m_expr(e, m), m_dep(dep), m_kind(kind) {}
};
// edge in the Nielsen graph connecting two nodes
// mirrors ZIPT's NielsenEdge
class nielsen_edge {
@ -469,6 +496,39 @@ namespace seq {
// true if other's constraint set is a subset of this node's
bool is_subsumed_by(nielsen_node const& other) const;
// true if any constraint has opaque (s_other) terms that
// the Nielsen graph cannot decompose
bool has_opaque_terms() const;
};
// search statistics collected during Nielsen graph solving
struct nielsen_stats {
    unsigned m_num_solve_calls = 0;
    unsigned m_num_dfs_nodes = 0;
    unsigned m_num_sat = 0;
    unsigned m_num_unsat = 0;
    unsigned m_num_unknown = 0;
    unsigned m_num_simplify_conflict = 0;
    unsigned m_num_subsumptions = 0;
    unsigned m_num_extensions = 0;
    unsigned m_num_fresh_vars = 0;
    unsigned m_max_depth = 0;
    // modifier application counts
    unsigned m_mod_det = 0;
    unsigned m_mod_power_epsilon = 0;
    unsigned m_mod_num_cmp = 0;
    unsigned m_mod_const_num_unwinding = 0;
    unsigned m_mod_eq_split = 0;
    unsigned m_mod_star_intr = 0;
    unsigned m_mod_gpower_intr = 0;
    unsigned m_mod_const_nielsen = 0;
    unsigned m_mod_regex_char_split = 0;
    unsigned m_mod_regex_var_split = 0;
    unsigned m_mod_power_split = 0;
    unsigned m_mod_var_nielsen = 0;
    unsigned m_mod_var_num_unwinding = 0;

    // Restore every counter to its default member initializer.
    // Assigning a value-initialized object needs no <cstring>, and stays
    // correct if a counter ever gets a non-zero default or a member that
    // must not be overwritten byte-wise.
    void reset() { *this = nielsen_stats(); }
};
// the overall Nielsen transformation graph
@ -482,6 +542,10 @@ namespace seq {
unsigned m_run_idx = 0;
unsigned m_depth_bound = 0;
unsigned m_next_mem_id = 0;
unsigned m_fresh_cnt = 0;
unsigned m_num_input_eqs = 0;
unsigned m_num_input_mems = 0;
nielsen_stats m_stats;
public:
nielsen_graph(euf::sgraph& sg);
@ -519,6 +583,10 @@ namespace seq {
// generate next unique regex membership id
unsigned next_mem_id() { return m_next_mem_id++; }
// number of input constraints (for dep_tracker bit mapping)
unsigned num_input_eqs() const { return m_num_input_eqs; }
unsigned num_input_mems() const { return m_num_input_mems; }
// display for debugging
std::ostream& display(std::ostream& out) const;
@ -541,8 +609,111 @@ namespace seq {
// collect dependency information from conflicting constraints
void collect_conflict_deps(dep_tracker& deps) const;
// explain a conflict: partition the set bits into str_eq indices
// (bits 0..num_eqs-1) and str_mem indices (bits num_eqs..num_eqs+num_mems-1).
// Must be called after solve() returns unsat.
void explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const;
// accumulated search statistics
nielsen_stats const& stats() const { return m_stats; }
void reset_stats() { m_stats.reset(); }
// generate arithmetic length constraints from the root node's string
// equalities and regex memberships. For each non-trivial equation lhs = rhs,
// produces len(lhs) = len(rhs) by expanding concatenations into sums.
// For each regex membership str in regex, produces Parikh interval
// constraints: len(str) >= min_len and len(str) <= max_len.
// Also generates len(x) >= 0 for each variable appearing in the equations.
void generate_length_constraints(vector<length_constraint>& constraints);
private:
search_result search_dfs(nielsen_node* node, unsigned depth);
// create a fresh variable with a unique name
euf::snode* mk_fresh_var();
// deterministic modifier: var = ε, same-head cancel
bool apply_det_modifier(nielsen_node* node);
// const nielsen modifier: char vs var (2 branches per case)
bool apply_const_nielsen(nielsen_node* node);
// variable Nielsen modifier: var vs var, all progress (3 branches)
bool apply_var_nielsen(nielsen_node* node);
// eq split modifier: var vs var (3 branches)
bool apply_eq_split(nielsen_node* node);
// apply regex character split modifier to a node.
// for a str_mem constraint x·s ∈ R where x is a variable:
// (1) x → c·z for each char c accepted by R at first position
// (2) x → ε (x is empty)
// returns true if children were generated.
bool apply_regex_char_split(nielsen_node* node);
// power epsilon modifier: for a power token u^n in an equation,
// branch: (1) base u = ε, (2) power is empty (n = 0 semantics).
// mirrors ZIPT's PowerEpsilonModifier
bool apply_power_epsilon(nielsen_node* node);
// numeric comparison modifier: for equations involving power tokens
// u^m and u^n with the same base, branch on m < n vs n <= m.
// mirrors ZIPT's NumCmpModifier
bool apply_num_cmp(nielsen_node* node);
// constant numeric unwinding: for a power token u^n vs a constant
// (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
// mirrors ZIPT's ConstNumUnwindingModifier
bool apply_const_num_unwinding(nielsen_node* node);
// star introduction: for a str_mem x·s ∈ R where a cycle is detected
// (backedge exists), introduce stabilizer: x ∈ base* with x split.
// mirrors ZIPT's StarIntrModifier
bool apply_star_intr(nielsen_node* node);
// generalized power introduction: for a variable x matched against
// a ground repeated pattern, introduce x = base^n · prefix(base)
// with fresh power variable n and side constraint n >= 0.
// mirrors ZIPT's GPowerIntrModifier
bool apply_gpower_intr(nielsen_node* node);
// regex variable split: for str_mem x·s ∈ R where x is a variable,
// split using minterms: x → ε, or x → c·x' for each minterm c.
// More general than regex_char_split, uses minterm partitioning.
// mirrors ZIPT's RegexVarSplitModifier
bool apply_regex_var_split(nielsen_node* node);
// power split: for a variable x facing a power token u^n,
// branch: x = u^m · prefix(u) with m < n, or x = u^n · x.
// mirrors ZIPT's PowerSplitModifier
bool apply_power_split(nielsen_node* node);
// variable numeric unwinding: for a power token u^n vs a variable,
// branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u).
// mirrors ZIPT's VarNumUnwindingModifier
bool apply_var_num_unwinding(nielsen_node* node);
// collect concrete first-position characters from a regex snode
void collect_first_chars(euf::snode* re, euf::snode_vector& chars);
// find the first power token in any str_eq at this node
euf::snode* find_power_token(nielsen_node* node) const;
// find a power token facing a constant (char) head
bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const;
// find a power token facing a variable head
bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const;
// build an arithmetic expression representing the length of an snode tree.
// concatenations are expanded to sums, chars to 1, empty to 0,
// variables to (str.len var_expr).
expr_ref compute_length_expr(euf::snode* n);
// compute Parikh length interval [min_len, max_len] for a regex snode.
// uses seq_util::rex min_length/max_length on the underlying expression.
// max_len == UINT_MAX means unbounded.
void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len);
};
}

View file

@ -17,6 +17,10 @@ Author:
--*/
#include "smt/theory_nseq.h"
#include "smt/smt_context.h"
#include "smt/smt_justification.h"
#include "smt/proto_model/proto_model.h"
#include "ast/array_decl_plugin.h"
#include "ast/ast_pp.h"
#include "util/statistics.h"
namespace smt {
@ -26,43 +30,102 @@ namespace smt {
m_seq(ctx.get_manager()),
m_autil(ctx.get_manager()),
m_rewriter(ctx.get_manager()),
m_arith_value(ctx.get_manager()),
m_egraph(ctx.get_manager()),
m_sgraph(ctx.get_manager(), m_egraph),
m_nielsen(m_sgraph),
m_state(m_sgraph)
m_state(m_sgraph),
m_regex(m_sgraph),
m_model(*this, ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex)
{}
// -----------------------------------------------------------------------
// Initialization
// -----------------------------------------------------------------------
void theory_nseq::init() {
m_arith_value.init(&get_context());
}
// -----------------------------------------------------------------------
// Internalization
// -----------------------------------------------------------------------
// Internalize a boolean atom owned by this theory.
// str.in_re atoms are handled here; every other atom is forwarded to
// internalize_term. Returns true for str.in_re atoms, otherwise whatever
// internalize_term returns.
bool theory_nseq::internalize_atom(app* atom, bool /*gate_ctx*/) {
    if (!m_seq.str.is_in_re(atom))
        return internalize_term(atom);

    // str.in_re atoms are boolean predicates: register as bool_var
    // so that assign_eh fires when the SAT solver assigns them.
    // Following theory_seq: create a bool_var directly without an enode
    // for the str.in_re predicate (avoids needing to internalize the regex arg).
    context& ctx = get_context();
    expr* str_arg = atom->get_arg(0);
    mk_var(ensure_enode(str_arg));
    if (!ctx.b_internalized(atom)) {
        bool_var bv = ctx.mk_bool_var(atom);
        ctx.set_var_theory(bv, get_id());
        ctx.mark_as_relevant(bv);
    }
    // register the string argument in the sgraph as well
    get_snode(str_arg);
    return true;
}
// Attach a theory variable to `n` when its expression is a sequence, a
// regex, or an nth_u term. Returns null_theory_var for any other
// expression, and the existing variable when `n` is already attached.
theory_var theory_nseq::mk_var(enode* n) {
    expr* e = n->get_expr();
    bool const is_ours = m_seq.is_seq(e) || m_seq.is_re(e) || m_seq.str.is_nth_u(e);
    if (!is_ours)
        return null_theory_var;
    if (is_attached_to_var(n))
        return n->get_th_var(get_id());

    context& ctx = get_context();
    theory_var const v = theory::mk_var(n);
    ctx.attach_th_var(n, this, v);
    ctx.mark_as_relevant(n);
    return v;
}
// Internalize a term for this theory.
// NOTE(review): this span appears to interleave lines from two revisions of
// the function (two child-internalization loops, two declarations of `en`,
// unbalanced braces) — confirm against the real file before relying on it.
// From the visible statements: the term's arguments get enodes and theory
// variables, the term gets an enode (created with ctx.mk_enode when it has
// none) and a theory variable, the term is registered through get_snode, and
// map/mapi/foldl/foldli terms are appended to m_ho_terms with
// ensure_length_var called on ho_s.
bool theory_nseq::internalize_term(app* term) {
context& ctx = get_context();
ast_manager& m = get_manager();
// ensure children are internalized first
for (expr* arg : *term) {
if (is_app(arg) && m_seq.is_seq(arg)) {
ctx.internalize(arg, false);
}
// ensure ALL children are internalized (following theory_seq pattern)
for (auto arg : *term)
mk_var(ensure_enode(arg));
// term already has an enode: only make sure a theory variable is attached
if (ctx.e_internalized(term)) {
mk_var(ctx.get_enode(term));
return true;
}
if (!ctx.e_internalized(term)) {
ctx.mk_enode(term, false, m.is_bool(term), true);
if (m.is_bool(term)) {
bool_var bv = ctx.mk_bool_var(term);
ctx.set_var_theory(bv, get_id());
ctx.mark_as_relevant(bv);
}
enode* en = ctx.get_enode(term);
if (!is_attached_to_var(en)) {
theory_var v = mk_var(en);
(void)v;
enode* en;
if (ctx.e_internalized(term)) {
en = ctx.get_enode(term);
}
else {
en = ctx.mk_enode(term, false, m.is_bool(term), true);
}
mk_var(en);
// register in our private sgraph
get_snode(term);
// track higher-order terms for lazy unfolding
expr* ho_f = nullptr, *ho_s = nullptr, *ho_b = nullptr, *ho_i = nullptr;
if (m_seq.str.is_map(term, ho_f, ho_s) ||
m_seq.str.is_mapi(term, ho_f, ho_i, ho_s) ||
m_seq.str.is_foldl(term, ho_f, ho_b, ho_s) ||
m_seq.str.is_foldli(term, ho_f, ho_i, ho_b, ho_s)) {
m_ho_terms.push_back(term);
ensure_length_var(ho_s);
}
return true;
}
@ -73,16 +136,73 @@ namespace smt {
// Equality notification for two theory variables.
// Regex equalities are only counted in m_num_unhandled_bool; equalities
// where either side is not a sequence are ignored. For sequence equalities
// with snodes on both sides, the equality is stored in m_state with its
// source enodes and an eq_prop item carrying its index is queued.
void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) {
expr* e1 = get_enode(v1)->get_expr();
expr* e2 = get_enode(v2)->get_expr();
if (m_seq.is_re(e1)) {
++m_num_unhandled_bool;
return;
}
if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2))
return;
euf::snode* s1 = get_snode(e1);
euf::snode* s2 = get_snode(e2);
// NOTE(review): the two-argument add_str_eq call directly below looks like
// a leftover line from the previous revision (the braced branch after it
// records the same equality with its source enodes) — confirm and drop.
if (s1 && s2)
m_state.add_str_eq(s1, s2);
if (s1 && s2) {
// index the equality will occupy, taken before it is appended
unsigned idx = m_state.str_eqs().size();
m_state.add_str_eq(s1, s2, get_enode(v1), get_enode(v2));
m_prop_queue.push_back({prop_item::eq_prop, idx});
}
}
void theory_nseq::new_diseq_eh(theory_var /*v1*/, theory_var /*v2*/) {
// not handled in this initial skeleton
// Disequality notification for two theory variables.
// Regex disequalities are only counted as unhandled; disequalities where
// either side is not a sequence are ignored; all others are stored in
// m_state and queued as a diseq_prop item.
void theory_nseq::new_diseq_eh(theory_var v1, theory_var v2) {
    enode* n1 = get_enode(v1);
    enode* n2 = get_enode(v2);
    expr* lhs = n1->get_expr();
    expr* rhs = n2->get_expr();
    if (m_seq.is_re(lhs)) {
        // regex disequality: nseq cannot verify language non-equivalence
        ++m_num_unhandled_bool;
        return;
    }
    bool const both_seq = m_seq.is_seq(lhs) && m_seq.is_seq(rhs);
    if (!both_seq)
        return;
    // index the disequality will occupy, taken before it is appended
    unsigned const idx = m_state.diseqs().size();
    m_state.add_diseq(n1, n2);
    m_prop_queue.push_back({prop_item::diseq_prop, idx});
}
// -----------------------------------------------------------------------
// Boolean assignment notification
// -----------------------------------------------------------------------
// Boolean assignment notification.
// Only str.in_re atoms are processed: a true assignment is stored as a
// positive membership, a false one as a negative membership, and a matching
// propagation item is queued. Other atoms of the sequence family are counted
// in m_num_unhandled_bool.
void theory_nseq::assign_eh(bool_var v, bool is_true) {
    expr* atom = get_context().bool_var2expr(v);
    expr* str = nullptr;
    expr* regex = nullptr;
    if (!m_seq.str.is_in_re(atom, str, regex)) {
        // Track unhandled boolean string predicates (prefixof, contains, etc.)
        bool const is_seq_app =
            is_app(atom) && to_app(atom)->get_family_id() == m_seq.get_family_id();
        if (is_seq_app)
            ++m_num_unhandled_bool;
        return;
    }

    euf::snode* sn_str = get_snode(str);
    euf::snode* sn_re = get_snode(regex);
    if (sn_str == nullptr || sn_re == nullptr)
        return;

    // the literal's sign is the negation of the assigned value
    literal lit(v, !is_true);
    if (is_true) {
        unsigned const idx = m_state.str_mems().size();
        m_state.add_str_mem(sn_str, sn_re, lit);
        m_prop_queue.push_back({prop_item::pos_mem_prop, idx});
    }
    else {
        unsigned const idx = m_state.neg_mems().size();
        m_state.add_neg_mem(sn_str, sn_re, lit);
        m_prop_queue.push_back({prop_item::neg_mem_prop, idx});
    }

    TRACE(seq, tout << "nseq assign_eh: " << (is_true ? "" : "¬")
                    << "str.in_re "
                    << mk_bounded_pp(str, get_manager(), 3) << " in "
                    << mk_bounded_pp(regex, get_manager(), 3) << "\n";);
}
// -----------------------------------------------------------------------
@ -93,12 +213,139 @@ namespace smt {
theory::push_scope_eh();
m_state.push();
m_sgraph.push();
m_prop_lim.push_back(m_prop_queue.size());
m_ho_lim.push_back(m_ho_terms.size());
m_unhandled_bool_lim.push_back(m_num_unhandled_bool);
}
void theory_nseq::pop_scope_eh(unsigned num_scopes) {
theory::pop_scope_eh(num_scopes);
m_state.pop(num_scopes);
m_sgraph.pop(num_scopes);
unsigned new_sz = m_prop_lim[m_prop_lim.size() - num_scopes];
m_prop_queue.shrink(new_sz);
m_prop_lim.shrink(m_prop_lim.size() - num_scopes);
if (m_prop_qhead > m_prop_queue.size())
m_prop_qhead = m_prop_queue.size();
unsigned ho_sz = m_ho_lim[m_ho_lim.size() - num_scopes];
m_ho_terms.shrink(ho_sz);
m_ho_lim.shrink(m_ho_lim.size() - num_scopes);
m_num_unhandled_bool = m_unhandled_bool_lim[m_unhandled_bool_lim.size() - num_scopes];
m_unhandled_bool_lim.shrink(m_unhandled_bool_lim.size() - num_scopes);
}
// -----------------------------------------------------------------------
// Propagation: eager eq/diseq/literal dispatch
// -----------------------------------------------------------------------
// true while the queue holds items beyond the current head
bool theory_nseq::can_propagate() {
    return m_prop_queue.size() > m_prop_qhead;
}
// Drain the propagation queue from m_prop_qhead, dispatching each item to
// the handler for its kind. Stops early once the context is inconsistent.
void theory_nseq::propagate() {
    context& ctx = get_context();
    while (m_prop_qhead < m_prop_queue.size()) {
        if (ctx.inconsistent())
            break;
        // read kind and index, then advance the head before dispatching
        prop_item const& item = m_prop_queue[m_prop_qhead];
        auto const kind = item.m_kind;
        auto const idx = item.m_idx;
        ++m_prop_qhead;

        if (kind == prop_item::eq_prop)
            propagate_eq(idx);
        else if (kind == prop_item::diseq_prop)
            propagate_diseq(idx);
        else if (kind == prop_item::pos_mem_prop)
            propagate_pos_mem(idx);
        else if (kind == prop_item::neg_mem_prop)
            propagate_neg_mem(idx);
    }
}
void theory_nseq::propagate_eq(unsigned idx) {
// When s1 = s2 is learned, ensure len(s1) and len(s2) are
// internalized so congruence closure propagates len(s1) = len(s2).
eq_source const& src = m_state.get_eq_source(idx);
ensure_length_var(src.m_n1->get_expr());
ensure_length_var(src.m_n2->get_expr());
}
// Propagation hook for the disequality stored at index `idx`.
// The body consists solely of a TRACE statement that prints both sides.
void theory_nseq::propagate_diseq(unsigned idx) {
// Disequalities are recorded for use during final_check.
// No eager propagation beyond recording.
TRACE(seq,
auto const& d = m_state.get_diseq(idx);
tout << "nseq diseq: "
<< mk_bounded_pp(d.m_n1->get_expr(), get_manager(), 3)
<< " != "
<< mk_bounded_pp(d.m_n2->get_expr(), get_manager(), 3) << "\n";);
}
// Eager checks for the positive membership stored at index `idx`.
// Raises a conflict on the membership's literal when the regex is ∅ or when
// the string is empty and the regex is not nullable; otherwise makes sure a
// length term exists for the string argument.
void theory_nseq::propagate_pos_mem(unsigned idx) {
    auto const& mem = m_state.str_mems()[idx];
    auto const& src = m_state.get_mem_source(idx);
    if (mem.m_str == nullptr || mem.m_regex == nullptr)
        return;

    // regex is ∅, or empty string in non-nullable regex → conflict
    bool const conflicting =
        m_regex.is_empty_regex(mem.m_regex) ||
        (mem.m_str->is_empty() && !mem.m_regex->is_nullable());
    if (conflicting) {
        enode_pair_vector eqs;
        literal_vector lits;
        lits.push_back(src.m_lit);
        set_conflict(eqs, lits);
        return;
    }

    // ensure length term exists for the string argument
    if (expr* s_expr = mem.m_str->get_expr())
        ensure_length_var(s_expr);
}
// Eager checks for the negative membership stored at index `idx`.
// Raises a conflict on the entry's literal when the regex is the full
// language, or when the string is empty and the regex is nullable.
void theory_nseq::propagate_neg_mem(unsigned idx) {
    auto const& entry = m_state.get_neg_mem(idx);
    if (entry.m_str == nullptr || entry.m_regex == nullptr)
        return;

    // ¬(s in Σ*) is always false; ¬(ε in R) is false when R is nullable
    bool const conflicting =
        m_regex.is_full_regex(entry.m_regex) ||
        (entry.m_str->is_empty() && entry.m_regex->is_nullable());
    if (!conflicting)
        return;

    enode_pair_vector eqs;
    literal_vector lits;
    lits.push_back(entry.m_lit);
    set_conflict(eqs, lits);
}
// Build the length term of `e` and internalize it if the context has not
// done so yet. Null expressions and non-sequence expressions are ignored.
void theory_nseq::ensure_length_var(expr* e) {
    bool const is_seq_term = e != nullptr && m_seq.is_seq(e);
    if (!is_seq_term)
        return;
    expr_ref len(m_seq.str.mk_length(e), get_manager());
    context& ctx = get_context();
    if (ctx.e_internalized(len))
        return;
    ctx.internalize(len, false);
}
// -----------------------------------------------------------------------
@ -107,30 +354,162 @@ namespace smt {
// Rebuild the Nielsen graph from the constraints currently held in m_state.
// m_nielsen_to_state_mem gets one entry per membership handed to the graph
// in the indexed loop, holding the index of the state membership it came from.
// NOTE(review): the statements between m_nielsen.reset() and
// m_nielsen_to_state_mem.reset() populate a root node directly, and the loops
// after them transfer the same constraints again; this looks like lines from
// two revisions shown together — confirm which set is current.
void theory_nseq::populate_nielsen_graph() {
m_nielsen.reset();
seq::nielsen_node* root = m_nielsen.mk_node();
m_nielsen.set_root(root);
for (auto const& eq : m_state.str_eqs())
root->add_str_eq(eq);
for (auto const& mem : m_state.str_mems())
root->add_str_mem(mem);
m_nielsen_to_state_mem.reset();
// transfer string equalities from state to nielsen graph root
for (auto const& eq : m_state.str_eqs()) {
m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs);
}
// transfer regex memberships, pre-processing through nseq_regex
// to consume ground prefixes via Brzozowski derivatives
for (unsigned state_idx = 0; state_idx < m_state.str_mems().size(); ++state_idx) {
auto const& mem = m_state.str_mems()[state_idx];
// check_trivial: > 0 means satisfied, < 0 means conflicting, 0 undetermined
int triv = m_regex.check_trivial(mem);
if (triv > 0)
continue; // trivially satisfied, skip
if (triv < 0) {
// trivially unsat: add anyway so solve() detects conflict
m_nielsen.add_str_mem(mem.m_str, mem.m_regex);
m_nielsen_to_state_mem.push_back(state_idx);
continue;
}
// pre-process: consume ground prefix characters
vector<seq::str_mem> processed;
if (!m_regex.process_str_mem(mem, processed)) {
// conflict during ground prefix consumption
// the unprocessed membership is added so the graph still sees it
m_nielsen.add_str_mem(mem.m_str, mem.m_regex);
m_nielsen_to_state_mem.push_back(state_idx);
continue;
}
// every processed membership maps back to the same state index
for (auto const& pm : processed) {
m_nielsen.add_str_mem(pm.m_str, pm.m_regex);
m_nielsen_to_state_mem.push_back(state_idx);
}
}
TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, "
<< m_state.str_mems().size() << " mems -> nielsen root with "
<< m_nielsen.num_input_eqs() << " eqs, "
<< m_nielsen.num_input_mems() << " mems\n";);
}
// Final check for the theory. The steps run in this order, and each early
// return skips everything after it:
//   1. non-negativity assertions for string vars  -> FC_CONTINUE if any were added
//   2. unhandled string predicates present        -> FC_GIVEUP
//   3. no constraints and no higher-order terms   -> FC_DONE
//   4. higher-order unfolding made progress       -> FC_CONTINUE
//   5. no constraints                             -> FC_DONE
//   6. populate the Nielsen graph, assert length constraints (FC_CONTINUE if
//      any were added), then run the Nielsen solver and map its result.
final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) {
// Always assert non-negativity for all string theory vars,
// even when there are no string equations/memberships.
if (assert_nonneg_for_all_vars())
return FC_CONTINUE;
// If there are unhandled boolean string predicates (prefixof, contains, etc.)
// we cannot declare sat — return unknown.
if (has_unhandled_preds())
return FC_GIVEUP;
if (m_state.empty() && m_ho_terms.empty())
return FC_DONE;
// unfold higher-order terms when sequence structure is known
if (unfold_ho_terms())
return FC_CONTINUE;
if (m_state.empty())
return FC_DONE;
// String constraints are present: rebuild the Nielsen graph from the
// current state before running the length and search steps below.
populate_nielsen_graph();
++m_num_nodes_explored;
// assert length constraints derived from string equalities
if (assert_length_constraints())
return FC_CONTINUE;
++m_num_final_checks;
auto result = m_nielsen.solve();
if (result == seq::nielsen_graph::search_result::sat) {
// Nielsen found a consistent assignment for positive constraints.
// If there are negative memberships or disequalities we haven't verified,
// we cannot soundly declare sat.
if (!m_state.neg_mems().empty() || !m_state.diseqs().empty())
return FC_GIVEUP;
return FC_DONE;
}
if (result == seq::nielsen_graph::search_result::unsat) {
// turn the graph's conflict into a context conflict, then let the
// solver continue
explain_nielsen_conflict();
return FC_CONTINUE;
}
// any other search result: give up
return FC_GIVEUP;
}
// -----------------------------------------------------------------------
// Conflict explanation
// -----------------------------------------------------------------------
// Translate the set bits of `deps` into solver-level explanations.
// Bits below num_input_eqs() index equality sources; the remaining bits
// index Nielsen memberships, mapped back to state memberships through
// m_nielsen_to_state_mem. An equality is emitted only when its two enodes
// share a root, a literal only when it is currently assigned true.
void theory_nseq::deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits) {
    context& ctx = get_context();
    unsigned_vector set_bits;
    deps.get_set_bits(set_bits);
    unsigned const eq_count = m_nielsen.num_input_eqs();
    for (unsigned bit : set_bits) {
        if (bit < eq_count) {
            eq_source const& eq_src = m_state.get_eq_source(bit);
            if (eq_src.m_n1->get_root() == eq_src.m_n2->get_root())
                eqs.push_back({eq_src.m_n1, eq_src.m_n2});
            continue;
        }
        unsigned const mem_idx = bit - eq_count;
        if (mem_idx >= m_nielsen_to_state_mem.size())
            continue;
        unsigned const state_mem_idx = m_nielsen_to_state_mem[mem_idx];
        mem_source const& mem_src = m_state.get_mem_source(state_mem_idx);
        if (ctx.get_assignment(mem_src.m_lit) == l_true)
            lits.push_back(mem_src.m_lit);
    }
}
// Raise a theory conflict justified by the antecedents recorded in `deps`,
// and count it in m_num_conflicts.
void theory_nseq::add_conflict_clause(seq::dep_tracker const& deps) {
    enode_pair_vector antecedent_eqs;
    literal_vector antecedent_lits;
    deps_to_lits(deps, antecedent_eqs, antecedent_lits);

    ++m_num_conflicts;
    set_conflict(antecedent_eqs, antecedent_lits);
}
void theory_nseq::explain_nielsen_conflict() {
seq::dep_tracker deps;
m_nielsen.collect_conflict_deps(deps);
add_conflict_clause(deps);
}
// Install a conflict in the SMT context whose justification consists of the
// given literals and enode equalities (no extra parameters are attached).
void theory_nseq::set_conflict(enode_pair_vector const& eqs, literal_vector const& lits) {
    context& ctx = get_context();
    TRACE(seq, tout << "nseq conflict: " << eqs.size() << " eqs, " << lits.size() << " lits\n";);

    justification* js = ctx.mk_justification(
        ext_theory_conflict_justification(
            get_id(), ctx,
            lits.size(), lits.data(),
            eqs.size(), eqs.data(),
            0, nullptr));
    ctx.set_conflict(js);
}
// -----------------------------------------------------------------------
// Model generation
// -----------------------------------------------------------------------
void theory_nseq::init_model(model_generator& /*mg*/) {
// stub no model assignment for now
void theory_nseq::init_model(model_generator& mg) {
m_model.init(mg, m_nielsen, m_state);
}
// Return the model value procedure for enode `n`; the work is delegated
// entirely to the model construction helper m_model.
model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) {
return m_model.mk_value(n, mg);
}
// Forward the end-of-model-construction hook to the helper m_model.
void theory_nseq::finalize_model(model_generator& mg) {
m_model.finalize(mg);
}
// Let the helper m_model validate the regex constraints held in m_state
// against the proto-model `mdl`.
void theory_nseq::validate_model(proto_model& mdl) {
m_model.validate_regex(m_state, mdl);
}
// -----------------------------------------------------------------------
@ -139,14 +518,47 @@ namespace smt {
// Publish the theory's own counters followed by the Nielsen graph's search
// metrics and its per-modifier breakdown. Keys and their order are fixed.
void theory_nseq::collect_statistics(::statistics& st) const {
    // Generic forwarder: each counter is passed on with its own static type,
    // so the same statistics::update overload is selected as for a direct call.
    auto report = [&st](char const* key, auto value) { st.update(key, value); };

    // theory-level counters
    report("nseq conflicts", m_num_conflicts);
    report("nseq nodes explored", m_num_nodes_explored);
    report("nseq depth increases", m_num_depth_increases);
    report("nseq final checks", m_num_final_checks);
    report("nseq length axioms", m_num_length_axioms);

    // Nielsen graph search metrics
    auto const& search = m_nielsen.stats();
    report("nseq solve calls", search.m_num_solve_calls);
    report("nseq dfs nodes", search.m_num_dfs_nodes);
    report("nseq sat", search.m_num_sat);
    report("nseq unsat", search.m_num_unsat);
    report("nseq unknown", search.m_num_unknown);
    report("nseq simplify clash", search.m_num_simplify_conflict);
    report("nseq subsumptions", search.m_num_subsumptions);
    report("nseq extensions", search.m_num_extensions);
    report("nseq fresh vars", search.m_num_fresh_vars);
    report("nseq max depth", search.m_max_depth);

    // modifier breakdown
    report("nseq mod det", search.m_mod_det);
    report("nseq mod power epsilon", search.m_mod_power_epsilon);
    report("nseq mod num cmp", search.m_mod_num_cmp);
    report("nseq mod const num unwind", search.m_mod_const_num_unwinding);
    report("nseq mod eq split", search.m_mod_eq_split);
    report("nseq mod star intr", search.m_mod_star_intr);
    report("nseq mod gpower intr", search.m_mod_gpower_intr);
    report("nseq mod const nielsen", search.m_mod_const_nielsen);
    report("nseq mod regex char", search.m_mod_regex_char_split);
    report("nseq mod regex var", search.m_mod_regex_var_split);
    report("nseq mod power split", search.m_mod_power_split);
    report("nseq mod var nielsen", search.m_mod_var_nielsen);
    report("nseq mod var num unwind", search.m_mod_var_num_unwinding);

    // higher-order unfolding
    report("nseq ho unfolds", m_num_ho_unfolds);
}
// Print a short summary of the theory state: the number of string
// equations, memberships, disequalities and negative memberships held in
// m_state, the propagation queue position, and the number of recorded
// higher-order terms. Each quantity is printed exactly once.
void theory_nseq::display(std::ostream& out) const {
    out << "theory_nseq\n";
    out << " str_eqs: " << m_state.str_eqs().size() << "\n";
    out << " str_mems: " << m_state.str_mems().size() << "\n";
    out << " diseqs: " << m_state.diseqs().size() << "\n";
    out << " neg_mems: " << m_state.neg_mems().size() << "\n";
    out << " prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n";
    out << " ho_terms: " << m_ho_terms.size() << "\n";
}
// -----------------------------------------------------------------------
@ -157,6 +569,129 @@ namespace smt {
return alloc(theory_nseq, *ctx);
}
// -----------------------------------------------------------------------
// Higher-order term unfolding (seq.map, seq.foldl, etc.)
// -----------------------------------------------------------------------
// Unfold the recorded higher-order terms (map, mapi, foldl, foldli).
//
// Pass 1: for every recorded term whose sequence argument `s` has an enode,
// look through the equivalence class of `s` for a member that is the empty
// sequence, a unit, or a concatenation. The term is rebuilt over that member,
// handed to the sequence rewriter, and, if the rewriter makes progress and the
// two sides are not already in the same class, the equality between the
// rebuilt term and its rewrite is asserted as a unit theory axiom.
//
// Pass 2: for every recorded map/mapi term of sequence sort, assert
// len(term) = len(s).
//
// Returns true iff at least one axiom was asserted in either pass.
bool theory_nseq::unfold_ho_terms() {
if (m_ho_terms.empty())
return false;
context& ctx = get_context();
ast_manager& m = get_manager();
bool progress = false;
// Both loops only visit the entries present on entry and access them by index.
// NOTE(review): presumably because the internalize calls below can append to
// m_ho_terms while we iterate; confirm before switching to a range-for.
unsigned sz = m_ho_terms.size();
for (unsigned i = 0; i < sz; ++i) {
app* term = m_ho_terms[i];
expr* f = nullptr, *s = nullptr, *b = nullptr, *idx = nullptr;
// The first matcher that succeeds binds f/s (and idx/b where applicable);
// terms that are none of the four kinds are skipped.
if (!m_seq.str.is_map(term, f, s) &&
!m_seq.str.is_mapi(term, f, idx, s) &&
!m_seq.str.is_foldl(term, f, b, s) &&
!m_seq.str.is_foldli(term, f, idx, b, s))
continue;
if (!ctx.e_internalized(s))
continue;
// Find a structural representative in s's equivalence class
enode* s_root = ctx.get_enode(s)->get_root();
expr* repr = nullptr;
enode* curr = s_root;
// Walk the circular class list once, starting and ending at the root.
do {
expr* e = curr->get_expr();
expr *a1, *a2;
if (m_seq.str.is_empty(e) ||
m_seq.str.is_unit(e, a1) ||
m_seq.str.is_concat(e, a1, a2)) {
repr = e;
break;
}
curr = curr->get_next();
} while (curr != s_root);
// No empty/unit/concat member in the class: nothing to unfold yet.
if (!repr)
continue;
// Build ho_term with structural seq arg, then rewrite
expr_ref ho_repr(m);
if (m_seq.str.is_map(term))
ho_repr = m_seq.str.mk_map(f, repr);
else if (m_seq.str.is_mapi(term))
ho_repr = m_seq.str.mk_mapi(f, idx, repr);
else if (m_seq.str.is_foldl(term))
ho_repr = m_seq.str.mk_foldl(f, b, repr);
else
ho_repr = m_seq.str.mk_foldli(f, idx, b, repr);
expr_ref rewritten(m);
br_status st = m_rewriter.mk_app_core(
to_app(ho_repr)->get_decl(),
to_app(ho_repr)->get_num_args(),
to_app(ho_repr)->get_args(),
rewritten);
// The rewriter could not simplify the rebuilt term: skip it.
if (st == BR_FAILED)
continue;
// Internalize both the structural ho_term and its rewrite
if (!ctx.e_internalized(ho_repr))
ctx.internalize(ho_repr, false);
if (!ctx.e_internalized(rewritten))
ctx.internalize(rewritten, false);
enode* ho_en = ctx.get_enode(ho_repr);
enode* res_en = ctx.get_enode(rewritten);
// Already known equal: no axiom needed.
if (ho_en->get_root() == res_en->get_root())
continue;
// Assert tautological axiom: ho_repr = rewritten
// Congruence closure merges map(f,s) with map(f,repr)
// since s = repr in the E-graph.
expr_ref eq_expr(m.mk_eq(ho_repr, rewritten), m);
if (!ctx.b_internalized(eq_expr))
ctx.internalize(eq_expr, true);
literal eq_lit = ctx.get_literal(eq_expr);
// Only assert (and count) the axiom when the literal is not already true.
if (ctx.get_assignment(eq_lit) != l_true) {
ctx.mk_th_axiom(get_id(), 1, &eq_lit);
TRACE(seq, tout << "nseq ho unfold: "
<< mk_bounded_pp(ho_repr, m, 3) << " = "
<< mk_bounded_pp(rewritten, m, 3) << "\n";);
++m_num_ho_unfolds;
progress = true;
}
}
// For map/mapi: propagate length preservation
for (unsigned i = 0; i < sz; ++i) {
app* term = m_ho_terms[i];
expr* f = nullptr, *s = nullptr, *idx = nullptr;
bool is_map = m_seq.str.is_map(term, f, s);
bool is_mapi = !is_map && m_seq.str.is_mapi(term, f, idx, s);
if (!is_map && !is_mapi)
continue;
// The length axiom is only stated when the term itself has sequence sort.
if (!m_seq.is_seq(term))
continue;
// len(map(f, s)) = len(s)
expr_ref len_map(m_seq.str.mk_length(term), m);
expr_ref len_s(m_seq.str.mk_length(s), m);
expr_ref len_eq(m.mk_eq(len_map, len_s), m);
if (!ctx.b_internalized(len_eq))
ctx.internalize(len_eq, true);
literal len_lit = ctx.get_literal(len_eq);
if (ctx.get_assignment(len_lit) != l_true) {
ctx.mk_th_axiom(get_id(), 1, &len_lit);
++m_num_length_axioms;
progress = true;
}
}
return progress;
}
// -----------------------------------------------------------------------
// Helpers
// -----------------------------------------------------------------------
@ -168,4 +703,136 @@ namespace smt {
return s;
}
// -----------------------------------------------------------------------
// Arithmetic value queries
// -----------------------------------------------------------------------
// Query the arithmetic solver (via get_value_equiv) for the value of `e`.
// Succeeds only when a value is found and that value is an integer.
bool theory_nseq::get_num_value(expr* e, rational& val) const {
    if (!m_arith_value.get_value_equiv(e, val))
        return false;
    return val.is_int();
}
// Retrieve a lower bound for `e` from the arithmetic solver.
// Succeeds only for a non-strict, integer-valued bound.
bool theory_nseq::lower_bound(expr* e, rational& lo) const {
    bool strict = true;
    if (!m_arith_value.get_lo(e, lo, strict))
        return false;
    if (strict)
        return false;
    return lo.is_int();
}
// Retrieve an upper bound for `e` from the arithmetic solver.
// Succeeds only for a non-strict, integer-valued bound.
bool theory_nseq::upper_bound(expr* e, rational& hi) const {
    bool strict = true;
    if (!m_arith_value.get_up(e, hi, strict))
        return false;
    if (strict)
        return false;
    return hi.is_int();
}
// Compute the length of sequence term `e` into `val`.
//
// The term is flattened with an explicit worklist: binary concatenations are
// split, units count 1, empty sequences count 0 and string literals count
// their character length. For any other sub-term the current arithmetic value
// of its length term is used; if none is available, or it is negative, the
// function returns false (leaving a partial sum in `val`).
// On completion the result is true iff the accumulated sum is an integer.
bool theory_nseq::get_length(expr* e, rational& val) {
    ast_manager& m = get_manager();
    ptr_vector<expr> worklist;
    worklist.push_back(e);
    val.reset();

    expr* lhs = nullptr;
    expr* rhs = nullptr;
    zstring str;
    rational piece;

    while (!worklist.empty()) {
        expr* cur = worklist.back();
        worklist.pop_back();

        if (m_seq.str.is_concat(cur, lhs, rhs)) {
            worklist.push_back(lhs);
            worklist.push_back(rhs);
            continue;
        }
        if (m_seq.str.is_unit(cur)) {
            val += rational(1);
            continue;
        }
        if (m_seq.str.is_empty(cur))
            continue;
        if (m_seq.str.is_string(cur, str)) {
            val += rational(str.length());
            continue;
        }

        // opaque sub-term: fall back to the arithmetic value of its length
        expr_ref len(m_seq.str.mk_length(cur), m);
        if (!m_arith_value.get_value(len, piece) || piece.is_neg())
            return false;
        val += piece;
    }
    return val.is_int();
}
// Assert `lit` as a unit theory axiom: mark it relevant, add the axiom to
// the context, and count it in m_num_length_axioms.
void theory_nseq::add_length_axiom(literal lit) {
    get_context().mark_as_relevant(lit);
    get_context().mk_th_axiom(get_id(), 1, &lit);
    ++m_num_length_axioms;
}
// Make the length fact `lit` (described by `lc`) known to the SMT context.
//
// Non-negativity constraints are asserted as plain theory axioms. Every other
// kind is propagated with a justification built from the constraint's
// dependency set. Always returns true.
bool theory_nseq::propagate_length_lemma(literal lit, seq::length_constraint const& lc) {
    // unconditional: no antecedents required
    if (lc.m_kind == seq::length_kind::nonneg) {
        add_length_axiom(lit);
        return true;
    }

    context& ctx = get_context();
    ast_manager& m = get_manager();

    // conditional: gather the antecedents recorded in the dep_tracker
    enode_pair_vector antecedent_eqs;
    literal_vector antecedent_lits;
    deps_to_lits(lc.m_dep, antecedent_eqs, antecedent_lits);

    ctx.mark_as_relevant(lit);
    justification* js = ctx.mk_justification(
        ext_theory_propagation_justification(
            get_id(), ctx,
            antecedent_lits.size(), antecedent_lits.data(),
            antecedent_eqs.size(), antecedent_eqs.data(),
            lit));
    ctx.assign(lit, js);

    TRACE(seq, tout << "nseq length propagation: " << mk_pp(lc.m_expr, m)
        << " (" << antecedent_eqs.size() << " eqs, " << antecedent_lits.size() << " lits)\n";);
    ++m_num_length_axioms;
    return true;
}
bool theory_nseq::assert_nonneg_for_all_vars() {
ast_manager& m = get_manager();
context& ctx = get_context();
arith_util arith(m);
bool new_axiom = false;
unsigned nv = get_num_vars();
for (unsigned v = 0; v < nv; ++v) {
expr* e = get_enode(v)->get_expr();
if (!m_seq.is_seq(e))
continue;
expr_ref len_var(m_seq.str.mk_length(e), m);
expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m);
if (!ctx.b_internalized(ge_zero))
ctx.internalize(ge_zero, true);
literal lit = ctx.get_literal(ge_zero);
if (ctx.get_assignment(lit) != l_true) {
add_length_axiom(lit);
new_axiom = true;
}
}
return new_axiom;
}
bool theory_nseq::assert_length_constraints() {
ast_manager& m = get_manager();
context& ctx = get_context();
vector<seq::length_constraint> constraints;
m_nielsen.generate_length_constraints(constraints);
bool new_axiom = false;
for (auto const& lc : constraints) {
expr* e = lc.m_expr;
if (!ctx.b_internalized(e))
ctx.internalize(e, true);
literal lit = ctx.get_literal(e);
if (ctx.get_assignment(lit) != l_true) {
TRACE(seq, tout << "nseq length lemma: " << mk_pp(e, m) << "\n";);
propagate_length_lemma(lit, lc);
new_axiom = true;
}
}
return new_axiom;
}
}

View file

@ -35,42 +35,99 @@ namespace smt {
seq_util m_seq;
arith_util m_autil;
seq_rewriter m_rewriter;
arith_value m_arith_value;
euf::egraph m_egraph; // private egraph (not shared with smt context)
euf::sgraph m_sgraph; // private sgraph
seq::nielsen_graph m_nielsen;
nseq_state m_state;
nseq_regex m_regex; // regex membership pre-processing
nseq_model m_model; // model construction helper
// propagation queue
// One pending propagation: the kind of constraint plus an index.
// NOTE(review): m_idx presumably indexes the matching constraint list in
// nseq_state - confirm against the code that enqueues items.
struct prop_item {
    enum kind_t {
        eq_prop,
        diseq_prop,
        pos_mem_prop,
        neg_mem_prop
    };
    kind_t   m_kind;
    unsigned m_idx;
};
svector<prop_item> m_prop_queue;
unsigned m_prop_qhead = 0;
unsigned_vector m_prop_lim; // saved queue sizes for push/pop
// statistics
unsigned m_num_conflicts = 0;
unsigned m_num_nodes_explored = 0;
unsigned m_num_depth_increases = 0;
unsigned m_num_final_checks = 0;
unsigned m_num_length_axioms = 0;
// map from context expression (expr) to private sgraph snode
obj_map<expr, euf::snode*> m_expr2snode;
// mapping from nielsen mem index to state mem index
// (populated during populate_nielsen_graph, used in deps_to_lits)
unsigned_vector m_nielsen_to_state_mem;
// higher-order terms (seq.map, seq.mapi, seq.foldl, seq.foldli)
ptr_vector<app> m_ho_terms;
unsigned_vector m_ho_lim; // push/pop limits for m_ho_terms
unsigned m_num_ho_unfolds = 0;
// unhandled boolean string predicates (prefixof, suffixof, contains, etc.)
unsigned m_num_unhandled_bool = 0;
unsigned_vector m_unhandled_bool_lim;
// True while at least one unhandled boolean string predicate is recorded.
bool has_unhandled_preds() const {
    return m_num_unhandled_bool != 0;
}
// required virtual methods
bool internalize_atom(app* a, bool gate_ctx) override;
bool internalize_term(app* term) override;
theory_var mk_var(enode* n) override;
void new_eq_eh(theory_var v1, theory_var v2) override;
void new_diseq_eh(theory_var v1, theory_var v2) override;
theory* mk_fresh(context* ctx) override;
void display(std::ostream& out) const override;
// optional overrides
// Propagation hooks; declared here only, with no inline stub bodies.
bool can_propagate() override;
void propagate() override;
void init() override;
void assign_eh(bool_var v, bool is_true) override;
final_check_status final_check_eh(unsigned) override;
void push_scope_eh() override;
void pop_scope_eh(unsigned num_scopes) override;
void init_model(model_generator& mg) override;
model_value_proc* mk_value(enode* n, model_generator& mg) override;
void finalize_model(model_generator& mg) override;
void validate_model(proto_model& mdl) override;
void collect_statistics(::statistics& st) const override;
// Name of this theory, returned as a string literal.
char const* get_name() const override { return "nseq"; }
// private helpers
void populate_nielsen_graph();
void explain_nielsen_conflict();
void deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits);
void add_conflict_clause(seq::dep_tracker const& deps);
void set_conflict(enode_pair_vector const& eqs, literal_vector const& lits);
euf::snode* get_snode(expr* e);
// propagation dispatch helpers
void propagate_eq(unsigned idx);
void propagate_diseq(unsigned idx);
void propagate_pos_mem(unsigned idx);
void propagate_neg_mem(unsigned idx);
void ensure_length_var(expr* e);
// higher-order term unfolding
bool unfold_ho_terms();
// arithmetic value queries for length reasoning
bool get_num_value(expr* e, rational& val) const;
bool lower_bound(expr* e, rational& lo) const;
bool upper_bound(expr* e, rational& hi) const;
bool get_length(expr* e, rational& val);
void add_length_axiom(literal lit);
bool propagate_length_lemma(literal lit, seq::length_constraint const& lc);
bool assert_nonneg_for_all_vars();
bool assert_length_constraints();
public:
theory_nseq(context& ctx);
};

View file

@ -133,6 +133,7 @@ add_executable(test-z3
sls_seq_plugin.cpp
seq_nielsen.cpp
nseq_basic.cpp
nseq_regex.cpp
small_object_allocator.cpp
smt2print_parse.cpp
smt_context.cpp

View file

@ -288,6 +288,7 @@ int main(int argc, char ** argv) {
TST(sls_seq_plugin);
TST(seq_nielsen);
TST(nseq_basic);
TST(nseq_regex);
TST(ho_matcher);
TST(finite_set);
TST(finite_set_rewriter);

View file

@ -100,10 +100,119 @@ static void test_nseq_node_satisfied() {
std::cout << " ok\n";
}
// Test 5: symbol clash conflict ("a" = "b" is unsat)
static void test_nseq_symbol_clash() {
std::cout << "test_nseq_symbol_clash\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
euf::snode* a = sg.mk_char('a');
euf::snode* b = sg.mk_char('b');
ng.add_str_eq(a, b);
auto r = ng.solve();
SASSERT(r == seq::nielsen_graph::search_result::unsat);
// verify conflict explanation returns the equality index
unsigned_vector eq_idx, mem_idx;
ng.explain_conflict(eq_idx, mem_idx);
SASSERT(eq_idx.size() == 1);
SASSERT(eq_idx[0] == 0);
SASSERT(mem_idx.empty());
std::cout << " ok: symbol clash detected as unsat\n";
}
// Test 6: variable equality x = x is sat
static void test_nseq_var_eq_self() {
std::cout << "test_nseq_var_eq_self\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
euf::snode* x = sg.mk_var(symbol("x"));
ng.add_str_eq(x, x);
auto r = ng.solve();
SASSERT(r == seq::nielsen_graph::search_result::sat);
std::cout << " ok: x = x solved as sat\n";
}
// Test 7: x·a = x·b is unsat (prefix match then clash)
static void test_nseq_prefix_clash() {
std::cout << "test_nseq_prefix_clash\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* a = sg.mk_char('a');
euf::snode* b = sg.mk_char('b');
euf::snode* xa = sg.mk_concat(x, a);
euf::snode* xb = sg.mk_concat(x, b);
ng.add_str_eq(xa, xb);
auto r = ng.solve();
SASSERT(r == seq::nielsen_graph::search_result::unsat);
std::cout << " ok: x·a = x·b detected as unsat\n";
}
// Test 8: a·x = a·y has solutions (not unsat)
static void test_nseq_const_nielsen_solvable() {
std::cout << "test_nseq_const_nielsen_solvable\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* a = sg.mk_char('a');
euf::snode* ax = sg.mk_concat(a, x);
euf::snode* ay = sg.mk_concat(a, y);
ng.add_str_eq(ax, ay);
auto r = ng.solve();
// a·x = a·y simplifies to x = y which is satisfiable (x = y = ε)
SASSERT(r == seq::nielsen_graph::search_result::sat);
std::cout << " ok: a·x = a·y solved as sat\n";
}
// Test 9: length mismatch - "ab" = "a" is unsat
static void test_nseq_length_mismatch() {
std::cout << "test_nseq_length_mismatch\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
euf::snode* a = sg.mk_char('a');
euf::snode* b = sg.mk_char('b');
euf::snode* ab = sg.mk_concat(a, b);
ng.add_str_eq(ab, a);
auto r = ng.solve();
SASSERT(r == seq::nielsen_graph::search_result::unsat);
std::cout << " ok: ab = a detected as unsat\n";
}
// Entry point of the nseq_basic test group: runs every test in sequence
// and prints a final confirmation line once all of them have returned.
void tst_nseq_basic() {
test_nseq_instantiation();
test_nseq_param_validation();
test_nseq_simplification();
test_nseq_node_satisfied();
// Nielsen graph solving tests (symbol clash, x = x, prefix clash,
// solvable constant prefix, length mismatch)
test_nseq_symbol_clash();
test_nseq_var_eq_self();
test_nseq_prefix_clash();
test_nseq_const_nielsen_solvable();
test_nseq_length_mismatch();
std::cout << "nseq_basic: all tests passed\n";
}

File diff suppressed because it is too large Load diff