3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 05:44:51 +00:00

Implement ZIPT string solver skeleton (theory_nseq)

Add theory_nseq, a Nielsen-graph-based string solver plugin for Z3.

## New files
- src/smt/nseq_state.h/.cpp: constraint store bridging SMT context to
  Nielsen graph with manual push/pop backtracking
- src/smt/nseq_regex.h/.cpp: regex membership handling via Brzozowski
  derivatives (stub delegates to sgraph::brzozowski_deriv)
- src/smt/nseq_model.h/.cpp: model generation stub
- src/smt/theory_nseq.h/.cpp: main theory class implementing smt::theory
  with its own private egraph/sgraph, returns FC_GIVEUP as skeleton
- src/test/nseq_basic.cpp: unit tests covering instantiation, parameter
  validation, trivial-equality SAT, and node simplification

## Extensions to seq_nielsen.h/.cpp
- Add search_result enum and solve() iterative-deepening DFS entry point
- Add search_dfs() recursive DFS driver
- Add simplify_node(), generate_extensions(), collect_conflict_deps()
- Add nielsen_node::simplify_and_init(): trivial removal, empty
  propagation, prefix matching, symbol clash detection
- Add nielsen_node::is_satisfied(), is_subsumed_by()
- Implement Det, Const Nielsen, and Eq-split modifiers in
  generate_extensions()

## Integration
- smt_params.cpp: accept 'nseq' as valid string_solver value
- smt_params_helper.pyg: document 'nseq' option
- smt_setup.h/.cpp: add setup_nseq(), wire into setup_QF_S() and
  setup_seq_str()
- smt/CMakeLists.txt: add new sources and smt_seq dependency

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-03 21:50:21 +00:00
parent a7084de5a8
commit 0bdec633d7
18 changed files with 997 additions and 6 deletions

View file

@ -187,9 +187,9 @@ void smt_params::display(std::ostream & out) const {
}
void smt_params::validate_string_solver(symbol const& s) const {
if (s == "z3str3" || s == "seq" || s == "empty" || s == "auto" || s == "none")
if (s == "z3str3" || s == "seq" || s == "empty" || s == "auto" || s == "none" || s == "nseq")
return;
throw default_exception("Invalid string solver value. Legal values are z3str3, seq, empty, auto, none");
throw default_exception("Invalid string solver value. Legal values are z3str3, seq, empty, auto, none, nseq");
}
void smt_params::setup_QF_UF() {

View file

@ -122,7 +122,7 @@ def_module_params(module_name='smt',
('dack.gc_inv_decay', DOUBLE, 0.8, 'Dynamic ackermannization garbage collection decay'),
('dack.threshold', UINT, 10, ' number of times the congruence rule must be used before Leibniz\'s axiom is expanded'),
('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'),
('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver)'),
('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'),
('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'),
('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'),

View file

@ -64,6 +64,10 @@ z3_add_component(smt
theory_fpa.cpp
theory_intblast.cpp
theory_lra.cpp
theory_nseq.cpp
nseq_state.cpp
nseq_regex.cpp
nseq_model.cpp
theory_opt.cpp
theory_pb.cpp
theory_recfun.cpp
@ -91,4 +95,5 @@ z3_add_component(smt
proto_model
simplex
substitution
smt_seq
)

17
src/smt/nseq_model.cpp Normal file
View file

@ -0,0 +1,17 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_model.cpp
Abstract:
Implementation of nseq_model.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/nseq_model.h"

72
src/smt/nseq_model.h Normal file
View file

@ -0,0 +1,72 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_model.h
Abstract:
Model generation from solved Nielsen graph.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
#include "util/zstring.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include <vector>
#include <utility>
namespace smt {
class nseq_model {
ast_manager& m;
seq_util m_seq;
euf::sgraph& m_sg;
unsigned m_fresh_counter = 0;
public:
nseq_model(ast_manager& m, euf::sgraph& sg) : m(m), m_seq(m), m_sg(sg) {}
// generate a fresh string value (used when a variable is unconstrained)
expr_ref mk_fresh_value() {
std::string name = "s!" + std::to_string(m_fresh_counter++);
zstring zs(name.c_str());
return expr_ref(m_seq.str.mk_string(zs), m);
}
// extract variable assignments from a satisfied leaf node
// Returns true if all variables got a valid assignment
bool extract_assignments(seq::nielsen_node* node,
std::vector<std::pair<euf::snode*, expr*>>& assignment) {
if (!node)
return false;
for (auto const& eq : node->str_eqs()) {
if (!eq.m_lhs || !eq.m_rhs)
continue;
if (eq.m_lhs->is_var() && eq.m_rhs->get_expr()) {
assignment.emplace_back(eq.m_lhs, eq.m_rhs->get_expr());
}
else if (eq.m_rhs->is_var() && eq.m_lhs->get_expr()) {
assignment.emplace_back(eq.m_rhs, eq.m_lhs->get_expr());
}
}
return true;
}
// validate that a regex membership constraint is satisfied by the assignment
bool validate_regex(seq::str_mem const& mem,
obj_map<euf::snode, expr*> const& assignment) {
// stub: assume valid for now
return true;
}
};
}

17
src/smt/nseq_regex.cpp Normal file
View file

@ -0,0 +1,17 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_regex.cpp
Abstract:
Implementation of nseq_regex.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/nseq_regex.h"

66
src/smt/nseq_regex.h Normal file
View file

@ -0,0 +1,66 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_regex.h
Abstract:
Regex membership handling using Brzozowski derivatives.
Processes str_mem constraints after character consumption.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
namespace smt {
class nseq_regex {
euf::sgraph& m_sg;
public:
nseq_regex(euf::sgraph& sg) : m_sg(sg) {}
// check if a regex snode represents the empty language
bool is_empty_regex(euf::snode* re) const {
return re && re->is_fail();
}
// compute derivative of regex re with respect to char elem and
// return a new str_mem for the resulting constraint
seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem);
euf::snode* new_str = m_sg.drop_first(mem.m_str);
return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep);
}
// process a regex membership constraint after one character has been consumed
// returns false if the resulting regex is empty (conflict)
bool process_str_mem(seq::str_mem const& mem,
vector<seq::str_mem>& out_mems) {
if (!mem.m_str || !mem.m_regex)
return true;
// if regex does not accept the empty string and the string side is empty, conflict
if (mem.m_str->is_empty()) {
return mem.m_regex->is_nullable();
}
// compute minterms for the regex
euf::snode_vector minterms;
m_sg.compute_minterms(mem.m_regex, minterms);
for (euf::snode* ch : minterms) {
seq::str_mem new_mem = derive(mem, ch);
if (!is_empty_regex(new_mem.m_regex))
out_mems.push_back(new_mem);
}
return true;
}
};
}

17
src/smt/nseq_state.cpp Normal file
View file

@ -0,0 +1,17 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_state.cpp
Abstract:
Implementation of nseq_state.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/nseq_state.h"

75
src/smt/nseq_state.h Normal file
View file

@ -0,0 +1,75 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_state.h
Abstract:
Constraint store bridging the SMT context to the Nielsen graph.
Holds word equations and regex membership constraints with
push/pop support for backtracking.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "util/vector.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
namespace smt {
class nseq_state {
euf::sgraph& m_sg;
vector<seq::str_eq> m_str_eqs;
vector<seq::str_mem> m_str_mems;
unsigned_vector m_str_eq_lim;
unsigned_vector m_str_mem_lim;
unsigned m_next_mem_id = 0;
public:
nseq_state(euf::sgraph& sg) : m_sg(sg) {}
void push() {
m_str_eq_lim.push_back(m_str_eqs.size());
m_str_mem_lim.push_back(m_str_mems.size());
}
void pop(unsigned n) {
for (unsigned i = 0; i < n; ++i) {
m_str_eqs.shrink(m_str_eq_lim.back());
m_str_eq_lim.pop_back();
m_str_mems.shrink(m_str_mem_lim.back());
m_str_mem_lim.pop_back();
}
}
void add_str_eq(euf::snode* lhs, euf::snode* rhs) {
seq::dep_tracker dep;
m_str_eqs.push_back(seq::str_eq(lhs, rhs, dep));
}
void add_str_mem(euf::snode* str, euf::snode* regex) {
seq::dep_tracker dep;
m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep));
}
vector<seq::str_eq> const& str_eqs() const { return m_str_eqs; }
vector<seq::str_mem> const& str_mems() const { return m_str_mems; }
bool empty() const { return m_str_eqs.empty() && m_str_mems.empty(); }
void reset() {
m_str_eqs.reset();
m_str_mems.reset();
m_str_eq_lim.reset();
m_str_mem_lim.reset();
}
};
}

View file

@ -308,4 +308,325 @@ namespace seq {
return out;
}
// -----------------------------------------------------------------------
// nielsen_node: simplify_and_init
// -----------------------------------------------------------------------
simplify_result nielsen_node::simplify_and_init(nielsen_graph& g) {
euf::sgraph& sg = g.sg();
bool changed = true;
while (changed) {
changed = false;
// pass 1: remove trivially satisfied equalities
unsigned wi = 0;
for (unsigned i = 0; i < m_str_eq.size(); ++i) {
str_eq& eq = m_str_eq[i];
if (eq.is_trivial())
continue;
m_str_eq[wi++] = eq;
}
if (wi < m_str_eq.size()) {
m_str_eq.shrink(wi);
changed = true;
}
// pass 2: detect symbol clashes and empty-propagation
for (str_eq& eq : m_str_eq) {
if (!eq.m_lhs || !eq.m_rhs)
continue;
// both sides start with a concrete character: check match
if (eq.m_lhs->is_char() && eq.m_rhs->is_char()) {
if (eq.m_lhs->id() != eq.m_rhs->id()) {
// symbol clash
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
// same char: drop from both sides
eq.m_lhs = sg.drop_first(eq.m_lhs);
eq.m_rhs = sg.drop_first(eq.m_rhs);
changed = true;
continue;
}
// one side empty, the other not empty => conflict or substitution
if (eq.m_lhs->is_empty() && !eq.m_rhs->is_empty()) {
// rhs must also be empty; if it is a concrete non-empty string => conflict
if (eq.m_rhs->is_char() || eq.m_rhs->is_concat()) {
// check if rhs has any non-variable tokens
euf::snode_vector tokens;
eq.m_rhs->collect_tokens(tokens);
bool all_vars = true;
for (euf::snode* t : tokens)
if (!t->is_var()) { all_vars = false; break; }
if (!all_vars) {
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
// substitute: every variable in rhs -> empty
for (euf::snode* t : tokens) {
if (t->is_var()) {
nielsen_subst s(t, sg.mk_empty(), eq.m_dep);
apply_subst(sg, s);
changed = true;
}
}
}
continue;
}
if (eq.m_rhs->is_empty() && !eq.m_lhs->is_empty()) {
euf::snode_vector tokens;
eq.m_lhs->collect_tokens(tokens);
bool all_vars = true;
for (euf::snode* t : tokens)
if (!t->is_var()) { all_vars = false; break; }
if (!all_vars) {
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
for (euf::snode* t : tokens) {
if (t->is_var()) {
nielsen_subst s(t, sg.mk_empty(), eq.m_dep);
apply_subst(sg, s);
changed = true;
}
}
continue;
}
// prefix matching: lhs and rhs both start with the same char => cancel
{
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
eq.m_rhs->collect_tokens(rhs_toks);
unsigned prefix = 0;
while (prefix < lhs_toks.size() && prefix < rhs_toks.size() &&
lhs_toks[prefix]->is_char() && rhs_toks[prefix]->is_char()) {
if (lhs_toks[prefix]->id() != rhs_toks[prefix]->id()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
++prefix;
}
if (prefix > 0) {
eq.m_lhs = sg.drop_left(eq.m_lhs, prefix);
eq.m_rhs = sg.drop_left(eq.m_rhs, prefix);
changed = true;
}
}
}
}
// check for regex memberships that are immediately infeasible
for (str_mem& mem : m_str_mem) {
if (!mem.m_str || !mem.m_regex)
continue;
if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
}
if (is_satisfied())
return simplify_result::satisfied;
return simplify_result::proceed;
}
bool nielsen_node::is_satisfied() const {
for (str_eq const& eq : m_str_eq)
if (!eq.is_trivial()) return false;
return m_str_mem.empty();
}
bool nielsen_node::is_subsumed_by(nielsen_node const& other) const {
// check if every constraint in 'other' also appears in 'this'
for (str_eq const& oeq : other.m_str_eq) {
bool found = false;
for (str_eq const& teq : m_str_eq)
if (teq == oeq) { found = true; break; }
if (!found) return false;
}
for (str_mem const& omem : other.m_str_mem) {
bool found = false;
for (str_mem const& tmem : m_str_mem)
if (tmem == omem) { found = true; break; }
if (!found) return false;
}
return true;
}
// -----------------------------------------------------------------------
// nielsen_graph: search
// -----------------------------------------------------------------------
nielsen_graph::search_result nielsen_graph::solve() {
if (!m_root)
return search_result::sat;
m_depth_bound = 10;
for (unsigned iter = 0; iter < 6; ++iter, m_depth_bound *= 2) {
inc_run_idx();
search_result r = search_dfs(m_root, 0);
if (r != search_result::unknown)
return r;
// depth limit hit increase bound
}
return search_result::unknown;
}
nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth) {
simplify_result sr = node->simplify_and_init(*this);
if (sr == simplify_result::conflict)
return search_result::unsat;
if (sr == simplify_result::satisfied || node->is_satisfied())
return search_result::sat;
if (depth >= m_depth_bound)
return search_result::unknown;
if (!generate_extensions(node, depth))
return search_result::unsat;
bool any_unknown = false;
for (nielsen_edge* e : node->outgoing()) {
nielsen_node* child = e->tgt();
search_result r = search_dfs(child, depth + 1);
if (r == search_result::sat)
return search_result::sat;
if (r == search_result::unknown)
any_unknown = true;
}
return any_unknown ? search_result::unknown : search_result::unsat;
}
simplify_result nielsen_graph::simplify_node(nielsen_node* node) {
return node->simplify_and_init(*this);
}
bool nielsen_graph::generate_extensions(nielsen_node* node, unsigned /*depth*/) {
// find the first non-trivial string equality to split on
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial())
continue;
if (!eq.m_lhs || !eq.m_rhs)
continue;
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
eq.m_rhs->collect_tokens(rhs_toks);
if (lhs_toks.empty() || rhs_toks.empty())
continue;
euf::snode* lhead = lhs_toks[0];
euf::snode* rhead = rhs_toks[0];
// Det modifier: if one side starts with a variable whose other side is empty
if (lhead->is_var() && eq.m_rhs->is_empty()) {
// substitute lhead -> empty
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
return true;
}
if (rhead->is_var() && eq.m_lhs->is_empty()) {
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
return true;
}
// Const Nielsen modifier: lhs starts with char, rhs starts with var
// -> substitute rhs_var = char . fresh_var
if (lhead->is_char() && rhead->is_var()) {
symbol fresh_name(("v!" + std::to_string(node->id())).c_str());
euf::snode* fresh = m_sg.mk_var(fresh_name);
euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
return true;
}
if (rhead->is_char() && lhead->is_var()) {
symbol fresh_name(("v!" + std::to_string(node->id())).c_str());
euf::snode* fresh = m_sg.mk_var(fresh_name);
euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
return true;
}
// Eq split modifier: both sides start with variables x.A = y.B
// Produce three children:
// 1) x = eps, A = y.B
// 2) x = y, A = B (when len(x) = len(y))
// 3) y = eps, x.A = B
if (lhead->is_var() && rhead->is_var()) {
// child 1: lhead -> eps
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// child 2: lhead = rhead (if different vars, substitute lhead -> rhead)
if (lhead->id() != rhead->id()) {
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(lhead, rhead, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// child 3: rhead -> eps
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
// no applicable modifier for this equality; try next
}
// no extension was generated
return false;
}
void nielsen_graph::collect_conflict_deps(dep_tracker& deps) const {
for (nielsen_node const* n : m_nodes) {
if (!n->is_currently_conflict())
continue;
for (str_eq const& eq : n->str_eqs())
deps.merge(eq.m_dep);
for (str_mem const& mem : n->str_mems())
deps.merge(mem.m_dep);
}
}
}

View file

@ -459,6 +459,16 @@ namespace seq {
// apply a substitution to all constraints
void apply_subst(euf::sgraph& sg, nielsen_subst const& s);
// simplify all constraints at this node and initialize status.
// Returns proceed, conflict, satisfied, or restart.
simplify_result simplify_and_init(nielsen_graph& g);
// true if all str_eqs are trivial and there are no str_mems
bool is_satisfied() const;
// true if other's constraint set is a subset of this node's
bool is_subsumed_by(nielsen_node const& other) const;
};
// the overall Nielsen transformation graph
@ -514,6 +524,25 @@ namespace seq {
// reset all nodes and state
void reset();
// search result returned by solve() / search_dfs()
enum class search_result { sat, unsat, unknown };
// main search entry point: iterative deepening DFS
search_result solve();
// simplify a node's constraints (delegate to node)
simplify_result simplify_node(nielsen_node* node);
// generate child nodes by applying modifier rules
// returns true if at least one child was generated
bool generate_extensions(nielsen_node* node, unsigned depth);
// collect dependency information from conflicting constraints
void collect_conflict_deps(dep_tracker& deps) const;
private:
search_result search_dfs(nielsen_node* node, unsigned depth);
};
}

View file

@ -35,6 +35,7 @@ Revision History:
#include "smt/theory_seq_empty.h"
#include "smt/theory_seq.h"
#include "smt/theory_char.h"
#include "smt/theory_nseq.h"
#include "smt/theory_special_relations.h"
#include "smt/theory_sls.h"
#include "smt/theory_pb.h"
@ -571,7 +572,9 @@ namespace smt {
else if (m_params.m_string_solver == "auto") {
setup_unknown();
}
else if (m_params.m_string_solver == "nseq") {
setup_nseq();
}
else if (m_params.m_string_solver == "empty") {
setup_seq();
}
@ -579,7 +582,7 @@ namespace smt {
// don't register any solver.
}
else {
throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto'");
throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto', 'nseq'");
}
}
@ -754,11 +757,14 @@ namespace smt {
else if (m_params.m_string_solver == "none") {
// don't register any solver.
}
else if (m_params.m_string_solver == "nseq") {
setup_nseq();
}
else if (m_params.m_string_solver == "auto") {
setup_seq();
}
else {
throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto'");
throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto', 'nseq'");
}
}
@ -785,6 +791,11 @@ namespace smt {
m_context.register_plugin(alloc(smt::theory_char, m_context));
}
void setup::setup_nseq() {
m_context.register_plugin(alloc(smt::theory_nseq, m_context));
setup_char();
}
void setup::setup_finite_set() {
m_context.register_plugin(alloc(smt::theory_finite_set, m_context));
}

View file

@ -102,6 +102,7 @@ namespace smt {
void setup_seq_str(static_features const & st);
void setup_seq();
void setup_char();
void setup_nseq();
void setup_finite_set();
void setup_card();
void setup_sls();

171
src/smt/theory_nseq.cpp Normal file
View file

@ -0,0 +1,171 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
theory_nseq.cpp
Abstract:
ZIPT string solver theory for Z3.
Implementation of theory_nseq.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/theory_nseq.h"
#include "smt/smt_context.h"
#include "util/statistics.h"
namespace smt {
theory_nseq::theory_nseq(context& ctx) :
theory(ctx, ctx.get_manager().mk_family_id("seq")),
m_seq(ctx.get_manager()),
m_autil(ctx.get_manager()),
m_rewriter(ctx.get_manager()),
m_egraph(ctx.get_manager()),
m_sgraph(ctx.get_manager(), m_egraph),
m_nielsen(m_sgraph),
m_state(m_sgraph)
{}
// -----------------------------------------------------------------------
// Internalization
// -----------------------------------------------------------------------
bool theory_nseq::internalize_atom(app* atom, bool /*gate_ctx*/) {
return internalize_term(atom);
}
bool theory_nseq::internalize_term(app* term) {
context& ctx = get_context();
ast_manager& m = get_manager();
// ensure children are internalized first
for (expr* arg : *term) {
if (is_app(arg) && m_seq.is_seq(arg)) {
ctx.internalize(arg, false);
}
}
if (!ctx.e_internalized(term)) {
ctx.mk_enode(term, false, m.is_bool(term), true);
}
enode* en = ctx.get_enode(term);
if (!is_attached_to_var(en)) {
theory_var v = mk_var(en);
(void)v;
}
// register in our private sgraph
get_snode(term);
return true;
}
// -----------------------------------------------------------------------
// Equality / disequality notifications
// -----------------------------------------------------------------------
void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) {
expr* e1 = get_enode(v1)->get_expr();
expr* e2 = get_enode(v2)->get_expr();
if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2))
return;
euf::snode* s1 = get_snode(e1);
euf::snode* s2 = get_snode(e2);
if (s1 && s2)
m_state.add_str_eq(s1, s2);
}
void theory_nseq::new_diseq_eh(theory_var /*v1*/, theory_var /*v2*/) {
// not handled in this initial skeleton
}
// -----------------------------------------------------------------------
// Scope management
// -----------------------------------------------------------------------
void theory_nseq::push_scope_eh() {
theory::push_scope_eh();
m_state.push();
m_sgraph.push();
}
void theory_nseq::pop_scope_eh(unsigned num_scopes) {
theory::pop_scope_eh(num_scopes);
m_state.pop(num_scopes);
m_sgraph.pop(num_scopes);
}
// -----------------------------------------------------------------------
// Final check: build Nielsen graph and search
// -----------------------------------------------------------------------
void theory_nseq::populate_nielsen_graph() {
m_nielsen.reset();
seq::nielsen_node* root = m_nielsen.mk_node();
m_nielsen.set_root(root);
for (auto const& eq : m_state.str_eqs())
root->add_str_eq(eq);
for (auto const& mem : m_state.str_mems())
root->add_str_mem(mem);
}
final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) {
if (m_state.empty())
return FC_DONE;
// For now, give up if there are string constraints.
// The full search will be wired in once the Nielsen algorithms are complete.
populate_nielsen_graph();
++m_num_nodes_explored;
return FC_GIVEUP;
}
// -----------------------------------------------------------------------
// Model generation
// -----------------------------------------------------------------------
void theory_nseq::init_model(model_generator& /*mg*/) {
// stub no model assignment for now
}
// -----------------------------------------------------------------------
// Statistics / display
// -----------------------------------------------------------------------
void theory_nseq::collect_statistics(::statistics& st) const {
st.update("nseq conflicts", m_num_conflicts);
st.update("nseq nodes explored", m_num_nodes_explored);
st.update("nseq depth increases", m_num_depth_increases);
}
void theory_nseq::display(std::ostream& out) const {
out << "theory_nseq\n";
out << " str_eqs: " << m_state.str_eqs().size() << "\n";
out << " str_mems: " << m_state.str_mems().size() << "\n";
}
// -----------------------------------------------------------------------
// Factory / clone
// -----------------------------------------------------------------------
theory* theory_nseq::mk_fresh(context* ctx) {
return alloc(theory_nseq, *ctx);
}
// -----------------------------------------------------------------------
// Helpers
// -----------------------------------------------------------------------
euf::snode* theory_nseq::get_snode(expr* e) {
euf::snode* s = m_sgraph.find(e);
if (!s)
s = m_sgraph.mk(e);
return s;
}
}

78
src/smt/theory_nseq.h Normal file
View file

@ -0,0 +1,78 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
theory_nseq.h
Abstract:
ZIPT string solver theory for Z3.
Implements theory_nseq, a theory plugin for string/sequence constraints
using the Nielsen transformation graph (Nielsen graph).
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/euf/euf_egraph.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/smt_theory.h"
#include "smt/smt_arith_value.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/nseq_state.h"
#include "smt/nseq_regex.h"
#include "smt/nseq_model.h"
namespace smt {
class theory_nseq : public theory {
seq_util m_seq;
arith_util m_autil;
seq_rewriter m_rewriter;
euf::egraph m_egraph; // private egraph (not shared with smt context)
euf::sgraph m_sgraph; // private sgraph
seq::nielsen_graph m_nielsen;
nseq_state m_state;
// statistics
unsigned m_num_conflicts = 0;
unsigned m_num_nodes_explored = 0;
unsigned m_num_depth_increases = 0;
// map from context enode to private sgraph snode
obj_map<expr, euf::snode*> m_expr2snode;
// required virtual methods
bool internalize_atom(app* a, bool gate_ctx) override;
bool internalize_term(app* term) override;
void new_eq_eh(theory_var v1, theory_var v2) override;
void new_diseq_eh(theory_var v1, theory_var v2) override;
theory* mk_fresh(context* ctx) override;
void display(std::ostream& out) const override;
// optional overrides
bool can_propagate() override { return false; }
void propagate() override {}
final_check_status final_check_eh(unsigned) override;
void push_scope_eh() override;
void pop_scope_eh(unsigned num_scopes) override;
void init_model(model_generator& mg) override;
void collect_statistics(::statistics& st) const override;
char const* get_name() const override { return "nseq"; }
// private helpers
void populate_nielsen_graph();
euf::snode* get_snode(expr* e);
public:
theory_nseq(context& ctx);
};
}

View file

@ -132,6 +132,7 @@ add_executable(test-z3
sls_test.cpp
sls_seq_plugin.cpp
seq_nielsen.cpp
nseq_basic.cpp
small_object_allocator.cpp
smt2print_parse.cpp
smt_context.cpp

View file

@ -287,6 +287,7 @@ int main(int argc, char ** argv) {
TST(scoped_vector);
TST(sls_seq_plugin);
TST(seq_nielsen);
TST(nseq_basic);
TST(ho_matcher);
TST(finite_set);
TST(finite_set_rewriter);

109
src/test/nseq_basic.cpp Normal file
View file

@ -0,0 +1,109 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_basic.cpp
Abstract:
Basic unit tests for theory_nseq and supporting infrastructure.
--*/
#include "util/util.h"
#include "ast/reg_decl_plugins.h"
#include "ast/euf/euf_egraph.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include "params/smt_params.h"
#include <iostream>
// Test 1: instantiation of nielsen_graph compiles and doesn't crash
static void test_nseq_instantiation() {
std::cout << "test_nseq_instantiation\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
SASSERT(ng.root() == nullptr);
SASSERT(ng.num_nodes() == 0);
std::cout << " ok\n";
}
// Test 2: parameter validation accepts "nseq"
static void test_nseq_param_validation() {
std::cout << "test_nseq_param_validation\n";
smt_params p;
// Should not throw
try {
p.validate_string_solver(symbol("nseq"));
std::cout << " ok: nseq accepted\n";
} catch (...) {
SASSERT(false && "nseq should be accepted as a valid string_solver value");
}
// Should not throw for legacy values
try {
p.validate_string_solver(symbol("seq"));
p.validate_string_solver(symbol("auto"));
p.validate_string_solver(symbol("none"));
std::cout << " ok: legacy values still accepted\n";
} catch (...) {
SASSERT(false && "legacy values should still be accepted");
}
}
// Test 3: nielsen graph simplification (trivial case)
static void test_nseq_simplification() {
std::cout << "test_nseq_simplification\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
// Add a trivial equality: empty = empty
euf::snode* empty1 = sg.mk_empty();
euf::snode* empty2 = sg.mk_empty();
ng.add_str_eq(empty1, empty2);
seq::nielsen_graph::search_result r = ng.solve();
// empty = empty is trivially satisfied
SASSERT(r == seq::nielsen_graph::search_result::sat);
std::cout << " ok: trivial equality solved as sat\n";
}
// Test 4: node is_satisfied check
static void test_nseq_node_satisfied() {
std::cout << "test_nseq_node_satisfied\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq::nielsen_graph ng(sg);
seq::nielsen_node* node = ng.mk_node();
// empty node has no constraints => satisfied
SASSERT(node->is_satisfied());
// add a trivial equality
euf::snode* empty = sg.mk_empty();
seq::dep_tracker dep;
seq::str_eq eq(empty, empty, dep);
node->add_str_eq(eq);
SASSERT(node->str_eqs().size() == 1);
SASSERT(!node->str_eqs()[0].is_trivial() || node->str_eqs()[0].m_lhs == node->str_eqs()[0].m_rhs);
// After simplification, trivial equalities should be removed
seq::simplify_result sr = node->simplify_and_init(ng);
VERIFY(sr == seq::simplify_result::satisfied || sr == seq::simplify_result::proceed);
std::cout << " ok\n";
}
void tst_nseq_basic() {
test_nseq_instantiation();
test_nseq_param_validation();
test_nseq_simplification();
test_nseq_node_satisfied();
std::cout << "nseq_basic: all tests passed\n";
}