3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-04-27 06:13:35 +00:00

Move seq_nielsen from src/ast/rewriter to src/smt/seq with new smt_seq component

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-03 00:17:10 +00:00
parent 425d3aec25
commit 7c328647de
7 changed files with 12 additions and 5 deletions

View file

@ -0,0 +1,7 @@
z3_add_component(smt_seq
SOURCES
seq_nielsen.cpp
COMPONENT_DEPENDENCIES
euf
rewriter
)

311
src/smt/seq/seq_nielsen.cpp Normal file
View file

@ -0,0 +1,311 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_nielsen.cpp
Abstract:
Nielsen graph implementation for string constraint solving.
Ports the constraint types and Nielsen graph structures from
ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
Author:
Nikolaj Bjorner (nbjorner) 2026-03-02
Clemens Eisenhofer 2026-03-02
--*/
#include "smt/seq/seq_nielsen.h"
#include "ast/ast_pp.h"
namespace seq {
// -----------------------------------------------
// dep_tracker
// -----------------------------------------------
dep_tracker::dep_tracker(unsigned num_bits) {
unsigned words = (num_bits + 31) / 32;
m_bits.resize(words, 0);
}
dep_tracker::dep_tracker(unsigned num_bits, unsigned set_bit) {
unsigned words = (num_bits + 31) / 32;
m_bits.resize(words, 0);
if (set_bit < num_bits) {
unsigned word_idx = set_bit / 32;
unsigned bit_idx = set_bit % 32;
m_bits[word_idx] = 1u << bit_idx;
}
}
void dep_tracker::merge(dep_tracker const& other) {
if (other.m_bits.empty())
return;
if (m_bits.size() < other.m_bits.size())
m_bits.resize(other.m_bits.size(), 0);
for (unsigned i = 0; i < other.m_bits.size(); ++i)
m_bits[i] |= other.m_bits[i];
}
bool dep_tracker::is_superset(dep_tracker const& other) const {
for (unsigned i = 0; i < other.m_bits.size(); ++i) {
unsigned my_bits = (i < m_bits.size()) ? m_bits[i] : 0;
if ((my_bits & other.m_bits[i]) != other.m_bits[i])
return false;
}
return true;
}
bool dep_tracker::empty() const {
for (unsigned b : m_bits)
if (b != 0) return false;
return true;
}
// -----------------------------------------------
// str_eq
// -----------------------------------------------
void str_eq::sort() {
if (m_lhs && m_rhs && m_lhs->id() > m_rhs->id())
std::swap(m_lhs, m_rhs);
}
bool str_eq::is_trivial() const {
return m_lhs == m_rhs ||
(m_lhs && m_rhs && m_lhs->is_empty() && m_rhs->is_empty());
}
bool str_eq::contains_var(euf::snode* var) const {
if (!var) return false;
// check if var appears in the token list of lhs or rhs
if (m_lhs) {
euf::snode_vector tokens;
m_lhs->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == var) return true;
}
if (m_rhs) {
euf::snode_vector tokens;
m_rhs->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == var) return true;
}
return false;
}
// -----------------------------------------------
// str_mem
// -----------------------------------------------
bool str_mem::is_primitive() const {
return m_str && m_str->length() == 1 && m_str->is_var();
}
bool str_mem::contains_var(euf::snode* var) const {
if (!var) return false;
if (m_str) {
euf::snode_vector tokens;
m_str->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == var) return true;
}
return false;
}
// -----------------------------------------------
// nielsen_subst
// -----------------------------------------------
bool nielsen_subst::is_eliminating() const {
if (!m_var || !m_replacement) return true;
// check if var appears in replacement
euf::snode_vector tokens;
m_replacement->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == m_var) return false;
return true;
}
// -----------------------------------------------
// nielsen_edge
// -----------------------------------------------
nielsen_edge::nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress):
m_src(src), m_tgt(tgt), m_is_progress(is_progress) {
}
// -----------------------------------------------
// nielsen_node
// -----------------------------------------------
nielsen_node::nielsen_node(nielsen_graph* graph, unsigned id):
m_id(id), m_graph(graph), m_is_progress(true) {
}
void nielsen_node::clone_from(nielsen_node const& parent) {
m_str_eq.reset();
m_str_mem.reset();
for (auto const& eq : parent.m_str_eq)
m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep));
for (auto const& mem : parent.m_str_mem)
m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep));
}
void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) {
if (!s.m_var) return;
for (unsigned i = 0; i < m_str_eq.size(); ++i) {
str_eq& eq = m_str_eq[i];
eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement);
eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement);
eq.m_dep.merge(s.m_dep);
eq.sort();
}
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
str_mem& mem = m_str_mem[i];
mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement);
// regex is typically ground, but apply subst for generality
mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement);
mem.m_dep.merge(s.m_dep);
}
}
// -----------------------------------------------
// nielsen_graph
// -----------------------------------------------
nielsen_graph::nielsen_graph(euf::sgraph& sg):
m_sg(sg) {
}
nielsen_graph::~nielsen_graph() {
reset();
}
nielsen_node* nielsen_graph::mk_node() {
unsigned id = m_nodes.size();
nielsen_node* n = alloc(nielsen_node, this, id);
m_nodes.push_back(n);
return n;
}
nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) {
nielsen_node* child = mk_node();
child->clone_from(*parent);
return child;
}
nielsen_edge* nielsen_graph::mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress) {
nielsen_edge* e = alloc(nielsen_edge, src, tgt, is_progress);
m_edges.push_back(e);
src->add_outgoing(e);
return e;
}
void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) {
if (!m_root)
m_root = mk_node();
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
m_root->str_eqs().size());
str_eq eq(lhs, rhs, dep);
eq.sort();
m_root->add_str_eq(eq);
}
void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) {
if (!m_root)
m_root = mk_node();
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
m_root->str_eqs().size() + m_root->str_mems().size());
euf::snode* history = m_sg.mk_empty();
unsigned id = next_mem_id();
m_root->add_str_mem(str_mem(str, regex, history, id, dep));
}
void nielsen_graph::inc_run_idx() {
if (m_run_idx == UINT_MAX) {
for (nielsen_node* n : m_nodes)
n->reset_counter();
m_run_idx = 1;
}
else
++m_run_idx;
}
void nielsen_graph::reset() {
for (nielsen_node* n : m_nodes)
dealloc(n);
for (nielsen_edge* e : m_edges)
dealloc(e);
m_nodes.reset();
m_edges.reset();
m_root = nullptr;
m_run_idx = 0;
m_depth_bound = 0;
m_next_mem_id = 0;
}
std::ostream& nielsen_graph::display(std::ostream& out) const {
out << "nielsen_graph with " << m_nodes.size() << " nodes, "
<< m_edges.size() << " edges\n";
for (nielsen_node const* n : m_nodes) {
out << " node[" << n->id() << "]";
if (n == m_root)
out << " (root)";
if (n->is_general_conflict())
out << " CONFLICT";
if (n->is_extended())
out << " EXTENDED";
out << "\n";
// display string equalities
for (auto const& eq : n->str_eqs()) {
out << " str_eq: ";
if (eq.m_lhs) out << "lhs[id=" << eq.m_lhs->id() << ",len=" << eq.m_lhs->length() << "]";
else out << "null";
out << " = ";
if (eq.m_rhs) out << "rhs[id=" << eq.m_rhs->id() << ",len=" << eq.m_rhs->length() << "]";
else out << "null";
out << "\n";
}
// display regex memberships
for (auto const& mem : n->str_mems()) {
out << " str_mem[" << mem.m_id << "]: ";
if (mem.m_str) out << "str[id=" << mem.m_str->id() << ",len=" << mem.m_str->length() << "]";
else out << "null";
out << " in ";
if (mem.m_regex) out << "re[id=" << mem.m_regex->id() << "]";
else out << "null";
out << "\n";
}
// display outgoing edges
for (nielsen_edge const* e : n->outgoing()) {
out << " -> node[" << e->tgt()->id() << "]";
if (e->is_progress()) out << " (progress)";
for (auto const& s : e->subst()) {
out << " {";
if (s.m_var) out << "var[" << s.m_var->id() << "]";
out << " -> ";
if (s.m_replacement) out << "repl[" << s.m_replacement->id() << ",len=" << s.m_replacement->length() << "]";
else out << "eps";
out << "}";
}
out << "\n";
}
if (n->backedge())
out << " backedge -> node[" << n->backedge()->id() << "]\n";
}
return out;
}
}

327
src/smt/seq/seq_nielsen.h Normal file
View file

@ -0,0 +1,327 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_nielsen.h
Abstract:
Nielsen graph for string constraint solving.
Ports the constraint types and Nielsen graph structures from
ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
into Z3's smt/seq framework.
The Nielsen graph is used for solving word equations and regex
membership constraints via Nielsen transformations. Each node
contains a set of constraints (string equalities, regex memberships,
integer equalities/inequalities) and edges represent substitutions
that transform one constraint set into another.
Key components:
-- str_eq: string equality constraint (lhs = rhs)
-- str_mem: regex membership constraint (str in regex)
-- nielsen_subst: variable substitution (var -> replacement)
-- nielsen_edge: graph edge with substitutions and side constraints
-- nielsen_node: graph node with constraint set and outgoing edges
-- nielsen_graph: the overall Nielsen transformation graph
Author:
Nikolaj Bjorner (nbjorner) 2026-03-02
Clemens Eisenhofer 2026-03-02
--*/
#pragma once
#include "util/vector.h"
#include "util/uint_set.h"
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
#include "ast/euf/euf_sgraph.h"
namespace seq {
// forward declarations
class nielsen_node;
class nielsen_edge;
class nielsen_graph;
// simplification result for constraint processing
// mirrors ZIPT's SimplifyResult enum
enum class simplify_result {
proceed, // no change, continue
conflict, // constraint is unsatisfiable
satisfied, // constraint is trivially satisfied
restart, // constraint was simplified, restart
restart_and_satisfied, // simplified and satisfied
};
// reason for backtracking in the Nielsen graph
// mirrors ZIPT's BacktrackReasons enum
enum class backtrack_reason {
unevaluated,
extended,
symbol_clash,
parikh_image,
subsumption,
arithmetic,
regex,
regex_widening,
character_range,
smt,
children_failed,
};
// dependency tracker: bitvector tracking which input constraints
// contributed to deriving a given constraint
// mirrors ZIPT's DependencyTracker
class dep_tracker {
svector<unsigned> m_bits;
public:
dep_tracker() = default;
explicit dep_tracker(unsigned num_bits);
dep_tracker(unsigned num_bits, unsigned set_bit);
void merge(dep_tracker const& other);
bool is_superset(dep_tracker const& other) const;
bool empty() const;
bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; }
bool operator!=(dep_tracker const& other) const { return !(*this == other); }
};
// string equality constraint: lhs = rhs
// mirrors ZIPT's StrEq (both sides are regex-free snode trees)
struct str_eq {
euf::snode* m_lhs;
euf::snode* m_rhs;
dep_tracker m_dep;
str_eq(): m_lhs(nullptr), m_rhs(nullptr) {}
str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep):
m_lhs(lhs), m_rhs(rhs), m_dep(dep) {}
bool operator==(str_eq const& other) const {
return m_lhs == other.m_lhs && m_rhs == other.m_rhs;
}
// sort so that lhs <= rhs by snode id
void sort();
// check if both sides are empty (trivially satisfied)
bool is_trivial() const;
// check if the constraint contains a given variable
bool contains_var(euf::snode* var) const;
};
// regex membership constraint: str in regex
// mirrors ZIPT's StrMem
struct str_mem {
euf::snode* m_str;
euf::snode* m_regex;
euf::snode* m_history; // tracks derivation history for cycle detection
unsigned m_id; // unique identifier
dep_tracker m_dep;
str_mem(): m_str(nullptr), m_regex(nullptr), m_history(nullptr), m_id(UINT_MAX) {}
str_mem(euf::snode* str, euf::snode* regex, euf::snode* history, unsigned id, dep_tracker const& dep):
m_str(str), m_regex(regex), m_history(history), m_id(id), m_dep(dep) {}
bool operator==(str_mem const& other) const {
return m_id == other.m_id && m_str == other.m_str && m_regex == other.m_regex;
}
// check if the constraint has the form x in R with x a single variable
bool is_primitive() const;
// check if the constraint contains a given variable
bool contains_var(euf::snode* var) const;
};
// string variable substitution: var -> replacement
// mirrors ZIPT's Subst
struct nielsen_subst {
euf::snode* m_var;
euf::snode* m_replacement;
dep_tracker m_dep;
nielsen_subst(): m_var(nullptr), m_replacement(nullptr) {}
nielsen_subst(euf::snode* var, euf::snode* repl, dep_tracker const& dep):
m_var(var), m_replacement(repl), m_dep(dep) {}
// an eliminating substitution does not contain the variable in the replacement
bool is_eliminating() const;
bool operator==(nielsen_subst const& other) const {
return m_var == other.m_var && m_replacement == other.m_replacement;
}
};
// edge in the Nielsen graph connecting two nodes
// mirrors ZIPT's NielsenEdge
class nielsen_edge {
nielsen_node* m_src;
nielsen_node* m_tgt;
vector<nielsen_subst> m_subst;
ptr_vector<str_eq> m_side_str_eq; // side constraints: string equalities
ptr_vector<str_mem> m_side_str_mem; // side constraints: regex memberships
bool m_is_progress; // does this edge represent progress?
public:
nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
nielsen_node* src() const { return m_src; }
nielsen_node* tgt() const { return m_tgt; }
void set_tgt(nielsen_node* tgt) { m_tgt = tgt; }
vector<nielsen_subst> const& subst() const { return m_subst; }
void add_subst(nielsen_subst const& s) { m_subst.push_back(s); }
void add_side_str_eq(str_eq* eq) { m_side_str_eq.push_back(eq); }
void add_side_str_mem(str_mem* mem) { m_side_str_mem.push_back(mem); }
ptr_vector<str_eq> const& side_str_eq() const { return m_side_str_eq; }
ptr_vector<str_mem> const& side_str_mem() const { return m_side_str_mem; }
bool is_progress() const { return m_is_progress; }
bool operator==(nielsen_edge const& other) const {
return m_src == other.m_src && m_tgt == other.m_tgt;
}
};
// node in the Nielsen graph
// mirrors ZIPT's NielsenNode
class nielsen_node {
friend class nielsen_graph;
unsigned m_id;
nielsen_graph* m_graph;
// constraints at this node
vector<str_eq> m_str_eq; // string equalities
vector<str_mem> m_str_mem; // regex memberships
// edges
ptr_vector<nielsen_edge> m_outgoing;
nielsen_node* m_backedge = nullptr;
// status flags
bool m_is_general_conflict = false;
bool m_is_extended = false;
backtrack_reason m_reason = backtrack_reason::unevaluated;
bool m_is_progress = false;
// evaluation index for run tracking
unsigned m_eval_idx = 0;
public:
nielsen_node(nielsen_graph* graph, unsigned id);
unsigned id() const { return m_id; }
nielsen_graph* graph() const { return m_graph; }
// constraint access
vector<str_eq> const& str_eqs() const { return m_str_eq; }
vector<str_eq>& str_eqs() { return m_str_eq; }
vector<str_mem> const& str_mems() const { return m_str_mem; }
vector<str_mem>& str_mems() { return m_str_mem; }
void add_str_eq(str_eq const& eq) { m_str_eq.push_back(eq); }
void add_str_mem(str_mem const& mem) { m_str_mem.push_back(mem); }
// edge access
ptr_vector<nielsen_edge> const& outgoing() const { return m_outgoing; }
void add_outgoing(nielsen_edge* e) { m_outgoing.push_back(e); }
nielsen_node* backedge() const { return m_backedge; }
void set_backedge(nielsen_node* n) { m_backedge = n; }
// status
bool is_general_conflict() const { return m_is_general_conflict; }
void set_general_conflict(bool v) { m_is_general_conflict = v; }
bool is_extended() const { return m_is_extended; }
void set_extended(bool v) { m_is_extended = v; }
bool is_currently_conflict() const {
return m_is_general_conflict ||
(m_reason != backtrack_reason::unevaluated && m_is_extended);
}
backtrack_reason reason() const { return m_reason; }
void set_reason(backtrack_reason r) { m_reason = r; }
bool is_progress() const { return m_is_progress; }
unsigned eval_idx() const { return m_eval_idx; }
void set_eval_idx(unsigned idx) { m_eval_idx = idx; }
void reset_counter() { m_eval_idx = 0; }
// clone constraints from a parent node
void clone_from(nielsen_node const& parent);
// apply a substitution to all constraints
void apply_subst(euf::sgraph& sg, nielsen_subst const& s);
};
// the overall Nielsen transformation graph
// mirrors ZIPT's NielsenGraph
class nielsen_graph {
euf::sgraph& m_sg;
region m_region;
ptr_vector<nielsen_node> m_nodes;
ptr_vector<nielsen_edge> m_edges;
nielsen_node* m_root = nullptr;
unsigned m_run_idx = 0;
unsigned m_depth_bound = 0;
unsigned m_next_mem_id = 0;
public:
nielsen_graph(euf::sgraph& sg);
~nielsen_graph();
euf::sgraph& sg() { return m_sg; }
// node management
nielsen_node* mk_node();
nielsen_node* mk_child(nielsen_node* parent);
// edge management
nielsen_edge* mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
// root node access
nielsen_node* root() const { return m_root; }
void set_root(nielsen_node* n) { m_root = n; }
// add constraints to the root node from external solver
void add_str_eq(euf::snode* lhs, euf::snode* rhs);
void add_str_mem(euf::snode* str, euf::snode* regex);
// run management
unsigned run_idx() const { return m_run_idx; }
void inc_run_idx();
// access all nodes
ptr_vector<nielsen_node> const& nodes() const { return m_nodes; }
unsigned num_nodes() const { return m_nodes.size(); }
// depth bound for iterative deepening
unsigned depth_bound() const { return m_depth_bound; }
void set_depth_bound(unsigned d) { m_depth_bound = d; }
// generate next unique regex membership id
unsigned next_mem_id() { return m_next_mem_id++; }
// display for debugging
std::ostream& display(std::ostream& out) const;
// reset all nodes and state
void reset();
};
}