diff --git a/scripts/mk_project.py b/scripts/mk_project.py index 7b4d444ea..777d5466b 100644 --- a/scripts/mk_project.py +++ b/scripts/mk_project.py @@ -68,6 +68,7 @@ def init_project_def(): add_lib('subpaving_tactic', ['core_tactics', 'subpaving'], 'math/subpaving/tactic') add_lib('proto_model', ['model', 'rewriter', 'params'], 'smt/proto_model') + add_lib('smt_seq', ['rewriter', 'euf'], 'smt/seq') add_lib('smt', ['bit_blaster', 'macros', 'normal_forms', 'cmd_context', 'proto_model', 'solver_assertions', 'substitution', 'grobner', 'simplex', 'proofs', 'pattern', 'parser_util', 'fpa', 'lp']) add_lib('sat_smt', ['sat', 'ast_sls', 'euf', 'smt', 'tactic', 'solver', 'params', 'bit_blaster', 'fpa', 'mbp', 'normal_forms', 'lp', 'pattern', 'qe_lite'], 'sat/smt') @@ -100,7 +101,7 @@ def init_project_def(): add_lib('api', ['portfolio', 'realclosure', 'opt', 'extra_cmds'], includes2install=['z3.h', 'z3_v1.h', 'z3_macros.h'] + API_files) add_exe('shell', ['api', 'sat', 'extra_cmds', 'opt'], exe_name='z3') - add_exe('test', ['api', 'fuzzing', 'simplex', 'sat_smt'], exe_name='test-z3', install=False) + add_exe('test', ['api', 'fuzzing', 'simplex', 'sat_smt', 'smt_seq'], exe_name='test-z3', install=False) _libz3Component = add_dll('api_dll', ['api', 'sat', 'extra_cmds'], 'api/dll', reexports=['api'], dll_name='libz3', diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1a6608849..d36b64112 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,6 +53,7 @@ add_subdirectory(ackermannization) add_subdirectory(ast/proofs) add_subdirectory(ast/fpa) add_subdirectory(smt/proto_model) +add_subdirectory(smt/seq) add_subdirectory(smt) add_subdirectory(tactic/bv) add_subdirectory(smt/tactic) diff --git a/src/smt/seq/CMakeLists.txt b/src/smt/seq/CMakeLists.txt new file mode 100644 index 000000000..db63e4c6f --- /dev/null +++ b/src/smt/seq/CMakeLists.txt @@ -0,0 +1,7 @@ +z3_add_component(smt_seq + SOURCES + seq_nielsen.cpp + COMPONENT_DEPENDENCIES + euf + rewriter +) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp new file mode 100644 index 000000000..90b1ffa2a --- /dev/null +++ b/src/smt/seq/seq_nielsen.cpp @@ -0,0 +1,311 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_nielsen.cpp + +Abstract: + + Nielsen graph implementation for string constraint solving. + + Ports the constraint types and Nielsen graph structures from + ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints) + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-02 + Clemens Eisenhofer 2026-03-02 + +--*/ + +#include "smt/seq/seq_nielsen.h" +#include "ast/ast_pp.h" + +namespace seq { + + // ----------------------------------------------- + // dep_tracker + // ----------------------------------------------- + + dep_tracker::dep_tracker(unsigned num_bits) { + unsigned words = (num_bits + 31) / 32; + m_bits.resize(words, 0); + } + + dep_tracker::dep_tracker(unsigned num_bits, unsigned set_bit) { + unsigned words = (num_bits + 31) / 32; + m_bits.resize(words, 0); + if (set_bit < num_bits) { + unsigned word_idx = set_bit / 32; + unsigned bit_idx = set_bit % 32; + m_bits[word_idx] = 1u << bit_idx; + } + } + + void dep_tracker::merge(dep_tracker const& other) { + if (other.m_bits.empty()) + return; + if (m_bits.size() < other.m_bits.size()) + m_bits.resize(other.m_bits.size(), 0); + for (unsigned i = 0; i < other.m_bits.size(); ++i) + m_bits[i] |= other.m_bits[i]; + } + + bool dep_tracker::is_superset(dep_tracker const& other) const { + for (unsigned i = 0; i < other.m_bits.size(); ++i) { + unsigned my_bits = (i < m_bits.size()) ? m_bits[i] : 0; + if ((my_bits & other.m_bits[i]) != other.m_bits[i]) + return false; + } + return true; + } + + bool dep_tracker::empty() const { + for (unsigned b : m_bits) + if (b != 0) return false; + return true; + } + + // ----------------------------------------------- + // str_eq + // ----------------------------------------------- + + void str_eq::sort() { + if (m_lhs && m_rhs && m_lhs->id() > m_rhs->id()) + std::swap(m_lhs, m_rhs); + } + + bool str_eq::is_trivial() const { + return m_lhs == m_rhs || + (m_lhs && m_rhs && m_lhs->is_empty() && m_rhs->is_empty()); + } + + bool str_eq::contains_var(euf::snode* var) const { + if (!var) return false; + // check if var appears in the token list of lhs or rhs + if (m_lhs) { + euf::snode_vector tokens; + m_lhs->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == var) return true; + } + if (m_rhs) { + euf::snode_vector tokens; + m_rhs->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == var) return true; + } + return false; + } + + // ----------------------------------------------- + // str_mem + // ----------------------------------------------- + + bool str_mem::is_primitive() const { + return m_str && m_str->length() == 1 && m_str->is_var(); + } + + bool str_mem::contains_var(euf::snode* var) const { + if (!var) return false; + if (m_str) { + euf::snode_vector tokens; + m_str->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == var) return true; + } + return false; + } + + // ----------------------------------------------- + // nielsen_subst + // ----------------------------------------------- + + bool nielsen_subst::is_eliminating() const { + if (!m_var || !m_replacement) return true; + // check if var appears in replacement + euf::snode_vector tokens; + m_replacement->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == m_var) return false; + return true; + } + + // ----------------------------------------------- + // nielsen_edge + // ----------------------------------------------- + + nielsen_edge::nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress): + m_src(src), m_tgt(tgt), m_is_progress(is_progress) { + } + + // ----------------------------------------------- + // nielsen_node + // ----------------------------------------------- + + nielsen_node::nielsen_node(nielsen_graph* graph, unsigned id): + m_id(id), m_graph(graph), m_is_progress(true) { + } + + void nielsen_node::clone_from(nielsen_node const& parent) { + m_str_eq.reset(); + m_str_mem.reset(); + for (auto const& eq : parent.m_str_eq) + m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep)); + for (auto const& mem : parent.m_str_mem) + m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep)); + } + + void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) { + if (!s.m_var) return; + for (unsigned i = 0; i < m_str_eq.size(); ++i) { + str_eq& eq = m_str_eq[i]; + eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement); + eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement); + eq.m_dep.merge(s.m_dep); + eq.sort(); + } + for (unsigned i = 0; i < m_str_mem.size(); ++i) { + str_mem& mem = m_str_mem[i]; + mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement); + // regex is typically ground, but apply subst for generality + mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement); + mem.m_dep.merge(s.m_dep); + } + } + + // ----------------------------------------------- + // nielsen_graph + // ----------------------------------------------- + + nielsen_graph::nielsen_graph(euf::sgraph& sg): + m_sg(sg) { + } + + nielsen_graph::~nielsen_graph() { + reset(); + } + + nielsen_node* nielsen_graph::mk_node() { + unsigned id = m_nodes.size(); + nielsen_node* n = alloc(nielsen_node, this, id); + m_nodes.push_back(n); + return n; + } + + nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) { + nielsen_node* child = mk_node(); + child->clone_from(*parent); + return child; + } + + nielsen_edge* nielsen_graph::mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress) { + nielsen_edge* e = alloc(nielsen_edge, src, tgt, is_progress); + m_edges.push_back(e); + src->add_outgoing(e); + return e; + } + + void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) { + if (!m_root) + m_root = mk_node(); + dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1, + m_root->str_eqs().size()); + str_eq eq(lhs, rhs, dep); + eq.sort(); + m_root->add_str_eq(eq); + } + + void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) { + if (!m_root) + m_root = mk_node(); + dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1, + m_root->str_eqs().size() + m_root->str_mems().size()); + euf::snode* history = m_sg.mk_empty(); + unsigned id = next_mem_id(); + m_root->add_str_mem(str_mem(str, regex, history, id, dep)); + } + + void nielsen_graph::inc_run_idx() { + if (m_run_idx == UINT_MAX) { + for (nielsen_node* n : m_nodes) + n->reset_counter(); + m_run_idx = 1; + } + else + ++m_run_idx; + } + + void nielsen_graph::reset() { + for (nielsen_node* n : m_nodes) + dealloc(n); + for (nielsen_edge* e : m_edges) + dealloc(e); + m_nodes.reset(); + m_edges.reset(); + m_root = nullptr; + m_run_idx = 0; + m_depth_bound = 0; + m_next_mem_id = 0; + } + + std::ostream& nielsen_graph::display(std::ostream& out) const { + out << "nielsen_graph with " << m_nodes.size() << " nodes, " + << m_edges.size() << " edges\n"; + + for (nielsen_node const* n : m_nodes) { + out << " node[" << n->id() << "]"; + if (n == m_root) + out << " (root)"; + if (n->is_general_conflict()) + out << " CONFLICT"; + if (n->is_extended()) + out << " EXTENDED"; + out << "\n"; + + // display string equalities + for (auto const& eq : n->str_eqs()) { + out << " str_eq: "; + if (eq.m_lhs) out << "lhs[id=" << eq.m_lhs->id() << ",len=" << eq.m_lhs->length() << "]"; + else out << "null"; + out << " = "; + if (eq.m_rhs) out << "rhs[id=" << eq.m_rhs->id() << ",len=" << eq.m_rhs->length() << "]"; + else out << "null"; + out << "\n"; + } + + // display regex memberships + for (auto const& mem : n->str_mems()) { + out << " str_mem[" << mem.m_id << "]: "; + if (mem.m_str) out << "str[id=" << mem.m_str->id() << ",len=" << mem.m_str->length() << "]"; + else out << "null"; + out << " in "; + if (mem.m_regex) out << "re[id=" << mem.m_regex->id() << "]"; + else out << "null"; + out << "\n"; + } + + // display outgoing edges + for (nielsen_edge const* e : n->outgoing()) { + out << " -> node[" << e->tgt()->id() << "]"; + if (e->is_progress()) out << " (progress)"; + for (auto const& s : e->subst()) { + out << " {"; + if (s.m_var) out << "var[" << s.m_var->id() << "]"; + out << " -> "; + if (s.m_replacement) out << "repl[" << s.m_replacement->id() << ",len=" << s.m_replacement->length() << "]"; + else out << "eps"; + out << "}"; + } + out << "\n"; + } + + if (n->backedge()) + out << " backedge -> node[" << n->backedge()->id() << "]\n"; + } + + return out; + } + +} diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h new file mode 100644 index 000000000..10120dc6e --- /dev/null +++ b/src/smt/seq/seq_nielsen.h @@ -0,0 +1,327 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_nielsen.h + +Abstract: + + Nielsen graph for string constraint solving. + + Ports the constraint types and Nielsen graph structures from + ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints) + into Z3's smt/seq framework. + + The Nielsen graph is used for solving word equations and regex + membership constraints via Nielsen transformations. Each node + contains a set of constraints (string equalities, regex memberships, + integer equalities/inequalities) and edges represent substitutions + that transform one constraint set into another. + + Key components: + -- str_eq: string equality constraint (lhs = rhs) + -- str_mem: regex membership constraint (str in regex) + -- nielsen_subst: variable substitution (var -> replacement) + -- nielsen_edge: graph edge with substitutions and side constraints + -- nielsen_node: graph node with constraint set and outgoing edges + -- nielsen_graph: the overall Nielsen transformation graph + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-02 + Clemens Eisenhofer 2026-03-02 + +--*/ + +#pragma once + +#include "util/vector.h" +#include "util/uint_set.h" +#include "ast/ast.h" +#include "ast/seq_decl_plugin.h" +#include "ast/euf/euf_sgraph.h" + +namespace seq { + + // forward declarations + class nielsen_node; + class nielsen_edge; + class nielsen_graph; + + // simplification result for constraint processing + // mirrors ZIPT's SimplifyResult enum + enum class simplify_result { + proceed, // no change, continue + conflict, // constraint is unsatisfiable + satisfied, // constraint is trivially satisfied + restart, // constraint was simplified, restart + restart_and_satisfied, // simplified and satisfied + }; + + // reason for backtracking in the Nielsen graph + // mirrors ZIPT's BacktrackReasons enum + enum class backtrack_reason { + unevaluated, + extended, + symbol_clash, + parikh_image, + subsumption, + arithmetic, + regex, + regex_widening, + character_range, + smt, + children_failed, + }; + + // dependency tracker: bitvector tracking which input constraints + // contributed to deriving a given constraint + // mirrors ZIPT's DependencyTracker + class dep_tracker { + svector m_bits; + public: + dep_tracker() = default; + explicit dep_tracker(unsigned num_bits); + dep_tracker(unsigned num_bits, unsigned set_bit); + + void merge(dep_tracker const& other); + bool is_superset(dep_tracker const& other) const; + bool empty() const; + + bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; } + bool operator!=(dep_tracker const& other) const { return !(*this == other); } + }; + + // string equality constraint: lhs = rhs + // mirrors ZIPT's StrEq (both sides are regex-free snode trees) + struct str_eq { + euf::snode* m_lhs; + euf::snode* m_rhs; + dep_tracker m_dep; + + str_eq(): m_lhs(nullptr), m_rhs(nullptr) {} + str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep): + m_lhs(lhs), m_rhs(rhs), m_dep(dep) {} + + bool operator==(str_eq const& other) const { + return m_lhs == other.m_lhs && m_rhs == other.m_rhs; + } + + // sort so that lhs <= rhs by snode id + void sort(); + + // check if both sides are empty (trivially satisfied) + bool is_trivial() const; + + // check if the constraint contains a given variable + bool contains_var(euf::snode* var) const; + }; + + // regex membership constraint: str in regex + // mirrors ZIPT's StrMem + struct str_mem { + euf::snode* m_str; + euf::snode* m_regex; + euf::snode* m_history; // tracks derivation history for cycle detection + unsigned m_id; // unique identifier + dep_tracker m_dep; + + str_mem(): m_str(nullptr), m_regex(nullptr), m_history(nullptr), m_id(UINT_MAX) {} + str_mem(euf::snode* str, euf::snode* regex, euf::snode* history, unsigned id, dep_tracker const& dep): + m_str(str), m_regex(regex), m_history(history), m_id(id), m_dep(dep) {} + + bool operator==(str_mem const& other) const { + return m_id == other.m_id && m_str == other.m_str && m_regex == other.m_regex; + } + + // check if the constraint has the form x in R with x a single variable + bool is_primitive() const; + + // check if the constraint contains a given variable + bool contains_var(euf::snode* var) const; + }; + + // string variable substitution: var -> replacement + // mirrors ZIPT's Subst + struct nielsen_subst { + euf::snode* m_var; + euf::snode* m_replacement; + dep_tracker m_dep; + + nielsen_subst(): m_var(nullptr), m_replacement(nullptr) {} + nielsen_subst(euf::snode* var, euf::snode* repl, dep_tracker const& dep): + m_var(var), m_replacement(repl), m_dep(dep) {} + + // an eliminating substitution does not contain the variable in the replacement + bool is_eliminating() const; + + bool operator==(nielsen_subst const& other) const { + return m_var == other.m_var && m_replacement == other.m_replacement; + } + }; + + // edge in the Nielsen graph connecting two nodes + // mirrors ZIPT's NielsenEdge + class nielsen_edge { + nielsen_node* m_src; + nielsen_node* m_tgt; + vector m_subst; + ptr_vector m_side_str_eq; // side constraints: string equalities + ptr_vector m_side_str_mem; // side constraints: regex memberships + bool m_is_progress; // does this edge represent progress? + public: + nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress); + + nielsen_node* src() const { return m_src; } + nielsen_node* tgt() const { return m_tgt; } + + void set_tgt(nielsen_node* tgt) { m_tgt = tgt; } + + vector const& subst() const { return m_subst; } + void add_subst(nielsen_subst const& s) { m_subst.push_back(s); } + + void add_side_str_eq(str_eq* eq) { m_side_str_eq.push_back(eq); } + void add_side_str_mem(str_mem* mem) { m_side_str_mem.push_back(mem); } + + ptr_vector const& side_str_eq() const { return m_side_str_eq; } + ptr_vector const& side_str_mem() const { return m_side_str_mem; } + + bool is_progress() const { return m_is_progress; } + + bool operator==(nielsen_edge const& other) const { + return m_src == other.m_src && m_tgt == other.m_tgt; + } + }; + + // node in the Nielsen graph + // mirrors ZIPT's NielsenNode + class nielsen_node { + friend class nielsen_graph; + + unsigned m_id; + nielsen_graph* m_graph; + + // constraints at this node + vector m_str_eq; // string equalities + vector m_str_mem; // regex memberships + + // edges + ptr_vector m_outgoing; + nielsen_node* m_backedge = nullptr; + + // status flags + bool m_is_general_conflict = false; + bool m_is_extended = false; + backtrack_reason m_reason = backtrack_reason::unevaluated; + bool m_is_progress = false; + + // evaluation index for run tracking + unsigned m_eval_idx = 0; + + public: + nielsen_node(nielsen_graph* graph, unsigned id); + + unsigned id() const { return m_id; } + nielsen_graph* graph() const { return m_graph; } + + // constraint access + vector const& str_eqs() const { return m_str_eq; } + vector& str_eqs() { return m_str_eq; } + vector const& str_mems() const { return m_str_mem; } + vector& str_mems() { return m_str_mem; } + + void add_str_eq(str_eq const& eq) { m_str_eq.push_back(eq); } + void add_str_mem(str_mem const& mem) { m_str_mem.push_back(mem); } + + // edge access + ptr_vector const& outgoing() const { return m_outgoing; } + void add_outgoing(nielsen_edge* e) { m_outgoing.push_back(e); } + + nielsen_node* backedge() const { return m_backedge; } + void set_backedge(nielsen_node* n) { m_backedge = n; } + + // status + bool is_general_conflict() const { return m_is_general_conflict; } + void set_general_conflict(bool v) { m_is_general_conflict = v; } + + bool is_extended() const { return m_is_extended; } + void set_extended(bool v) { m_is_extended = v; } + + bool is_currently_conflict() const { + return m_is_general_conflict || + (m_reason != backtrack_reason::unevaluated && m_is_extended); + } + + backtrack_reason reason() const { return m_reason; } + void set_reason(backtrack_reason r) { m_reason = r; } + + bool is_progress() const { return m_is_progress; } + + unsigned eval_idx() const { return m_eval_idx; } + void set_eval_idx(unsigned idx) { m_eval_idx = idx; } + void reset_counter() { m_eval_idx = 0; } + + // clone constraints from a parent node + void clone_from(nielsen_node const& parent); + + // apply a substitution to all constraints + void apply_subst(euf::sgraph& sg, nielsen_subst const& s); + }; + + // the overall Nielsen transformation graph + // mirrors ZIPT's NielsenGraph + class nielsen_graph { + euf::sgraph& m_sg; + region m_region; + ptr_vector m_nodes; + ptr_vector m_edges; + nielsen_node* m_root = nullptr; + unsigned m_run_idx = 0; + unsigned m_depth_bound = 0; + unsigned m_next_mem_id = 0; + + public: + nielsen_graph(euf::sgraph& sg); + ~nielsen_graph(); + + euf::sgraph& sg() { return m_sg; } + + // node management + nielsen_node* mk_node(); + nielsen_node* mk_child(nielsen_node* parent); + + // edge management + nielsen_edge* mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress); + + // root node access + nielsen_node* root() const { return m_root; } + void set_root(nielsen_node* n) { m_root = n; } + + // add constraints to the root node from external solver + void add_str_eq(euf::snode* lhs, euf::snode* rhs); + void add_str_mem(euf::snode* str, euf::snode* regex); + + // run management + unsigned run_idx() const { return m_run_idx; } + void inc_run_idx(); + + // access all nodes + ptr_vector const& nodes() const { return m_nodes; } + unsigned num_nodes() const { return m_nodes.size(); } + + // depth bound for iterative deepening + unsigned depth_bound() const { return m_depth_bound; } + void set_depth_bound(unsigned d) { m_depth_bound = d; } + + // generate next unique regex membership id + unsigned next_mem_id() { return m_next_mem_id++; } + + // display for debugging + std::ostream& display(std::ostream& out) const; + + // reset all nodes and state + void reset(); + }; + +} diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 1b00f70db..b0d0a6a1e 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -3,7 +3,7 @@ add_subdirectory(lp) ################################################################################ # z3-test executable ################################################################################ -set(z3_test_deps api fuzzing simplex) +set(z3_test_deps api fuzzing simplex smt_seq) z3_expand_dependencies(z3_test_expanded_deps ${z3_test_deps}) set (z3_test_extra_object_files "") foreach (component ${z3_test_expanded_deps}) @@ -131,6 +131,7 @@ add_executable(test-z3 simplifier.cpp sls_test.cpp sls_seq_plugin.cpp + seq_nielsen.cpp small_object_allocator.cpp smt2print_parse.cpp smt_context.cpp diff --git a/src/test/main.cpp b/src/test/main.cpp index 17b6c8d33..734dfe4b7 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -286,6 +286,7 @@ int main(int argc, char ** argv) { TST(sls_test); TST(scoped_vector); TST(sls_seq_plugin); + TST(seq_nielsen); TST(ho_matcher); TST(finite_set); TST(finite_set_rewriter); diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp new file mode 100644 index 000000000..f994f5a70 --- /dev/null +++ b/src/test/seq_nielsen.cpp @@ -0,0 +1,479 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_nielsen.cpp + +Abstract: + + Unit tests for the Nielsen graph framework (seq_nielsen.h). + Tests constraint types, node/edge construction, substitution + application, and graph population. + +--*/ + +#include "util/util.h" +#include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/seq/seq_nielsen.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include + +// test dep_tracker basic operations +static void test_dep_tracker() { + std::cout << "test_dep_tracker\n"; + + // empty tracker + seq::dep_tracker d0; + SASSERT(d0.empty()); + + // tracker with one bit set + seq::dep_tracker d1(8, 3); + SASSERT(!d1.empty()); + + // tracker with another bit + seq::dep_tracker d2(8, 5); + SASSERT(!d2.empty()); + + // merge + seq::dep_tracker d3 = d1; + d3.merge(d2); + SASSERT(!d3.empty()); + SASSERT(d3.is_superset(d1)); + SASSERT(d3.is_superset(d2)); + SASSERT(!d1.is_superset(d2)); + + // equality + seq::dep_tracker d4(8, 3); + SASSERT(d1 == d4); + SASSERT(d1 != d2); +} + +// test str_eq constraint creation and operations +static void test_str_eq() { + std::cout << "test_str_eq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* e = sg.mk_empty(); + + seq::dep_tracker dep(4, 0); + + // basic equality + seq::str_eq eq1(x, y, dep); + SASSERT(!eq1.is_trivial()); + SASSERT(eq1.contains_var(x)); + SASSERT(eq1.contains_var(y)); + SASSERT(!eq1.contains_var(a)); + + // trivial equality: same node + seq::str_eq eq2(x, x, dep); + SASSERT(eq2.is_trivial()); + + // trivial equality: both empty + seq::str_eq eq3(e, e, dep); + SASSERT(eq3.is_trivial()); + + // sorting: lower id first + seq::str_eq eq4(y, x, dep); + eq4.sort(); + SASSERT(eq4.m_lhs->id() <= eq4.m_rhs->id()); + + // contains_var with concat + euf::snode* xa = sg.mk_concat(x, a); + seq::str_eq eq5(xa, y, dep); + SASSERT(eq5.contains_var(x)); + SASSERT(eq5.contains_var(y)); + SASSERT(!eq5.contains_var(e)); +} + +// test str_mem constraint creation and operations +static void test_str_mem() { + std::cout << "test_str_mem\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* e = sg.mk_empty(); + + // create a regex: re.all (.*) + expr_ref star_fc(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(star_fc); + + seq::dep_tracker dep(4, 1); + seq::str_mem mem(x, regex, e, 0, dep); + + // x in regex is primitive (x is a single variable) + SASSERT(mem.is_primitive()); + SASSERT(mem.contains_var(x)); + + // concatenation is not primitive + euf::snode* a = sg.mk_char('A'); + euf::snode* xa = sg.mk_concat(x, a); + seq::str_mem mem2(xa, regex, e, 1, dep); + SASSERT(!mem2.is_primitive()); + SASSERT(mem2.contains_var(x)); +} + +// test nielsen_subst +static void test_nielsen_subst() { + std::cout << "test_nielsen_subst\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* e = sg.mk_empty(); + + seq::dep_tracker dep; + + // eliminating substitution: x -> A (x does not appear in A) + seq::nielsen_subst s1(x, a, dep); + SASSERT(s1.is_eliminating()); + + // eliminating substitution: x -> empty + seq::nielsen_subst s2(x, e, dep); + SASSERT(s2.is_eliminating()); + + // non-eliminating substitution: x -> concat(A, x) + euf::snode* ax = sg.mk_concat(a, x); + seq::nielsen_subst s3(x, ax, dep); + SASSERT(!s3.is_eliminating()); + + // eliminating substitution: x -> y (x not in y) + seq::nielsen_subst s4(x, y, dep); + SASSERT(s4.is_eliminating()); +} + +// test nielsen_node creation and constraint management +static void test_nielsen_node() { + std::cout << "test_nielsen_node\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + seq::nielsen_node* root = ng.mk_node(); + SASSERT(root->id() == 0); + SASSERT(root->str_eqs().empty()); + SASSERT(root->str_mems().empty()); + SASSERT(root->is_progress()); + SASSERT(root->reason() == seq::backtrack_reason::unevaluated); + + // add constraints + seq::dep_tracker dep; + root->add_str_eq(seq::str_eq(x, y, dep)); + root->add_str_eq(seq::str_eq(sg.mk_concat(x, a), sg.mk_concat(a, y), dep)); + SASSERT(root->str_eqs().size() == 2); + + // regex membership + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(re_all); + euf::snode* empty = sg.mk_empty(); + root->add_str_mem(seq::str_mem(x, regex, empty, 0, dep)); + SASSERT(root->str_mems().size() == 1); + + // clone from parent + seq::nielsen_node* child = ng.mk_node(); + child->clone_from(*root); + SASSERT(child->str_eqs().size() == 2); + SASSERT(child->str_mems().size() == 1); + SASSERT(child->id() == 1); +} + +// test nielsen_edge creation +static void test_nielsen_edge() { + std::cout << "test_nielsen_edge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + // create parent and child nodes + seq::nielsen_node* parent = ng.mk_node(); + seq::dep_tracker dep; + parent->add_str_eq(seq::str_eq(x, y, dep)); + + seq::nielsen_node* child = ng.mk_child(parent); + + // create edge with substitution x -> A + seq::nielsen_edge* edge = ng.mk_edge(parent, child, true); + edge->add_subst(seq::nielsen_subst(x, a, dep)); + + SASSERT(edge->src() == parent); + SASSERT(edge->tgt() == child); + SASSERT(edge->is_progress()); + SASSERT(edge->subst().size() == 1); + SASSERT(parent->outgoing().size() == 1); + SASSERT(parent->outgoing()[0] == edge); +} + +// test nielsen_graph population from external constraints +static void test_nielsen_graph_populate() { + std::cout << "test_nielsen_graph_populate\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + // add string equality: x = y + ng.add_str_eq(x, y); + SASSERT(ng.root() != nullptr); + SASSERT(ng.root()->str_eqs().size() == 1); + SASSERT(ng.num_nodes() == 1); + + // add regex membership: x in .* + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(re_all); + ng.add_str_mem(x, regex); + SASSERT(ng.root()->str_mems().size() == 1); + SASSERT(ng.root()->str_mems()[0].m_id == 0); + + // add another equality: concat(x, A) = concat(A, y) + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* ay = sg.mk_concat(a, y); + ng.add_str_eq(xa, ay); + SASSERT(ng.root()->str_eqs().size() == 2); + + // display for visual inspection + ng.display(std::cout); +} + +// test substitution application on nielsen_node +static void test_nielsen_subst_apply() { + std::cout << "test_nielsen_subst_apply\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* e = sg.mk_empty(); + + // create node with constraint: concat(x, A) = concat(B, y) + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep; + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* by = sg.mk_concat(b, y); + node->add_str_eq(seq::str_eq(xa, by, dep)); + + // apply substitution x -> empty + seq::nielsen_subst s(x, e, dep); + node->apply_subst(sg, s); + + // after x -> empty: lhs should be just A, rhs still concat(B, y) + SASSERT(node->str_eqs().size() == 1); + auto const& eq = node->str_eqs()[0]; + // a should remain (after x replaced with empty, concat(empty, A) = A) + std::cout << " lhs len=" << eq.m_lhs->length() << " rhs len=" << eq.m_rhs->length() << "\n"; +} + +// test Nielsen graph reset +static void test_nielsen_graph_reset() { + std::cout << "test_nielsen_graph_reset\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(x, y); + SASSERT(ng.num_nodes() == 1); + SASSERT(ng.root() != nullptr); + + ng.reset(); + SASSERT(ng.num_nodes() == 0); + SASSERT(ng.root() == nullptr); +} + +// test constructing a basic Nielsen expansion tree +// x = Ay: split into x -> eps (progress) or x -> Ax (non-progress) +static void test_nielsen_expansion() { + std::cout << "test_nielsen_expansion\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* ay = sg.mk_concat(a, y); + + // root: x = Ay + ng.add_str_eq(x, ay); + seq::nielsen_node* root = ng.root(); + SASSERT(root->str_eqs().size() == 1); + + seq::dep_tracker dep; + + // branch 1: x -> eps (eliminating, progress) + euf::snode* e = sg.mk_empty(); + seq::nielsen_node* child1 = ng.mk_child(root); + seq::nielsen_subst s1(x, e, dep); + child1->apply_subst(sg, s1); + seq::nielsen_edge* edge1 = ng.mk_edge(root, child1, true); + edge1->add_subst(s1); + + // branch 2: x -> Ax (non-eliminating, non-progress) + euf::snode* ax = sg.mk_concat(a, x); + seq::nielsen_node* child2 = ng.mk_child(root); + seq::nielsen_subst s2(x, ax, dep); + child2->apply_subst(sg, s2); + seq::nielsen_edge* edge2 = ng.mk_edge(root, child2, false); + edge2->add_subst(s2); + + SASSERT(ng.num_nodes() == 3); + SASSERT(root->outgoing().size() == 2); + SASSERT(edge1->is_progress()); + SASSERT(!edge2->is_progress()); + + // verify substitution effects on child1: eps = Ay becomes empty = Ay + SASSERT(child1->str_eqs().size() == 1); + + ng.display(std::cout); +} + +// test run index management +static void test_run_idx() { + std::cout << "test_run_idx\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + SASSERT(ng.run_idx() == 0); + + ng.inc_run_idx(); + SASSERT(ng.run_idx() == 1); + + ng.inc_run_idx(); + SASSERT(ng.run_idx() == 2); +} + +// test multiple regex memberships +static void test_multiple_memberships() { + std::cout << "test_multiple_memberships\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + // x in .* + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex1 = sg.mk(re_all); + ng.add_str_mem(x, regex1); + + // x in re.union(to_re("A"), to_re("B")) + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref to_re_b(seq.re.mk_to_re(unit_b), m); + expr_ref re_union(seq.re.mk_union(to_re_a, to_re_b), m); + euf::snode* regex2 = sg.mk(re_union); + ng.add_str_mem(x, regex2); + + SASSERT(ng.root() != nullptr); + SASSERT(ng.root()->str_mems().size() == 2); + SASSERT(ng.root()->str_mems()[0].m_id == 0); + SASSERT(ng.root()->str_mems()[1].m_id == 1); + + ng.display(std::cout); +} + +// test backedge setting (cycle detection support) +static void test_backedge() { + std::cout << "test_backedge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + seq::nielsen_node* child = ng.mk_child(root); + + // set backedge from child to root (cycle) + child->set_backedge(root); + SASSERT(child->backedge() == root); + SASSERT(root->backedge() == nullptr); +} + +void tst_seq_nielsen() { + test_dep_tracker(); + test_str_eq(); + test_str_mem(); + test_nielsen_subst(); + test_nielsen_node(); + test_nielsen_edge(); + test_nielsen_graph_populate(); + test_nielsen_subst_apply(); + test_nielsen_graph_reset(); + test_nielsen_expansion(); + test_run_idx(); + test_multiple_memberships(); + test_backedge(); +}