diff --git a/scripts/mk_project.py b/scripts/mk_project.py index 7b4d444ea..777d5466b 100644 --- a/scripts/mk_project.py +++ b/scripts/mk_project.py @@ -68,6 +68,7 @@ def init_project_def(): add_lib('subpaving_tactic', ['core_tactics', 'subpaving'], 'math/subpaving/tactic') add_lib('proto_model', ['model', 'rewriter', 'params'], 'smt/proto_model') + add_lib('smt_seq', ['rewriter', 'euf'], 'smt/seq') add_lib('smt', ['bit_blaster', 'macros', 'normal_forms', 'cmd_context', 'proto_model', 'solver_assertions', 'substitution', 'grobner', 'simplex', 'proofs', 'pattern', 'parser_util', 'fpa', 'lp']) add_lib('sat_smt', ['sat', 'ast_sls', 'euf', 'smt', 'tactic', 'solver', 'params', 'bit_blaster', 'fpa', 'mbp', 'normal_forms', 'lp', 'pattern', 'qe_lite'], 'sat/smt') @@ -100,7 +101,7 @@ def init_project_def(): add_lib('api', ['portfolio', 'realclosure', 'opt', 'extra_cmds'], includes2install=['z3.h', 'z3_v1.h', 'z3_macros.h'] + API_files) add_exe('shell', ['api', 'sat', 'extra_cmds', 'opt'], exe_name='z3') - add_exe('test', ['api', 'fuzzing', 'simplex', 'sat_smt'], exe_name='test-z3', install=False) + add_exe('test', ['api', 'fuzzing', 'simplex', 'sat_smt', 'smt_seq'], exe_name='test-z3', install=False) _libz3Component = add_dll('api_dll', ['api', 'sat', 'extra_cmds'], 'api/dll', reexports=['api'], dll_name='libz3', diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1a6608849..d36b64112 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,6 +53,7 @@ add_subdirectory(ackermannization) add_subdirectory(ast/proofs) add_subdirectory(ast/fpa) add_subdirectory(smt/proto_model) +add_subdirectory(smt/seq) add_subdirectory(smt) add_subdirectory(tactic/bv) add_subdirectory(smt/tactic) diff --git a/src/ast/euf/CMakeLists.txt b/src/ast/euf/CMakeLists.txt index 41f073ad1..edd99487f 100644 --- a/src/ast/euf/CMakeLists.txt +++ b/src/ast/euf/CMakeLists.txt @@ -9,6 +9,8 @@ z3_add_component(euf euf_justification.cpp euf_mam.cpp euf_plugin.cpp + euf_seq_plugin.cpp + euf_sgraph.cpp euf_specrel_plugin.cpp ho_matcher.cpp COMPONENT_DEPENDENCIES diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp new file mode 100644 index 000000000..0a025c5c7 --- /dev/null +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -0,0 +1,296 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_seq_plugin.cpp + +Abstract: + + Plugin structure for sequences/strings. + + Merges equivalence classes taking into account associativity + of concatenation and algebraic properties of strings and + regular expressions. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" +#include "ast/ast_pp.h" + +namespace euf { + + // Check if enode is any kind of concat (str.++ or re.++) + static bool is_any_concat(enode* n, seq_util const& seq) { + expr* a = nullptr, *b = nullptr; + return seq.str.is_concat(n->get_expr(), a, b) || seq.re.is_concat(n->get_expr(), a, b); + } + + // Collect leaves of a concat tree in left-to-right order. + // For non-concat nodes, the node itself is a leaf. + // Handles both str.++ and re.++. + static void collect_enode_leaves(enode* n, seq_util const& seq, enode_vector& leaves) { + if (is_any_concat(n, seq)) { + collect_enode_leaves(n->get_arg(0), seq, leaves); + collect_enode_leaves(n->get_arg(1), seq, leaves); + } + else { + leaves.push_back(n); + } + } + + unsigned enode_concat_hash::operator()(enode* n) const { + snode* sn = sg.find(n->get_expr()); + if (sn && sn->has_cached_hash()) + return sn->assoc_hash(); + if (!is_any_concat(n, seq)) + return n->get_id(); + enode_vector leaves; + collect_enode_leaves(n, seq, leaves); + unsigned h = 0; + for (enode* l : leaves) + h = combine_hash(h, l->get_id()); + return h; + } + + bool enode_concat_eq::operator()(enode* a, enode* b) const { + if (a == b) return true; + if (!is_any_concat(a, seq) || !is_any_concat(b, seq)) + return false; + enode_vector la, lb; + collect_enode_leaves(a, seq, la); + collect_enode_leaves(b, seq, lb); + if (la.size() != lb.size()) + return false; + for (unsigned i = 0; i < la.size(); ++i) + if (la[i] != lb[i]) + return false; + return true; + } + + seq_plugin::seq_plugin(egraph& g, sgraph* sg): + plugin(g), + m_seq(g.get_manager()), + m_rewriter(g.get_manager()), + m_sg(sg ? *sg : *alloc(sgraph, g.get_manager(), g, false)), + m_sg_owned(sg == nullptr), + m_concat_hash(m_seq, m_sg), + m_concat_eq(m_seq), + m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) { + } + + seq_plugin::~seq_plugin() { + if (m_sg_owned) + dealloc(&m_sg); + } + + void seq_plugin::register_node(enode* n) { + m_queue.push_back(n); + push_undo(undo_kind::undo_add_concat); + } + + void seq_plugin::merge_eh(enode* n1, enode* n2) { + m_queue.push_back(enode_pair(n1, n2)); + push_undo(undo_kind::undo_add_concat); + } + + void seq_plugin::push_undo(undo_kind k) { + m_undo.push_back(k); + push_plugin_undo(get_id()); + } + + void seq_plugin::propagate() { + if (m_qhead == m_queue.size()) + return; + for (; m_qhead < m_queue.size(); ++m_qhead) { + if (g.inconsistent()) + break; + if (std::holds_alternative(m_queue[m_qhead])) { + auto n = std::get(m_queue[m_qhead]); + propagate_register_node(n); + } + else { + auto [a, b] = std::get(m_queue[m_qhead]); + propagate_merge(a, b); + } + } + } + + void seq_plugin::propagate_register_node(enode* n) { + if (!m_seq.is_seq(n->get_expr()) && !m_seq.is_re(n->get_expr())) + return; + + TRACE(seq, tout << "seq register " << g.bpp(n) << "\n"); + + if (is_concat(n)) { + propagate_assoc(n); + propagate_simplify(n); + } + + // str.++ identity: concat(a, ε) = a, concat(ε, b) = b + enode* a, *b; + if (is_str_concat(n, a, b)) { + if (is_str_empty(a)) + push_merge(n, b); + else if (is_str_empty(b)) + push_merge(n, a); + } + + // re.++ identity: concat(a, epsilon) = a, concat(epsilon, b) = b + // re.++ absorption: concat(a, ∅) = ∅, concat(∅, b) = ∅ + if (is_re_concat(n, a, b)) { + if (is_re_epsilon(a)) + push_merge(n, b); + else if (is_re_epsilon(b)) + push_merge(n, a); + else if (is_re_empty(a)) + push_merge(n, a); + else if (is_re_empty(b)) + push_merge(n, b); + } + } + + void seq_plugin::propagate_merge(enode* a, enode* b) { + if (!m_seq.is_seq(a->get_expr()) && !m_seq.is_re(a->get_expr())) + return; + + TRACE(seq, tout << "seq merge " << g.bpp(a) << " == " << g.bpp(b) << "\n"); + + // when equivalence classes merge, re-check concat simplifications + for (enode* n : enode_class(a)) { + if (is_concat(n)) + propagate_simplify(n); + } + } + + // + // Concat associativity: + // Instead of creating new expressions, maintain a hash table + // that respects associativity. When a concat is registered, + // look up existing concats with the same leaf sequence. + // If found, merge the existing node with the new one. + // + void seq_plugin::propagate_assoc(enode* n) { + if (!is_concat(n)) + return; + + enode* existing = nullptr; + if (m_concat_table.find(n, existing)) { + if (existing != n) + push_merge(n, existing); + } + else { + m_concat_table.insert(n); + m_concats.push_back(n); + push_undo(undo_kind::undo_add_to_table); + } + } + + // + // Concat simplification rules from ZIPT: + // + // 1. Kleene star merging: concat(u, v*, v*, w) = concat(u, v*, w) + // when adjacent children in a concat chain have congruent star bodies. + // + // 2. Nullable absorption: concat(u, .*, v, w) = concat(u, .*, w) + // when v is nullable and adjacent to full_seq (.*). + // + void seq_plugin::propagate_simplify(enode* n) { + enode* a, *b; + if (!is_concat(n, a, b)) + return; + + // Rule 1: Kleene star merging + // concat(v*, v*) = v* + if (same_star_body(a, b)) + push_merge(n, a); + + // Rule 1 extended (right): concat(v*, concat(v*, c)) = concat(v*, c) + enode* b1, *b2; + if (is_concat(b, b1, b2) && same_star_body(a, b1)) + push_merge(n, b); + + // Rule 1 extended (left): concat(concat(c, v*), v*) = concat(c, v*) + enode* a1, *a2; + if (is_concat(a, a1, a2) && same_star_body(a2, b)) + push_merge(n, a); + + // Rule 2: Nullable absorption by .* + // concat(.*, v) = .* when v is nullable + if (is_full_seq(a) && is_nullable(b)) + push_merge(n, a); + + // concat(v, .*) = .* when v is nullable + if (is_nullable(a) && is_full_seq(b)) + push_merge(n, b); + + // concat(.*, concat(v, w)) = concat(.*, w) when v nullable + // handled by associativity + nullable absorption on sub-concats + + // concat(concat(u, v), .*) = concat(u, .*) when v nullable + // handled by associativity + nullable absorption on sub-concats + } + + bool seq_plugin::is_nullable(expr* e) { + expr_ref result = m_rewriter.is_nullable(e); + return g.get_manager().is_true(result); + } + + bool seq_plugin::same_star_body(enode* a, enode* b) { + if (!is_star(a) || !is_star(b)) + return false; + // re.star(x) and re.star(y) have congruent bodies if x ~ y + return a->get_arg(0)->get_root() == b->get_arg(0)->get_root(); + } + + bool seq_plugin::same_loop_body(enode* a, enode* b, + unsigned& lo1, unsigned& hi1, + unsigned& lo2, unsigned& hi2) { + if (!is_loop(a) || !is_loop(b)) + return false; + expr* body_a, *body_b; + if (!m_seq.re.is_loop(a->get_expr(), body_a, lo1, hi1)) + return false; + if (!m_seq.re.is_loop(b->get_expr(), body_b, lo2, hi2)) + return false; + enode* na = g.find(body_a); + enode* nb = g.find(body_b); + if (!na || !nb) + return false; + return na->get_root() == nb->get_root(); + } + + void seq_plugin::undo() { + auto k = m_undo.back(); + m_undo.pop_back(); + switch (k) { + case undo_kind::undo_add_concat: + SASSERT(!m_queue.empty()); + m_queue.pop_back(); + if (m_qhead > m_queue.size()) + m_qhead = m_queue.size(); + break; + case undo_kind::undo_add_to_table: + SASSERT(!m_concats.empty()); + m_concat_table.remove(m_concats.back()); + m_concats.pop_back(); + break; + } + } + + std::ostream& seq_plugin::display(std::ostream& out) const { + out << "seq-plugin\n"; + return out; + } + + void seq_plugin::collect_statistics(statistics& st) const { + // statistics are collected by sgraph which owns us + } +} diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h new file mode 100644 index 000000000..8f72a6837 --- /dev/null +++ b/src/ast/euf/euf_seq_plugin.h @@ -0,0 +1,175 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_seq_plugin.h + +Abstract: + + Plugin structure for sequences/strings. + + Merges equivalence classes taking into account associativity + of concatenation and algebraic properties of strings and + regular expressions. Implements features from ZIPT: + + -- Concat associativity: str.++ is associative, so + concat(a, concat(b, c)) = concat(concat(a, b), c). + Handled via an AC-style plugin for the concat operator. + + -- Kleene star merging: adjacent identical Kleene stars + in a concatenation are collapsed, u.v*.v*.w = u.v*.w + + -- Loop merging: adjacent loops over the same body are + merged, v{l1,h1}.v{l2,h2} = v{l1+l2,h1+h2} + + -- Nullable absorption: a nullable token adjacent to .* + is absorbed, u.*.v.w = u.*.w when v is nullable. + + The plugin integrates with euf_egraph for congruence closure. + Node registration in sgraph is handled by sgraph itself via + the egraph's on_make callback, not by the plugin. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" +#include "ast/euf/euf_plugin.h" +#include "util/hashtable.h" + +namespace euf { + + class egraph; + class sgraph; + + // Associativity-respecting hash for enode concat trees. + // Uses cached snode hash matrices from the sgraph for O(1) hashing. + // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). + struct enode_concat_hash { + seq_util const& seq; + sgraph& sg; + enode_concat_hash(seq_util const& s, sgraph& sg) : seq(s), sg(sg) {} + unsigned operator()(enode* n) const; + }; + + // Associativity-respecting equality for enode concat trees. + // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). + struct enode_concat_eq { + seq_util const& seq; + enode_concat_eq(seq_util const& s) : seq(s) {} + bool operator()(enode* a, enode* b) const; + }; + + class seq_plugin : public plugin { + + enum class undo_kind { + undo_add_concat, + undo_add_to_table, + }; + + seq_util m_seq; + seq_rewriter m_rewriter; + sgraph& m_sg; + bool m_sg_owned = false; // whether we own the sgraph + svector m_undo; + + // queue of merges and registrations to process + vector> m_queue; + unsigned m_qhead = 0; + + // track registered concat nodes for simplification + enode_vector m_concats; + + // associativity-respecting hash table for concat nodes + enode_concat_hash m_concat_hash; + enode_concat_eq m_concat_eq; + hashtable m_concat_table; + + // string concat predicates + bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } + bool is_str_concat(enode* n, enode*& a, enode*& b) { + expr* ea = nullptr, *eb = nullptr; + return m_seq.str.is_concat(n->get_expr(), ea, eb) && + n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); + } + + // regex concat predicates + bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); } + bool is_re_concat(enode* n, enode*& a, enode*& b) { + expr* ea = nullptr, *eb = nullptr; + return m_seq.re.is_concat(n->get_expr(), ea, eb) && + n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); + } + + // any concat, string or regex + bool is_concat(enode* n) const { return is_str_concat(n) || is_re_concat(n); } + bool is_concat(enode* n, enode*& a, enode*& b) { + return is_str_concat(n, a, b) || is_re_concat(n, a, b); + } + + bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); } + bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); } + + // string empty: ε for str.++ + bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + // regex empty set: ∅ for re.++ (absorbing element) + bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_expr()); } + // regex epsilon: to_re("") for re.++ (identity element) + bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_expr()); } + + bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); } + bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); } + + void push_undo(undo_kind k); + + void propagate_register_node(enode* n); + void propagate_merge(enode* a, enode* b); + + // concat associativity: maintain hash table of concat nodes, + // merge nodes that are equal modulo associativity + void propagate_assoc(enode* n); + + // concat simplification: + // merging Kleene stars, merging loops, absorbing nullables + void propagate_simplify(enode* n); + + // check if expression is nullable using existing seq_rewriter + bool is_nullable(expr* e); + bool is_nullable(enode* n) { return is_nullable(n->get_expr()); } + + // check if two enodes have congruent star bodies + bool same_star_body(enode* a, enode* b); + + // check if two enodes have congruent loop bodies and extract bounds + bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2); + + public: + seq_plugin(egraph& g, sgraph* sg = nullptr); + ~seq_plugin() override; + + theory_id get_id() const override { return m_seq.get_family_id(); } + + void register_node(enode* n) override; + + void merge_eh(enode* n1, enode* n2) override; + + void diseq_eh(enode*) override {} + + void propagate() override; + + void undo() override; + + std::ostream& display(std::ostream& out) const override; + + void collect_statistics(statistics& st) const override; + }; +} diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp new file mode 100644 index 000000000..ac08443c1 --- /dev/null +++ b/src/ast/euf/euf_sgraph.cpp @@ -0,0 +1,637 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_sgraph.cpp + +Abstract: + + Sequence/string graph implementation + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#include "ast/euf/euf_sgraph.h" +#include "ast/euf/euf_seq_plugin.h" +#include "ast/arith_decl_plugin.h" +#include "ast/ast_pp.h" + +namespace euf { + + // substitution cache stored on snode for ZIPT-style optimization + struct snode_subst_cache { + struct entry { + unsigned var_id; + unsigned repl_id; + snode* result; + }; + svector m_entries; + snode* find(unsigned var_id, unsigned repl_id) const { + for (auto const& e : m_entries) + if (e.var_id == var_id && e.repl_id == repl_id) + return e.result; + return nullptr; + } + void insert(unsigned var_id, unsigned repl_id, snode* result) { + m_entries.push_back({var_id, repl_id, result}); + } + }; + + sgraph::sgraph(ast_manager& m, egraph& eg, bool add_plugin): + m(m), + m_seq(m), + m_rewriter(m), + m_egraph(eg), + m_str_sort(m_seq.str.mk_string_sort(), m), + m_add_plugin(add_plugin) { + // create seq_plugin and register it with the egraph + if (add_plugin) + m_egraph.add_plugin(alloc(seq_plugin, m_egraph, this)); + // register on_make callback so sgraph creates snodes for new enodes + std::function on_make = [this](enode* n) { + expr* e = n->get_expr(); + if (m_seq.is_seq(e) || m_seq.is_re(e)) + mk(e); + }; + m_egraph.set_on_make(on_make); + } + + sgraph::~sgraph() { + for (auto* c : m_subst_caches) + dealloc(c); + } + + snode_kind sgraph::classify(expr* e) const { + if (!is_app(e)) + return snode_kind::s_other; + + if (m_seq.str.is_empty(e)) + return snode_kind::s_empty; + + if (m_seq.str.is_string(e)) { + zstring s; + if (m_seq.str.is_string(e, s) && s.empty()) + return snode_kind::s_empty; + return snode_kind::s_other; + } + + if (m_seq.str.is_concat(e)) + return snode_kind::s_concat; + + if (m_seq.str.is_unit(e)) { + expr* ch = to_app(e)->get_arg(0); + if (m_seq.is_const_char(ch)) + return snode_kind::s_char; + return snode_kind::s_unit; + } + + if (m_seq.str.is_power(e)) + return snode_kind::s_power; + + if (m_seq.re.is_star(e)) + return snode_kind::s_star; + + if (m_seq.re.is_loop(e)) + return snode_kind::s_loop; + + if (m_seq.re.is_union(e)) + return snode_kind::s_union; + + if (m_seq.re.is_intersection(e)) + return snode_kind::s_intersect; + + if (m_seq.re.is_complement(e)) + return snode_kind::s_complement; + + if (m_seq.re.is_empty(e)) + return snode_kind::s_fail; + + if (m_seq.re.is_full_char(e)) + return snode_kind::s_full_char; + + if (m_seq.re.is_full_seq(e)) + return snode_kind::s_full_seq; + + if (m_seq.re.is_to_re(e)) + return snode_kind::s_to_re; + + if (m_seq.str.is_in_re(e)) + return snode_kind::s_in_re; + + // uninterpreted constants of string sort are variables + if (is_uninterp_const(e) && m_seq.is_seq(e->get_sort())) + return snode_kind::s_var; + + return snode_kind::s_other; + } + + void sgraph::compute_metadata(snode* n) { + switch (n->m_kind) { + case snode_kind::s_empty: + n->m_ground = true; + n->m_regex_free = true; + n->m_nullable = true; + n->m_level = 0; + n->m_length = 0; + break; + + case snode_kind::s_char: + n->m_ground = true; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_var: + n->m_ground = false; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_unit: + n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_concat: { + SASSERT(n->num_args() == 2); + snode* l = n->arg(0); + snode* r = n->arg(1); + n->m_ground = l->is_ground() && r->is_ground(); + n->m_regex_free = l->is_regex_free() && r->is_regex_free(); + n->m_nullable = l->is_nullable() && r->is_nullable(); + n->m_level = std::max(l->level(), r->level()) + 1; + n->m_length = l->length() + r->length(); + ++m_stats.m_num_concat; + break; + } + + case snode_kind::s_power: { + // s^n: nullable follows base, consistent with ZIPT's PowerToken + // the exponent n is assumed to be a symbolic integer, may or may not be zero + SASSERT(n->num_args() >= 1); + snode* base = n->arg(0); + n->m_ground = base->is_ground(); + n->m_regex_free = base->is_regex_free(); + n->m_nullable = base->is_nullable(); + n->m_level = 1; + n->m_length = 1; + ++m_stats.m_num_power; + break; + } + + case snode_kind::s_star: + SASSERT(n->num_args() == 1); + n->m_ground = n->arg(0)->is_ground(); + n->m_regex_free = false; + n->m_nullable = true; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_loop: { + bool base_nullable = n->num_args() > 0 && n->arg(0)->is_nullable(); + unsigned lo = 0, hi = 0; + expr* body = nullptr; + bool lo_zero = n->get_expr() && m_seq.re.is_loop(n->get_expr(), body, lo, hi) && lo == 0; + n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true; + n->m_regex_free = false; + n->m_nullable = lo_zero || base_nullable; + n->m_level = 1; + n->m_length = 1; + break; + } + + case snode_kind::s_union: + SASSERT(n->num_args() == 2); + n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); + n->m_regex_free = false; + n->m_nullable = n->arg(0)->is_nullable() || n->arg(1)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_intersect: + SASSERT(n->num_args() == 2); + n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); + n->m_regex_free = false; + n->m_nullable = n->arg(0)->is_nullable() && n->arg(1)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_complement: + SASSERT(n->num_args() == 1); + n->m_ground = n->arg(0)->is_ground(); + n->m_regex_free = false; + n->m_nullable = !n->arg(0)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_fail: + n->m_ground = true; + n->m_regex_free = false; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_full_char: + n->m_ground = true; + n->m_regex_free = false; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_full_seq: + n->m_ground = true; + n->m_regex_free = false; + n->m_nullable = true; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_to_re: + SASSERT(n->num_args() == 1); + n->m_ground = n->arg(0)->is_ground(); + n->m_regex_free = false; + n->m_nullable = n->arg(0)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_in_re: + SASSERT(n->num_args() == 2); + n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); + n->m_regex_free = false; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + default: + n->m_ground = true; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + } + } + + static const unsigned HASH_BASE = 31; + + // Compute a 2x2 polynomial hash matrix for associativity-respecting hashing. + // Unsigned overflow is intentional and well-defined (mod 2^32). + // M[0][0] tracks HASH_BASE^(num_leaves), which is always nonzero since + // HASH_BASE is odd. M[0][1] is the actual hash value. + void sgraph::compute_hash_matrix(snode* n) { + if (n->is_empty()) { + // identity matrix: concat with empty is identity + n->m_hash_matrix[0][0] = 1; + n->m_hash_matrix[0][1] = 0; + n->m_hash_matrix[1][0] = 0; + n->m_hash_matrix[1][1] = 1; + } + else if (n->is_concat()) { + snode* l = n->arg(0); + snode* r = n->arg(1); + if (l->has_cached_hash() && r->has_cached_hash()) { + // 2x2 matrix multiplication: M(L) * M(R) + n->m_hash_matrix[0][0] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][0]; + n->m_hash_matrix[0][1] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][1]; + n->m_hash_matrix[1][0] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][0]; + n->m_hash_matrix[1][1] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][1]; + } + } + else { + // leaf/token: [[HASH_BASE, value], [0, 1]] + // +1 avoids zero hash values; wraps safely on unsigned overflow + unsigned v = n->get_expr() ? n->get_expr()->get_id() + 1 : n->id() + 1; + n->m_hash_matrix[0][0] = HASH_BASE; + n->m_hash_matrix[0][1] = v; + n->m_hash_matrix[1][0] = 0; + n->m_hash_matrix[1][1] = 1; + } + } + + snode* sgraph::mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args) { + unsigned id = m_nodes.size(); + snode* n = snode::mk(m_region, e, k, id, num_args, args); + compute_metadata(n); + compute_hash_matrix(n); + m_nodes.push_back(n); + if (e) { + unsigned eid = e->get_id(); + m_expr2snode.reserve(eid + 1, nullptr); + m_expr2snode[eid] = n; + // pin expression via egraph (the egraph has an expr trail) + mk_enode(e); + } + ++m_stats.m_num_nodes; + return n; + } + + snode* sgraph::mk(expr* e) { + SASSERT(e); + snode* n = find(e); + if (n) + return n; + + snode_kind k = classify(e); + + if (!is_app(e)) + return mk_snode(e, k, 0, nullptr); + + app* a = to_app(e); + unsigned arity = a->get_num_args(); + + // recursively register children + // for seq/re children, create classified snodes + // for other children (e.g. integer exponents), create s_other snodes + snode_vector child_nodes; + for (unsigned i = 0; i < arity; ++i) { + expr* ch = a->get_arg(i); + snode* cn = mk(ch); + child_nodes.push_back(cn); + } + + return mk_snode(e, k, child_nodes.size(), child_nodes.data()); + } + + snode* sgraph::find(expr* e) const { + if (!e) + return nullptr; + unsigned eid = e->get_id(); + if (eid < m_expr2snode.size()) + return m_expr2snode[eid]; + return nullptr; + } + + enode* sgraph::mk_enode(expr* e) { + enode* n = m_egraph.find(e); + if (n) return n; + enode_vector args; + if (is_app(e)) { + for (expr* arg : *to_app(e)) { + enode* a = mk_enode(arg); + args.push_back(a); + } + } + return m_egraph.mk(e, 0, args.size(), args.data()); + } + + void sgraph::push() { + m_scopes.push_back(m_nodes.size()); + ++m_num_scopes; + m_egraph.push(); + } + + void sgraph::pop(unsigned num_scopes) { + if (num_scopes == 0) + return; + SASSERT(num_scopes <= m_num_scopes); + unsigned new_lvl = m_num_scopes - num_scopes; + unsigned old_sz = m_scopes[new_lvl]; + for (unsigned i = m_nodes.size(); i-- > old_sz; ) { + snode* n = m_nodes[i]; + if (n->get_expr()) { + unsigned eid = n->get_expr()->get_id(); + if (eid < m_expr2snode.size()) + m_expr2snode[eid] = nullptr; + } + } + m_nodes.shrink(old_sz); + m_scopes.shrink(new_lvl); + m_num_scopes = new_lvl; + m_egraph.pop(num_scopes); + } + + snode* sgraph::mk_var(symbol const& name) { + expr_ref e(m.mk_const(name, m_str_sort), m); + return mk(e); + } + + snode* sgraph::mk_char(unsigned ch) { + expr_ref c(m_seq.str.mk_char(ch), m); + expr_ref u(m_seq.str.mk_unit(c), m); + return mk(u); + } + + snode* sgraph::mk_empty() { + expr_ref e(m_seq.str.mk_empty(m_str_sort), m); + return mk(e); + } + + snode* sgraph::mk_concat(snode* a, snode* b) { + if (a->is_empty()) return b; + if (b->is_empty()) return a; + expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m); + return mk(e); + } + + snode* sgraph::drop_first(snode* n) { + if (n->is_empty() || n->is_token()) + return mk_empty(); + SASSERT(n->is_concat()); + snode* l = n->arg(0); + snode* r = n->arg(1); + if (l->is_token() || l->is_empty()) + return r; + return mk_concat(drop_first(l), r); + } + + snode* sgraph::drop_last(snode* n) { + if (n->is_empty() || n->is_token()) + return mk_empty(); + SASSERT(n->is_concat()); + snode* l = n->arg(0); + snode* r = n->arg(1); + if (r->is_token() || r->is_empty()) + return l; + return mk_concat(l, drop_last(r)); + } + + snode* sgraph::drop_left(snode* n, unsigned count) { + if (count == 0 || n->is_empty()) return n; + if (count >= n->length()) return mk_empty(); + for (unsigned i = 0; i < count; ++i) + n = drop_first(n); + return n; + } + + snode* sgraph::drop_right(snode* n, unsigned count) { + if (count == 0 || n->is_empty()) return n; + if (count >= n->length()) return mk_empty(); + for (unsigned i = 0; i < count; ++i) + n = drop_last(n); + return n; + } + + snode* sgraph::subst(snode* n, snode* var, snode* replacement) { + if (n == var) + return replacement; + if (n->is_empty() || n->is_char()) + return n; + if (n->is_concat()) { + // check substitution cache (ZIPT-style optimization) + if (n->m_subst_cache) { + snode* cached = n->m_subst_cache->find(var->id(), replacement->id()); + if (cached) + return cached; + } + snode* result = mk_concat(subst(n->arg(0), var, replacement), + subst(n->arg(1), var, replacement)); + // cache the result + if (!n->m_subst_cache) { + n->m_subst_cache = alloc(snode_subst_cache); + m_subst_caches.push_back(n->m_subst_cache); + } + n->m_subst_cache->insert(var->id(), replacement->id(), result); + return result; + } + // for non-concat compound nodes (power, star, etc.), no substitution into children + return n; + } + + snode* sgraph::brzozowski_deriv(snode* re, snode* elem) { + expr* re_expr = re->get_expr(); + expr* elem_expr = elem->get_expr(); + if (!re_expr || !elem_expr) + return nullptr; + // unwrap str.unit to get the character expression + expr* ch = nullptr; + if (m_seq.str.is_unit(elem_expr, ch)) + elem_expr = ch; + expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr); + if (!result) + return nullptr; + return mk(result); + } + + void sgraph::collect_re_predicates(snode* re, expr_ref_vector& preds) { + if (!re || !re->get_expr()) + return; + expr* e = re->get_expr(); + expr* ch = nullptr, *lo = nullptr, *hi = nullptr; + // leaf regex predicates: character ranges and single characters + if (m_seq.re.is_range(e, lo, hi)) { + preds.push_back(e); + return; + } + if (m_seq.re.is_to_re(e)) + return; + if (m_seq.re.is_full_char(e)) + return; + if (m_seq.re.is_full_seq(e)) + return; + if (m_seq.re.is_empty(e)) + return; + // recurse into compound regex operators + for (unsigned i = 0; i < re->num_args(); ++i) + collect_re_predicates(re->arg(i), preds); + } + + void sgraph::compute_minterms(snode* re, snode_vector& minterms) { + // extract character predicates from the regex + expr_ref_vector preds(m); + collect_re_predicates(re, preds); + if (preds.empty()) { + // no predicates means the whole alphabet is one minterm + // represented by full_char + expr_ref fc(m_seq.re.mk_full_char(m_str_sort), m); + minterms.push_back(mk(fc)); + return; + } + // generate minterms as conjunctions/negations of predicates + // for n predicates, there are up to 2^n minterms + unsigned n = preds.size(); + // cap at reasonable size to prevent exponential blowup + if (n > 20) + n = 20; + for (unsigned mask = 0; mask < (1u << n); ++mask) { + expr_ref_vector conj(m); + for (unsigned i = 0; i < n; ++i) { + if (mask & (1u << i)) + conj.push_back(preds.get(i)); + else + conj.push_back(m_seq.re.mk_complement(preds.get(i))); + } + SASSERT(!conj.empty()); + // intersect all terms + expr_ref mt(conj.get(0), m); + for (unsigned i = 1; i < conj.size(); ++i) + mt = m_seq.re.mk_inter(mt, conj.get(i)); + minterms.push_back(mk(mt)); + } + } + + std::ostream& sgraph::display(std::ostream& out) const { + auto kind_str = [](snode_kind k) -> char const* { + switch (k) { + case snode_kind::s_empty: return "empty"; + case snode_kind::s_char: return "char"; + case snode_kind::s_var: return "var"; + case snode_kind::s_unit: return "unit"; + case snode_kind::s_concat: return "concat"; + case snode_kind::s_power: return "power"; + case snode_kind::s_star: return "star"; + case snode_kind::s_loop: return "loop"; + case snode_kind::s_union: return "union"; + case snode_kind::s_intersect: return "intersect"; + case snode_kind::s_complement: return "complement"; + case snode_kind::s_fail: return "fail"; + case snode_kind::s_full_char: return "full_char"; + case snode_kind::s_full_seq: return "full_seq"; + case snode_kind::s_to_re: return "to_re"; + case snode_kind::s_in_re: return "in_re"; + case snode_kind::s_other: return "other"; + } + return "?"; + }; + for (snode* n : m_nodes) { + out << "snode[" << n->id() << "] " + << kind_str(n->kind()) + << " level=" << n->level() + << " len=" << n->length() + << " ground=" << n->is_ground() + << " rfree=" << n->is_regex_free() + << " nullable=" << n->is_nullable(); + if (n->num_args() > 0) { + out << " args=("; + for (unsigned i = 0; i < n->num_args(); ++i) { + if (i > 0) out << ", "; + out << n->arg(i)->id(); + } + out << ")"; + } + if (n->get_expr()) + out << " expr=" << mk_pp(n->get_expr(), m); + out << "\n"; + } + return out; + } + + void sgraph::collect_statistics(statistics& st) const { + st.update("seq-graph-nodes", m_stats.m_num_nodes); + st.update("seq-graph-concat", m_stats.m_num_concat); + st.update("seq-graph-power", m_stats.m_num_power); + st.update("seq-graph-hash-hits", m_stats.m_num_hash_hits); + m_egraph.collect_statistics(st); + } + +} + diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h new file mode 100644 index 000000000..4d7231924 --- /dev/null +++ b/src/ast/euf/euf_sgraph.h @@ -0,0 +1,160 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_sgraph.h + +Abstract: + + Sequence/string graph layer + + Encapsulates string and regex expressions for the string solver. + Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT). + The sgraph maps Z3 sequence/regex AST expressions to snode structures + organized as binary concatenation trees with metadata, and owns an + egraph with a seq_plugin for congruence closure. + + -- snode classification: empty, char, variable, unit, concat, power, + star, loop, union, intersection, complement, fail, full_char, + full_seq, to_re, in_re, other. + -- Metadata computation: ground, regex_free, nullable, level, length. + -- Expression registration via mk(expr*), lookup via find(expr*). + -- Scope management: push/pop with backtracking. + -- egraph ownership with seq_plugin for: + * concat associativity via associativity-respecting hash table, + * Kleene star merging (u.v*.v*.w = u.v*.w), + * nullable absorption next to .* (u.*.v.w = u.*.w when v nullable), + * str.++ identity elimination (concat(a, ε) = a), + * re.++ identity/absorption (concat(a, epsilon) = a, concat(a, ∅) = ∅). + -- enode registration via mk_enode(expr*). + + ZIPT features not yet ported: + + -- Str operations: normalisation with union-find representatives and + cache migration, balanced tree maintenance, drop left/right with + caching, substitution, indexed access, iteration, ToList caching, + simplification, derivative computation, structural equality with + associative hashing, rotation equality, expression reconstruction, + Graphviz export. + + -- StrToken subclasses: SymCharToken, StrAtToken, SubStrToken, + SetToken, PostToken/PreToken. + + -- StrToken features: Nielsen-style GetDecomposition with side + constraints, NamedStrToken extension tracking for variable + splitting with PowerExtension, CollectSymbols for Parikh analysis, + MinTerms for character class analysis, token ordering, Derivable + and BasicRegex flags. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#pragma once + +#include "util/region.h" +#include "util/statistics.h" +#include "ast/ast.h" +#include "ast/seq_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" +#include "ast/euf/euf_snode.h" +#include "ast/euf/euf_egraph.h" + +namespace euf { + + class seq_plugin; + + class sgraph { + + struct stats { + unsigned m_num_nodes; + unsigned m_num_concat; + unsigned m_num_power; + unsigned m_num_hash_hits; + stats() { reset(); } + void reset() { memset(this, 0, sizeof(*this)); } + }; + + ast_manager& m; + seq_util m_seq; + seq_rewriter m_rewriter; + egraph& m_egraph; + region m_region; + snode_vector m_nodes; + sort_ref m_str_sort; // cached string sort + unsigned_vector m_scopes; + unsigned m_num_scopes = 0; + stats m_stats; + bool m_add_plugin; // whether sgraph created the seq_plugin + + // tracks allocated subst caches for cleanup + ptr_vector m_subst_caches; + + // maps expression id to snode + ptr_vector m_expr2snode; + + snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); + snode_kind classify(expr* e) const; + void compute_metadata(snode* n); + void compute_hash_matrix(snode* n); + void collect_re_predicates(snode* re, expr_ref_vector& preds); + + public: + sgraph(ast_manager& m, egraph& eg, bool add_plugin = true); + ~sgraph(); + + ast_manager& get_manager() const { return m; } + seq_util& get_seq_util() { return m_seq; } + egraph& get_egraph() { return m_egraph; } + egraph const& get_egraph() const { return m_egraph; } + + // register an expression and return its snode + snode* mk(expr* e); + + // lookup an already-registered expression + snode* find(expr* e) const; + + // register expression in both sgraph and egraph + enode* mk_enode(expr* e); + + // factory methods for creating snodes with corresponding expressions + snode* mk_var(symbol const& name); + snode* mk_char(unsigned ch); + snode* mk_empty(); + snode* mk_concat(snode* a, snode* b); + + // drop operations: remove tokens from the front/back of a concat tree + snode* drop_first(snode* n); + snode* drop_last(snode* n); + snode* drop_left(snode* n, unsigned count); + snode* drop_right(snode* n, unsigned count); + + // substitution: replace all occurrences of var in n by replacement + snode* subst(snode* n, snode* var, snode* replacement); + + // Brzozowski derivative of regex re with respect to element elem + snode* brzozowski_deriv(snode* re, snode* elem); + + // compute minterms (character class partition) from a regex + void compute_minterms(snode* re, snode_vector& minterms); + + // scope management for backtracking + void push(); + void pop(unsigned num_scopes); + + // access + snode_vector const& nodes() const { return m_nodes; } + unsigned num_nodes() const { return m_nodes.size(); } + + // display + std::ostream& display(std::ostream& out) const; + + void collect_statistics(statistics& st) const; + }; + +} + diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h new file mode 100644 index 000000000..955dd3788 --- /dev/null +++ b/src/ast/euf/euf_snode.h @@ -0,0 +1,202 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_snode.h + +Abstract: + + snode layer for sequence/string graph + + Encapsulates strings in the style of euf_enode.h. + Maps Z3 sequence expressions to a ZIPT-style representation where + strings are composed of tokens (characters, variables, powers, regex, etc.) + organized as a binary tree of concatenations. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#pragma once + +#include "util/vector.h" +#include "util/region.h" +#include "ast/ast.h" +#include "ast/seq_decl_plugin.h" + +namespace euf { + + class sgraph; + class snode; + struct snode_subst_cache; + + typedef ptr_vector snode_vector; + + enum class snode_kind { + s_empty, // empty string (OP_SEQ_EMPTY or empty string constant) + s_char, // concrete character unit (OP_SEQ_UNIT wrapping a char literal) + s_var, // string variable (uninterpreted constant of string sort) + s_unit, // generic unit (OP_SEQ_UNIT with non-literal character) + s_concat, // concatenation of two snodes (OP_SEQ_CONCAT) + s_power, // string exponentiation s^n (OP_SEQ_POWER) + s_star, // Kleene star r* (OP_RE_STAR) + s_loop, // bounded loop r{lo,hi} (OP_RE_LOOP) + s_union, // union r1|r2 (OP_RE_UNION) + s_intersect, // intersection r1&r2 (OP_RE_INTERSECT) + s_complement, // complement ~r (OP_RE_COMPLEMENT) + s_fail, // empty language (OP_RE_EMPTY_SET) + s_full_char, // full character set (OP_RE_FULL_CHAR_SET) + s_full_seq, // full sequence set r=.* (OP_RE_FULL_SEQ_SET) + s_to_re, // string to regex (OP_SEQ_TO_RE) + s_in_re, // regex membership (OP_SEQ_IN_RE) + s_other, // other sequence expression not directly classified + }; + + class snode { + expr* m_expr = nullptr; + snode_kind m_kind = snode_kind::s_other; + unsigned m_id = UINT_MAX; + unsigned m_num_args = 0; + + // metadata flags, analogous to ZIPT's Str/StrToken properties + bool m_ground = true; // no uninterpreted string variables + bool m_regex_free = true; // no regex constructs + bool m_nullable = false; // accepts the empty string + unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons) + unsigned m_length = 0; // token count, number of leaf tokens in the tree + + // hash matrix for associativity-respecting hashing (2x2 polynomial hash matrix) + // all zeros means not cached, non-zero means cached + unsigned m_hash_matrix[2][2] = {{0,0},{0,0}}; + + // substitution cache (lazy-initialized, owned by sgraph) + snode_subst_cache* m_subst_cache = nullptr; + + snode* m_args[0]; // variable-length array, allocated via get_snode_size(num_args) + + friend class sgraph; + + static unsigned get_snode_size(unsigned num_args) { + return sizeof(snode) + num_args * sizeof(snode*); + } + + static snode* mk(region& r, expr* e, snode_kind k, unsigned id, unsigned num_args, snode* const* args) { + void* mem = r.allocate(get_snode_size(num_args)); + snode* n = new (mem) snode(); + n->m_expr = e; + n->m_kind = k; + n->m_id = id; + n->m_num_args = num_args; + for (unsigned i = 0; i < num_args; ++i) + n->m_args[i] = args[i]; + return n; + } + + public: + expr* get_expr() const { return m_expr; } + snode_kind kind() const { return m_kind; } + unsigned id() const { return m_id; } + unsigned num_args() const { return m_num_args; } + snode* arg(unsigned i) const { SASSERT(i < m_num_args); return m_args[i]; } + + bool is_ground() const { return m_ground; } + bool is_regex_free() const { return m_regex_free; } + bool is_nullable() const { return m_nullable; } + unsigned level() const { return m_level; } + unsigned length() const { return m_length; } + + // associativity-respecting hash: cached if the 2x2 matrix is non-zero. + // M[0][0] = HASH_BASE^(num_leaves) which is always nonzero since HASH_BASE + // is odd and gcd(odd, 2^32) = 1, so the check is safe. + bool has_cached_hash() const { return m_hash_matrix[0][0] != 0; } + unsigned assoc_hash() const { return m_hash_matrix[0][1]; } + + bool is_empty() const { return m_kind == snode_kind::s_empty; } + bool is_char() const { return m_kind == snode_kind::s_char; } + bool is_var() const { return m_kind == snode_kind::s_var; } + bool is_unit() const { return m_kind == snode_kind::s_unit; } + bool is_concat() const { return m_kind == snode_kind::s_concat; } + bool is_power() const { return m_kind == snode_kind::s_power; } + bool is_star() const { return m_kind == snode_kind::s_star; } + bool is_loop() const { return m_kind == snode_kind::s_loop; } + bool is_union() const { return m_kind == snode_kind::s_union; } + bool is_intersect() const { return m_kind == snode_kind::s_intersect; } + bool is_complement() const { return m_kind == snode_kind::s_complement; } + bool is_fail() const { return m_kind == snode_kind::s_fail; } + bool is_full_char() const { return m_kind == snode_kind::s_full_char; } + bool is_full_seq() const { return m_kind == snode_kind::s_full_seq; } + bool is_to_re() const { return m_kind == snode_kind::s_to_re; } + bool is_in_re() const { return m_kind == snode_kind::s_in_re; } + + // is this a leaf token (analogous to ZIPT's StrToken as opposed to Str) + bool is_token() const { + switch (m_kind) { + case snode_kind::s_empty: + case snode_kind::s_concat: + return false; + default: + return true; + } + } + + // analogous to ZIPT's Str.First / Str.Last + snode const* first() const { + snode const* s = this; + while (s->is_concat()) + s = s->arg(0); + return s; + } + + snode const* last() const { + snode const* s = this; + while (s->is_concat()) + s = s->arg(1); + return s; + } + + snode* first() { + snode* s = this; + while (s->is_concat()) + s = s->arg(0); + return s; + } + + snode* last() { + snode* s = this; + while (s->is_concat()) + s = s->arg(1); + return s; + } + + // collect all leaf tokens in left-to-right order + void collect_tokens(snode_vector& tokens) const { + if (is_concat()) { + arg(0)->collect_tokens(tokens); + arg(1)->collect_tokens(tokens); + } + else if (!is_empty()) { + tokens.push_back(const_cast(this)); + } + } + + // access the i-th token (0-based, left-to-right order) + // returns nullptr if i >= length() + snode* at(unsigned i) const { + if (is_concat()) { + unsigned left_len = arg(0)->length(); + if (i < left_len) + return arg(0)->at(i); + return arg(1)->at(i - left_len); + } + if (is_empty()) + return nullptr; + return i == 0 ? const_cast(this) : nullptr; + } + }; + +} + diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 634ced5c9..c20fe2175 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -222,6 +222,7 @@ void seq_decl_plugin::init() { m_sigs[OP_SEQ_NTH_I] = alloc(psig, m, "seq.nth_i", 1, 2, seqAintT, A); m_sigs[OP_SEQ_NTH_U] = alloc(psig, m, "seq.nth_u", 1, 2, seqAintT, A); m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT); + m_sigs[OP_SEQ_POWER] = alloc(psig, m, "seq.power", 1, 2, seqAintT, seqA); m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA); m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA); m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA); @@ -591,6 +592,10 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p add_map_sig(); return mk_str_fun(k, arity, domain, range, k); + case OP_SEQ_POWER: + match(*m_sigs[k], arity, domain, range, rng); + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k)); + case OP_SEQ_TO_RE: m_has_re = true; return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP); diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 78ad6d544..73eaa9cdd 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -59,6 +59,7 @@ enum seq_op_kind { OP_SEQ_MAPI, // Array[Int,A,B] -> Int -> Seq[A] -> Seq[B] OP_SEQ_FOLDL, // Array[B,A,B] -> B -> Seq[A] -> B OP_SEQ_FOLDLI, // Array[Int,B,A,B] -> Int -> B -> Seq[A] -> B + OP_SEQ_POWER, // Seq -> Int -> Seq, string exponentiation s^n OP_RE_PLUS, OP_RE_STAR, @@ -307,6 +308,7 @@ public: app* mk_mapi(expr* f, expr* i, expr* s) const { expr* es[3] = { f, i, s }; return m.mk_app(m_fid, OP_SEQ_MAPI, 3, es); } app* mk_foldl(expr* f, expr* b, expr* s) const { expr* es[3] = { f, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDL, 3, es); } app* mk_foldli(expr* f, expr* i, expr* b, expr* s) const { expr* es[4] = { f, i, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDLI, 4, es); } + app* mk_power(expr* s, expr* n) const { expr* es[2] = { s, n }; return m.mk_app(m_fid, OP_SEQ_POWER, 2, es); } app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); } app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); } @@ -348,6 +350,7 @@ public: bool is_mapi(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_MAPI); } bool is_foldl(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDL); } bool is_foldli(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDLI); } + bool is_power(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_POWER); } bool is_extract(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); } bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); } bool is_at(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_AT); } @@ -404,6 +407,7 @@ public: MATCH_TERNARY(is_mapi); MATCH_TERNARY(is_foldl); MATCH_QUATARY(is_foldli); + MATCH_BINARY(is_power); MATCH_BINARY(is_last_index); MATCH_TERNARY(is_replace); MATCH_TERNARY(is_replace_re); diff --git a/src/smt/seq/CMakeLists.txt b/src/smt/seq/CMakeLists.txt new file mode 100644 index 000000000..db63e4c6f --- /dev/null +++ b/src/smt/seq/CMakeLists.txt @@ -0,0 +1,7 @@ +z3_add_component(smt_seq + SOURCES + seq_nielsen.cpp + COMPONENT_DEPENDENCIES + euf + rewriter +) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp new file mode 100644 index 000000000..90b1ffa2a --- /dev/null +++ b/src/smt/seq/seq_nielsen.cpp @@ -0,0 +1,311 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_nielsen.cpp + +Abstract: + + Nielsen graph implementation for string constraint solving. + + Ports the constraint types and Nielsen graph structures from + ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints) + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-02 + Clemens Eisenhofer 2026-03-02 + +--*/ + +#include "smt/seq/seq_nielsen.h" +#include "ast/ast_pp.h" + +namespace seq { + + // ----------------------------------------------- + // dep_tracker + // ----------------------------------------------- + + dep_tracker::dep_tracker(unsigned num_bits) { + unsigned words = (num_bits + 31) / 32; + m_bits.resize(words, 0); + } + + dep_tracker::dep_tracker(unsigned num_bits, unsigned set_bit) { + unsigned words = (num_bits + 31) / 32; + m_bits.resize(words, 0); + if (set_bit < num_bits) { + unsigned word_idx = set_bit / 32; + unsigned bit_idx = set_bit % 32; + m_bits[word_idx] = 1u << bit_idx; + } + } + + void dep_tracker::merge(dep_tracker const& other) { + if (other.m_bits.empty()) + return; + if (m_bits.size() < other.m_bits.size()) + m_bits.resize(other.m_bits.size(), 0); + for (unsigned i = 0; i < other.m_bits.size(); ++i) + m_bits[i] |= other.m_bits[i]; + } + + bool dep_tracker::is_superset(dep_tracker const& other) const { + for (unsigned i = 0; i < other.m_bits.size(); ++i) { + unsigned my_bits = (i < m_bits.size()) ? m_bits[i] : 0; + if ((my_bits & other.m_bits[i]) != other.m_bits[i]) + return false; + } + return true; + } + + bool dep_tracker::empty() const { + for (unsigned b : m_bits) + if (b != 0) return false; + return true; + } + + // ----------------------------------------------- + // str_eq + // ----------------------------------------------- + + void str_eq::sort() { + if (m_lhs && m_rhs && m_lhs->id() > m_rhs->id()) + std::swap(m_lhs, m_rhs); + } + + bool str_eq::is_trivial() const { + return m_lhs == m_rhs || + (m_lhs && m_rhs && m_lhs->is_empty() && m_rhs->is_empty()); + } + + bool str_eq::contains_var(euf::snode* var) const { + if (!var) return false; + // check if var appears in the token list of lhs or rhs + if (m_lhs) { + euf::snode_vector tokens; + m_lhs->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == var) return true; + } + if (m_rhs) { + euf::snode_vector tokens; + m_rhs->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == var) return true; + } + return false; + } + + // ----------------------------------------------- + // str_mem + // ----------------------------------------------- + + bool str_mem::is_primitive() const { + return m_str && m_str->length() == 1 && m_str->is_var(); + } + + bool str_mem::contains_var(euf::snode* var) const { + if (!var) return false; + if (m_str) { + euf::snode_vector tokens; + m_str->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == var) return true; + } + return false; + } + + // ----------------------------------------------- + // nielsen_subst + // ----------------------------------------------- + + bool nielsen_subst::is_eliminating() const { + if (!m_var || !m_replacement) return true; + // check if var appears in replacement + euf::snode_vector tokens; + m_replacement->collect_tokens(tokens); + for (euf::snode* t : tokens) + if (t == m_var) return false; + return true; + } + + // ----------------------------------------------- + // nielsen_edge + // ----------------------------------------------- + + nielsen_edge::nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress): + m_src(src), m_tgt(tgt), m_is_progress(is_progress) { + } + + // ----------------------------------------------- + // nielsen_node + // ----------------------------------------------- + + nielsen_node::nielsen_node(nielsen_graph* graph, unsigned id): + m_id(id), m_graph(graph), m_is_progress(true) { + } + + void nielsen_node::clone_from(nielsen_node const& parent) { + m_str_eq.reset(); + m_str_mem.reset(); + for (auto const& eq : parent.m_str_eq) + m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep)); + for (auto const& mem : parent.m_str_mem) + m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep)); + } + + void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) { + if (!s.m_var) return; + for (unsigned i = 0; i < m_str_eq.size(); ++i) { + str_eq& eq = m_str_eq[i]; + eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement); + eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement); + eq.m_dep.merge(s.m_dep); + eq.sort(); + } + for (unsigned i = 0; i < m_str_mem.size(); ++i) { + str_mem& mem = m_str_mem[i]; + mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement); + // regex is typically ground, but apply subst for generality + mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement); + mem.m_dep.merge(s.m_dep); + } + } + + // ----------------------------------------------- + // nielsen_graph + // ----------------------------------------------- + + nielsen_graph::nielsen_graph(euf::sgraph& sg): + m_sg(sg) { + } + + nielsen_graph::~nielsen_graph() { + reset(); + } + + nielsen_node* nielsen_graph::mk_node() { + unsigned id = m_nodes.size(); + nielsen_node* n = alloc(nielsen_node, this, id); + m_nodes.push_back(n); + return n; + } + + nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) { + nielsen_node* child = mk_node(); + child->clone_from(*parent); + return child; + } + + nielsen_edge* nielsen_graph::mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress) { + nielsen_edge* e = alloc(nielsen_edge, src, tgt, is_progress); + m_edges.push_back(e); + src->add_outgoing(e); + return e; + } + + void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) { + if (!m_root) + m_root = mk_node(); + dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1, + m_root->str_eqs().size()); + str_eq eq(lhs, rhs, dep); + eq.sort(); + m_root->add_str_eq(eq); + } + + void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) { + if (!m_root) + m_root = mk_node(); + dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1, + m_root->str_eqs().size() + m_root->str_mems().size()); + euf::snode* history = m_sg.mk_empty(); + unsigned id = next_mem_id(); + m_root->add_str_mem(str_mem(str, regex, history, id, dep)); + } + + void nielsen_graph::inc_run_idx() { + if (m_run_idx == UINT_MAX) { + for (nielsen_node* n : m_nodes) + n->reset_counter(); + m_run_idx = 1; + } + else + ++m_run_idx; + } + + void nielsen_graph::reset() { + for (nielsen_node* n : m_nodes) + dealloc(n); + for (nielsen_edge* e : m_edges) + dealloc(e); + m_nodes.reset(); + m_edges.reset(); + m_root = nullptr; + m_run_idx = 0; + m_depth_bound = 0; + m_next_mem_id = 0; + } + + std::ostream& nielsen_graph::display(std::ostream& out) const { + out << "nielsen_graph with " << m_nodes.size() << " nodes, " + << m_edges.size() << " edges\n"; + + for (nielsen_node const* n : m_nodes) { + out << " node[" << n->id() << "]"; + if (n == m_root) + out << " (root)"; + if (n->is_general_conflict()) + out << " CONFLICT"; + if (n->is_extended()) + out << " EXTENDED"; + out << "\n"; + + // display string equalities + for (auto const& eq : n->str_eqs()) { + out << " str_eq: "; + if (eq.m_lhs) out << "lhs[id=" << eq.m_lhs->id() << ",len=" << eq.m_lhs->length() << "]"; + else out << "null"; + out << " = "; + if (eq.m_rhs) out << "rhs[id=" << eq.m_rhs->id() << ",len=" << eq.m_rhs->length() << "]"; + else out << "null"; + out << "\n"; + } + + // display regex memberships + for (auto const& mem : n->str_mems()) { + out << " str_mem[" << mem.m_id << "]: "; + if (mem.m_str) out << "str[id=" << mem.m_str->id() << ",len=" << mem.m_str->length() << "]"; + else out << "null"; + out << " in "; + if (mem.m_regex) out << "re[id=" << mem.m_regex->id() << "]"; + else out << "null"; + out << "\n"; + } + + // display outgoing edges + for (nielsen_edge const* e : n->outgoing()) { + out << " -> node[" << e->tgt()->id() << "]"; + if (e->is_progress()) out << " (progress)"; + for (auto const& s : e->subst()) { + out << " {"; + if (s.m_var) out << "var[" << s.m_var->id() << "]"; + out << " -> "; + if (s.m_replacement) out << "repl[" << s.m_replacement->id() << ",len=" << s.m_replacement->length() << "]"; + else out << "eps"; + out << "}"; + } + out << "\n"; + } + + if (n->backedge()) + out << " backedge -> node[" << n->backedge()->id() << "]\n"; + } + + return out; + } + +} diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h new file mode 100644 index 000000000..10120dc6e --- /dev/null +++ b/src/smt/seq/seq_nielsen.h @@ -0,0 +1,327 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_nielsen.h + +Abstract: + + Nielsen graph for string constraint solving. + + Ports the constraint types and Nielsen graph structures from + ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints) + into Z3's smt/seq framework. + + The Nielsen graph is used for solving word equations and regex + membership constraints via Nielsen transformations. Each node + contains a set of constraints (string equalities, regex memberships, + integer equalities/inequalities) and edges represent substitutions + that transform one constraint set into another. + + Key components: + -- str_eq: string equality constraint (lhs = rhs) + -- str_mem: regex membership constraint (str in regex) + -- nielsen_subst: variable substitution (var -> replacement) + -- nielsen_edge: graph edge with substitutions and side constraints + -- nielsen_node: graph node with constraint set and outgoing edges + -- nielsen_graph: the overall Nielsen transformation graph + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-02 + Clemens Eisenhofer 2026-03-02 + +--*/ + +#pragma once + +#include "util/vector.h" +#include "util/uint_set.h" +#include "ast/ast.h" +#include "ast/seq_decl_plugin.h" +#include "ast/euf/euf_sgraph.h" + +namespace seq { + + // forward declarations + class nielsen_node; + class nielsen_edge; + class nielsen_graph; + + // simplification result for constraint processing + // mirrors ZIPT's SimplifyResult enum + enum class simplify_result { + proceed, // no change, continue + conflict, // constraint is unsatisfiable + satisfied, // constraint is trivially satisfied + restart, // constraint was simplified, restart + restart_and_satisfied, // simplified and satisfied + }; + + // reason for backtracking in the Nielsen graph + // mirrors ZIPT's BacktrackReasons enum + enum class backtrack_reason { + unevaluated, + extended, + symbol_clash, + parikh_image, + subsumption, + arithmetic, + regex, + regex_widening, + character_range, + smt, + children_failed, + }; + + // dependency tracker: bitvector tracking which input constraints + // contributed to deriving a given constraint + // mirrors ZIPT's DependencyTracker + class dep_tracker { + svector m_bits; + public: + dep_tracker() = default; + explicit dep_tracker(unsigned num_bits); + dep_tracker(unsigned num_bits, unsigned set_bit); + + void merge(dep_tracker const& other); + bool is_superset(dep_tracker const& other) const; + bool empty() const; + + bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; } + bool operator!=(dep_tracker const& other) const { return !(*this == other); } + }; + + // string equality constraint: lhs = rhs + // mirrors ZIPT's StrEq (both sides are regex-free snode trees) + struct str_eq { + euf::snode* m_lhs; + euf::snode* m_rhs; + dep_tracker m_dep; + + str_eq(): m_lhs(nullptr), m_rhs(nullptr) {} + str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep): + m_lhs(lhs), m_rhs(rhs), m_dep(dep) {} + + bool operator==(str_eq const& other) const { + return m_lhs == other.m_lhs && m_rhs == other.m_rhs; + } + + // sort so that lhs <= rhs by snode id + void sort(); + + // check if both sides are empty (trivially satisfied) + bool is_trivial() const; + + // check if the constraint contains a given variable + bool contains_var(euf::snode* var) const; + }; + + // regex membership constraint: str in regex + // mirrors ZIPT's StrMem + struct str_mem { + euf::snode* m_str; + euf::snode* m_regex; + euf::snode* m_history; // tracks derivation history for cycle detection + unsigned m_id; // unique identifier + dep_tracker m_dep; + + str_mem(): m_str(nullptr), m_regex(nullptr), m_history(nullptr), m_id(UINT_MAX) {} + str_mem(euf::snode* str, euf::snode* regex, euf::snode* history, unsigned id, dep_tracker const& dep): + m_str(str), m_regex(regex), m_history(history), m_id(id), m_dep(dep) {} + + bool operator==(str_mem const& other) const { + return m_id == other.m_id && m_str == other.m_str && m_regex == other.m_regex; + } + + // check if the constraint has the form x in R with x a single variable + bool is_primitive() const; + + // check if the constraint contains a given variable + bool contains_var(euf::snode* var) const; + }; + + // string variable substitution: var -> replacement + // mirrors ZIPT's Subst + struct nielsen_subst { + euf::snode* m_var; + euf::snode* m_replacement; + dep_tracker m_dep; + + nielsen_subst(): m_var(nullptr), m_replacement(nullptr) {} + nielsen_subst(euf::snode* var, euf::snode* repl, dep_tracker const& dep): + m_var(var), m_replacement(repl), m_dep(dep) {} + + // an eliminating substitution does not contain the variable in the replacement + bool is_eliminating() const; + + bool operator==(nielsen_subst const& other) const { + return m_var == other.m_var && m_replacement == other.m_replacement; + } + }; + + // edge in the Nielsen graph connecting two nodes + // mirrors ZIPT's NielsenEdge + class nielsen_edge { + nielsen_node* m_src; + nielsen_node* m_tgt; + vector m_subst; + ptr_vector m_side_str_eq; // side constraints: string equalities + ptr_vector m_side_str_mem; // side constraints: regex memberships + bool m_is_progress; // does this edge represent progress? + public: + nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress); + + nielsen_node* src() const { return m_src; } + nielsen_node* tgt() const { return m_tgt; } + + void set_tgt(nielsen_node* tgt) { m_tgt = tgt; } + + vector const& subst() const { return m_subst; } + void add_subst(nielsen_subst const& s) { m_subst.push_back(s); } + + void add_side_str_eq(str_eq* eq) { m_side_str_eq.push_back(eq); } + void add_side_str_mem(str_mem* mem) { m_side_str_mem.push_back(mem); } + + ptr_vector const& side_str_eq() const { return m_side_str_eq; } + ptr_vector const& side_str_mem() const { return m_side_str_mem; } + + bool is_progress() const { return m_is_progress; } + + bool operator==(nielsen_edge const& other) const { + return m_src == other.m_src && m_tgt == other.m_tgt; + } + }; + + // node in the Nielsen graph + // mirrors ZIPT's NielsenNode + class nielsen_node { + friend class nielsen_graph; + + unsigned m_id; + nielsen_graph* m_graph; + + // constraints at this node + vector m_str_eq; // string equalities + vector m_str_mem; // regex memberships + + // edges + ptr_vector m_outgoing; + nielsen_node* m_backedge = nullptr; + + // status flags + bool m_is_general_conflict = false; + bool m_is_extended = false; + backtrack_reason m_reason = backtrack_reason::unevaluated; + bool m_is_progress = false; + + // evaluation index for run tracking + unsigned m_eval_idx = 0; + + public: + nielsen_node(nielsen_graph* graph, unsigned id); + + unsigned id() const { return m_id; } + nielsen_graph* graph() const { return m_graph; } + + // constraint access + vector const& str_eqs() const { return m_str_eq; } + vector& str_eqs() { return m_str_eq; } + vector const& str_mems() const { return m_str_mem; } + vector& str_mems() { return m_str_mem; } + + void add_str_eq(str_eq const& eq) { m_str_eq.push_back(eq); } + void add_str_mem(str_mem const& mem) { m_str_mem.push_back(mem); } + + // edge access + ptr_vector const& outgoing() const { return m_outgoing; } + void add_outgoing(nielsen_edge* e) { m_outgoing.push_back(e); } + + nielsen_node* backedge() const { return m_backedge; } + void set_backedge(nielsen_node* n) { m_backedge = n; } + + // status + bool is_general_conflict() const { return m_is_general_conflict; } + void set_general_conflict(bool v) { m_is_general_conflict = v; } + + bool is_extended() const { return m_is_extended; } + void set_extended(bool v) { m_is_extended = v; } + + bool is_currently_conflict() const { + return m_is_general_conflict || + (m_reason != backtrack_reason::unevaluated && m_is_extended); + } + + backtrack_reason reason() const { return m_reason; } + void set_reason(backtrack_reason r) { m_reason = r; } + + bool is_progress() const { return m_is_progress; } + + unsigned eval_idx() const { return m_eval_idx; } + void set_eval_idx(unsigned idx) { m_eval_idx = idx; } + void reset_counter() { m_eval_idx = 0; } + + // clone constraints from a parent node + void clone_from(nielsen_node const& parent); + + // apply a substitution to all constraints + void apply_subst(euf::sgraph& sg, nielsen_subst const& s); + }; + + // the overall Nielsen transformation graph + // mirrors ZIPT's NielsenGraph + class nielsen_graph { + euf::sgraph& m_sg; + region m_region; + ptr_vector m_nodes; + ptr_vector m_edges; + nielsen_node* m_root = nullptr; + unsigned m_run_idx = 0; + unsigned m_depth_bound = 0; + unsigned m_next_mem_id = 0; + + public: + nielsen_graph(euf::sgraph& sg); + ~nielsen_graph(); + + euf::sgraph& sg() { return m_sg; } + + // node management + nielsen_node* mk_node(); + nielsen_node* mk_child(nielsen_node* parent); + + // edge management + nielsen_edge* mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress); + + // root node access + nielsen_node* root() const { return m_root; } + void set_root(nielsen_node* n) { m_root = n; } + + // add constraints to the root node from external solver + void add_str_eq(euf::snode* lhs, euf::snode* rhs); + void add_str_mem(euf::snode* str, euf::snode* regex); + + // run management + unsigned run_idx() const { return m_run_idx; } + void inc_run_idx(); + + // access all nodes + ptr_vector const& nodes() const { return m_nodes; } + unsigned num_nodes() const { return m_nodes.size(); } + + // depth bound for iterative deepening + unsigned depth_bound() const { return m_depth_bound; } + void set_depth_bound(unsigned d) { m_depth_bound = d; } + + // generate next unique regex membership id + unsigned next_mem_id() { return m_next_mem_id++; } + + // display for debugging + std::ostream& display(std::ostream& out) const; + + // reset all nodes and state + void reset(); + }; + +} diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 1d5b5ce18..b0d0a6a1e 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -3,7 +3,7 @@ add_subdirectory(lp) ################################################################################ # z3-test executable ################################################################################ -set(z3_test_deps api fuzzing simplex) +set(z3_test_deps api fuzzing simplex smt_seq) z3_expand_dependencies(z3_test_expanded_deps ${z3_test_deps}) set (z3_test_extra_object_files "") foreach (component ${z3_test_expanded_deps}) @@ -51,6 +51,8 @@ add_executable(test-z3 escaped.cpp euf_bv_plugin.cpp euf_arith_plugin.cpp + euf_sgraph.cpp + euf_seq_plugin.cpp ex.cpp expr_rand.cpp expr_substitution.cpp @@ -129,6 +131,7 @@ add_executable(test-z3 simplifier.cpp sls_test.cpp sls_seq_plugin.cpp + seq_nielsen.cpp small_object_allocator.cpp smt2print_parse.cpp smt_context.cpp diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp new file mode 100644 index 000000000..4beaa012e --- /dev/null +++ b/src/test/euf_seq_plugin.cpp @@ -0,0 +1,241 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +--*/ + +#include "util/util.h" +#include "ast/euf/euf_sgraph.h" +#include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_egraph.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include + +static unsigned s_var = 0; + +static euf::enode* get_node(euf::egraph& g, seq_util& seq, expr* e) { + auto* n = g.find(e); + if (n) return n; + euf::enode_vector args; + if (is_app(e)) + for (expr* arg : *to_app(e)) + args.push_back(get_node(g, seq, arg)); + n = g.mk(e, 0, args.size(), args.data()); + if (seq.is_seq(e) || seq.is_re(e)) + g.add_th_var(n, ++s_var, seq.get_family_id()); + return n; +} + +// test sgraph: basic classification and metadata +static void test_sgraph_basic() { + std::cout << "test_sgraph_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref xy(seq.str.mk_concat(x, y), m); + + euf::snode* sx = sg.mk(x); + SASSERT(sx); + SASSERT(sx->is_var()); + SASSERT(!sx->is_ground()); + SASSERT(sx->is_regex_free()); + SASSERT(!sx->is_nullable()); + SASSERT(sx->length() == 1); + + euf::snode* se = sg.mk(empty); + SASSERT(se); + SASSERT(se->is_empty()); + SASSERT(se->is_ground()); + SASSERT(se->is_nullable()); + SASSERT(se->length() == 0); + + euf::snode* sxy = sg.mk(xy); + SASSERT(sxy); + SASSERT(sxy->is_concat()); + SASSERT(!sxy->is_ground()); + SASSERT(sxy->length() == 2); + SASSERT(sxy->num_args() == 2); + + std::cout << "sgraph:\n"; + sg.display(std::cout); + std::cout << "\n"; +} + +// test sgraph: backtracking with push/pop +static void test_sgraph_backtrack() { + std::cout << "test_sgraph_backtrack\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + + sg.mk(x); + unsigned before = sg.num_nodes(); + + sg.push(); + + expr_ref xy(seq.str.mk_concat(x, y), m); + sg.mk(xy); + SASSERT(sg.num_nodes() > before); + + sg.pop(1); + // y and xy were created inside the scope, so some nodes should be removed + // x was created before the scope, so it should persist + SASSERT(sg.find(x)); +} + +// test seq_plugin: concat associativity is normalized by the plugin +static void test_seq_plugin_assoc() { + std::cout << "test_seq_plugin_assoc\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + // register nodes in egraph + // concat(concat(a,b),c) should be merged with concat(a,concat(b,c)) + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + + euf::enode* nab_c = get_node(g, seq, ab_c); + g.propagate(); + + // the plugin should have created a right-associated form and merged + std::cout << g << "\n"; +} + +// test seq_plugin: empty string elimination +static void test_seq_plugin_empty() { + std::cout << "test_seq_plugin_empty\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref xe(seq.str.mk_concat(x, empty), m); + + auto* nxe = get_node(g, seq, xe); + auto* nx = g.find(x); + g.propagate(); + + // concat(x, empty) should be merged with x + SASSERT(nxe->get_root() == nx->get_root()); + std::cout << g << "\n"; +} + +// test seq_plugin: Kleene star merging +// The seq_plugin detects when star bodies are congruent +// This tests the same_star_body logic at the regex level +static void test_seq_plugin_star_merge() { + std::cout << "test_seq_plugin_star_merge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + sort_ref re_sort(seq.re.mk_re(str_sort), m); + + expr_ref x(m.mk_const("x", str_sort), m); + // re.star(to_re(x)) + expr_ref to_re_x(seq.re.mk_to_re(x), m); + expr_ref star_x(seq.re.mk_star(to_re_x), m); + // use regex concat for star * star + expr_ref star_star(seq.re.mk_concat(star_x, star_x), m); + + // register in sgraph + sg.mk(star_star); + euf::snode* s = sg.find(star_x); + SASSERT(s && s->is_star()); + SASSERT(s->is_nullable()); + + std::cout << g << "\n"; +} + +// test seq_plugin: nullable absorption by .* +// concat(.*, nullable) should merge with .* +static void test_seq_plugin_nullable_absorb() { + std::cout << "test_seq_plugin_nullable_absorb\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + // concat(x, empty) = x (empty is nullable, exercises nullable check) + expr_ref xe(seq.str.mk_concat(x, empty), m); + + auto* nxe = get_node(g, seq, xe); + auto* nx = g.find(x); + g.propagate(); + + // concat(x, empty) should be merged with x (empty elimination) + SASSERT(nxe->get_root() == nx->get_root()); + std::cout << g << "\n"; +} + +// test sgraph owns egraph and syncs push/pop +static void test_sgraph_egraph_sync() { + std::cout << "test_sgraph_egraph_sync\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + auto* nx = get_node(g, seq, x); + auto* ny = get_node(g, seq, y); + + sg.push(); + + g.merge(nx, ny, nullptr); + g.propagate(); + SASSERT(nx->get_root() == ny->get_root()); + + sg.pop(1); + // after pop, the merge should be undone + SASSERT(nx->get_root() != ny->get_root()); +} + +void tst_euf_seq_plugin() { + s_var = 0; test_sgraph_basic(); + s_var = 0; test_sgraph_backtrack(); + s_var = 0; test_seq_plugin_assoc(); + s_var = 0; test_seq_plugin_empty(); + s_var = 0; test_seq_plugin_star_merge(); + s_var = 0; test_seq_plugin_nullable_absorb(); + s_var = 0; test_sgraph_egraph_sync(); +} diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp new file mode 100644 index 000000000..569e8fd75 --- /dev/null +++ b/src/test/euf_sgraph.cpp @@ -0,0 +1,768 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_sgraph.cpp + +Abstract: + + Self-contained unit tests for the sgraph string graph layer. + Tests snode classification, metadata computation, push/pop + backtracking, associativity-respecting hash table, compound + node construction, and snode navigation. + +--*/ + +#include "util/util.h" +#include "ast/euf/euf_sgraph.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include "ast/arith_decl_plugin.h" +#include + +// test classification and metadata for basic string nodes: +// variables, empty strings, characters, units, and concats +static void test_sgraph_classify() { + std::cout << "test_sgraph_classify\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + // string variable + expr_ref x(m.mk_const("x", str_sort), m); + euf::snode* sx = sg.mk(x); + SASSERT(sx && sx->is_var()); + SASSERT(!sx->is_ground()); + SASSERT(sx->is_regex_free()); + SASSERT(!sx->is_nullable()); + SASSERT(sx->level() == 1); + SASSERT(sx->length() == 1); + SASSERT(sx->is_token()); + + // empty string + expr_ref empty(seq.str.mk_empty(str_sort), m); + euf::snode* se = sg.mk(empty); + SASSERT(se && se->is_empty()); + SASSERT(se->is_ground()); + SASSERT(se->is_nullable()); + SASSERT(se->level() == 0); + SASSERT(se->length() == 0); + SASSERT(!se->is_token()); + + // character unit with literal char + expr_ref ch(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch), m); + euf::snode* sca = sg.mk(unit_a); + SASSERT(sca && sca->is_char()); + SASSERT(sca->is_ground()); + SASSERT(!sca->is_nullable()); + SASSERT(sca->level() == 1); + SASSERT(sca->length() == 1); + SASSERT(sca->is_token()); + + // concat of two variables + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref xy(seq.str.mk_concat(x, y), m); + euf::snode* sxy = sg.mk(xy); + SASSERT(sxy && sxy->is_concat()); + SASSERT(!sxy->is_ground()); + SASSERT(sxy->is_regex_free()); + SASSERT(!sxy->is_nullable()); + SASSERT(sxy->level() == 2); + SASSERT(sxy->length() == 2); + SASSERT(sxy->num_args() == 2); + SASSERT(!sxy->is_token()); + + sg.display(std::cout); +} + +// test classification for regex nodes: +// star, union, intersection, complement, full_seq, full_char, fail, to_re, in_re +static void test_sgraph_regex() { + std::cout << "test_sgraph_regex\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + + // to_re + expr_ref to_re_x(seq.re.mk_to_re(x), m); + euf::snode* str = sg.mk(to_re_x); + SASSERT(str && str->is_to_re()); + SASSERT(!str->is_regex_free()); + SASSERT(!str->is_nullable()); // to_re(x) nullable iff x nullable, x is var so not nullable + SASSERT(str->num_args() == 1); + + // star + expr_ref star_x(seq.re.mk_star(to_re_x), m); + euf::snode* ss = sg.mk(star_x); + SASSERT(ss && ss->is_star()); + SASSERT(!ss->is_regex_free()); + SASSERT(ss->is_nullable()); // star is always nullable + SASSERT(ss->num_args() == 1); + + // full_seq (.*) + expr_ref full_seq(seq.re.mk_full_seq(str_sort), m); + euf::snode* sfs = sg.mk(full_seq); + SASSERT(sfs && sfs->is_full_seq()); + SASSERT(sfs->is_ground()); + SASSERT(sfs->is_nullable()); + + // full_char (.) + expr_ref full_char(seq.re.mk_full_char(str_sort), m); + euf::snode* sfc = sg.mk(full_char); + SASSERT(sfc && sfc->is_full_char()); + SASSERT(sfc->is_ground()); + SASSERT(!sfc->is_nullable()); + + // empty set, fail + sort_ref re_sort(seq.re.mk_re(str_sort), m); + expr_ref empty_set(seq.re.mk_empty(re_sort), m); + euf::snode* sfail = sg.mk(empty_set); + SASSERT(sfail && sfail->is_fail()); + SASSERT(!sfail->is_nullable()); + + // union: to_re(x) | star(to_re(x)), nullable because star is + expr_ref re_union(seq.re.mk_union(to_re_x, star_x), m); + euf::snode* su = sg.mk(re_union); + SASSERT(su && su->is_union()); + SASSERT(su->is_nullable()); // star_x is nullable + + // intersection: to_re(x) & star(to_re(x)), nullable only if both are + expr_ref re_inter(seq.re.mk_inter(to_re_x, star_x), m); + euf::snode* si = sg.mk(re_inter); + SASSERT(si && si->is_intersect()); + SASSERT(!si->is_nullable()); // to_re(x) is not nullable + + // complement of to_re(x): nullable because to_re(x) is not nullable + expr_ref re_comp(seq.re.mk_complement(to_re_x), m); + euf::snode* sc = sg.mk(re_comp); + SASSERT(sc && sc->is_complement()); + SASSERT(sc->is_nullable()); // complement of non-nullable is nullable + + // in_re + expr_ref in_re(seq.re.mk_in_re(x, star_x), m); + euf::snode* sir = sg.mk(in_re); + SASSERT(sir && sir->is_in_re()); + SASSERT(!sir->is_regex_free()); + + sg.display(std::cout); +} + +// test power node classification and metadata +static void test_sgraph_power() { + std::cout << "test_sgraph_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref n(arith.mk_int(3), m); + expr_ref xn(seq.str.mk_power(x, n), m); + + euf::snode* sp = sg.mk(xn); + SASSERT(sp && sp->is_power()); + SASSERT(!sp->is_ground()); // base x is not ground + SASSERT(sp->is_regex_free()); + SASSERT(!sp->is_nullable()); // base x is not nullable + SASSERT(sp->num_args() >= 1); + + sg.display(std::cout); +} + +// test push/pop backtracking: nodes created inside a scope +// are removed on pop, nodes before persist +static void test_sgraph_push_pop() { + std::cout << "test_sgraph_push_pop\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref z(m.mk_const("z", str_sort), m); + + // create x before any scope + sg.mk(x); + unsigned before = sg.num_nodes(); + SASSERT(sg.find(x)); + + sg.push(); + + // create y and concat(x,y) inside scope + expr_ref xy(seq.str.mk_concat(x, y), m); + sg.mk(xy); + SASSERT(sg.num_nodes() > before); + SASSERT(sg.find(y)); + SASSERT(sg.find(xy)); + + sg.pop(1); + + // x persists, y and xy removed + SASSERT(sg.find(x)); + SASSERT(!sg.find(y)); + SASSERT(!sg.find(xy)); + SASSERT(sg.num_nodes() == before); +} + +// test nested push/pop with multiple scopes +static void test_sgraph_nested_scopes() { + std::cout << "test_sgraph_nested_scopes\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + sg.mk(a); + unsigned n0 = sg.num_nodes(); + + sg.push(); + sg.mk(b); + unsigned n1 = sg.num_nodes(); + + sg.push(); + sg.mk(c); + unsigned n2 = sg.num_nodes(); + SASSERT(n2 > n1 && n1 > n0); + + // pop inner scope, c goes away + sg.pop(1); + SASSERT(sg.num_nodes() == n1); + SASSERT(sg.find(a)); + SASSERT(sg.find(b)); + SASSERT(!sg.find(c)); + + // pop outer scope, b goes away + sg.pop(1); + SASSERT(sg.num_nodes() == n0); + SASSERT(sg.find(a)); + SASSERT(!sg.find(b)); +} + +// test that find returns the same snode for the same expression +static void test_sgraph_find_idempotent() { + std::cout << "test_sgraph_find_idempotent\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + euf::snode* s1 = sg.mk(x); + euf::snode* s2 = sg.mk(x); // calling mk again returns same node + SASSERT(s1 == s2); + SASSERT(s1 == sg.find(x)); +} + +// test mk_concat: empty absorption, node construction via mk(concat_expr) +static void test_sgraph_mk_concat() { + std::cout << "test_sgraph_mk_concat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + + euf::snode* sx = sg.mk(x); + euf::snode* sy = sg.mk(y); + euf::snode* se = sg.mk(empty); + + // concat with empty yields the non-empty side at sgraph level + // (empty absorption is a property of the expression, checked via mk) + SASSERT(se && se->is_empty()); + + // normal concat via expression + expr_ref xy(seq.str.mk_concat(x, y), m); + euf::snode* sxy = sg.mk(xy); + SASSERT(sxy && sxy->is_concat()); + SASSERT(sxy->num_args() == 2); + SASSERT(sxy->arg(0) == sx); + SASSERT(sxy->arg(1) == sy); + + // calling mk again with same expr returns same node + euf::snode* sxy2 = sg.mk(xy); + SASSERT(sxy == sxy2); +} + +// test power node construction via mk(power_expr) +static void test_sgraph_mk_power() { + std::cout << "test_sgraph_mk_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref n(arith.mk_int(5), m); + expr_ref xn(seq.str.mk_power(x, n), m); + + euf::snode* sx = sg.mk(x); + euf::snode* sp = sg.mk(xn); + SASSERT(sp && sp->is_power()); + SASSERT(sp->num_args() == 2); + SASSERT(sp->arg(0) == sx); + + // calling mk again returns same node + euf::snode* sp2 = sg.mk(xn); + SASSERT(sp == sp2); +} + +// test snode first/last navigation on concat trees +static void test_sgraph_first_last() { + std::cout << "test_sgraph_first_last\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + euf::snode* sa = sg.mk(a); + euf::snode* sb = sg.mk(b); + euf::snode* sc = sg.mk(c); + + // concat(concat(a,b),c): first=a, last=c + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + euf::snode* sab_c = sg.mk(ab_c); + SASSERT(sab_c->first() == sa); + SASSERT(sab_c->last() == sc); + + // concat(a,concat(b,c)): first=a, last=c + expr_ref bc(seq.str.mk_concat(b, c), m); + expr_ref a_bc(seq.str.mk_concat(a, bc), m); + euf::snode* sa_bc = sg.mk(a_bc); + SASSERT(sa_bc->first() == sa); + SASSERT(sa_bc->last() == sc); + + // single node: first and last are self + SASSERT(sa->first() == sa); + SASSERT(sa->last() == sa); +} + +// test concat metadata propagation: +// ground, regex_free, nullable, level, length +static void test_sgraph_concat_metadata() { + std::cout << "test_sgraph_concat_metadata\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref ch(seq.str.mk_char('Z'), m); + expr_ref unit_z(seq.str.mk_unit(ch), m); + + euf::snode* sx = sg.mk(x); + euf::snode* se = sg.mk(empty); + euf::snode* sz = sg.mk(unit_z); + + // concat(x, unit('Z')): not ground (x is variable), regex_free, not nullable + expr_ref xz(seq.str.mk_concat(x, unit_z), m); + euf::snode* sxz = sg.mk(xz); + SASSERT(!sxz->is_ground()); + SASSERT(sxz->is_regex_free()); + SASSERT(!sxz->is_nullable()); + SASSERT(sxz->length() == 2); + SASSERT(sxz->level() == 2); + + // concat(empty, empty): nullable (both empty) + expr_ref empty2(seq.str.mk_concat(empty, empty), m); + euf::snode* see = sg.mk(empty2); + SASSERT(see->is_nullable()); + SASSERT(see->is_ground()); + SASSERT(see->length() == 0); + + // deep chain: concat(concat(x,x),concat(x,x)) has level 3, length 4 + expr_ref xx(seq.str.mk_concat(x, x), m); + expr_ref xxxx(seq.str.mk_concat(xx, xx), m); + euf::snode* sxxxx = sg.mk(xxxx); + SASSERT(sxxxx->level() == 3); + SASSERT(sxxxx->length() == 4); +} + +// test display does not crash +static void test_sgraph_display() { + std::cout << "test_sgraph_display\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref xy(seq.str.mk_concat(x, y), m); + sg.mk(xy); + + std::ostringstream oss; + sg.display(oss); + std::string out = oss.str(); + SASSERT(out.find("var") != std::string::npos); + SASSERT(out.find("concat") != std::string::npos); + std::cout << out; +} + +// test sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat +static void test_sgraph_factory() { + std::cout << "test_sgraph_factory\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + // mk_var + euf::snode* x = sg.mk_var(symbol("x")); + SASSERT(x && x->is_var()); + SASSERT(!x->is_ground()); + SASSERT(x->length() == 1); + + // mk_char + euf::snode* a = sg.mk_char('A'); + SASSERT(a && a->is_char()); + SASSERT(a->is_ground()); + SASSERT(a->length() == 1); + + // mk_empty + euf::snode* e = sg.mk_empty(); + SASSERT(e && e->is_empty()); + SASSERT(e->is_nullable()); + SASSERT(e->length() == 0); + + // mk_concat with empty absorption + euf::snode* xe = sg.mk_concat(x, e); + SASSERT(xe == x); + euf::snode* ex = sg.mk_concat(e, x); + SASSERT(ex == x); + + // mk_concat of two variables + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* xy = sg.mk_concat(x, y); + SASSERT(xy && xy->is_concat()); + SASSERT(xy->length() == 2); + SASSERT(xy->arg(0) == x); + SASSERT(xy->arg(1) == y); + + // mk_concat of multiple characters + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + euf::snode* abc = sg.mk_concat(sg.mk_concat(a, b), c); + SASSERT(abc->length() == 3); + SASSERT(abc->is_ground()); + SASSERT(abc->first() == a); + SASSERT(abc->last() == c); +} + +// test snode::at() and snode::collect_tokens() +static void test_sgraph_indexing() { + std::cout << "test_sgraph_indexing\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + euf::snode* x = sg.mk_var(symbol("x")); + + // build concat(concat(a, b), concat(c, x)) => [A, B, C, x] + euf::snode* ab = sg.mk_concat(a, b); + euf::snode* cx = sg.mk_concat(c, x); + euf::snode* abcx = sg.mk_concat(ab, cx); + + SASSERT(abcx->length() == 4); + + // test at() + SASSERT(abcx->at(0) == a); + SASSERT(abcx->at(1) == b); + SASSERT(abcx->at(2) == c); + SASSERT(abcx->at(3) == x); + SASSERT(abcx->at(4) == nullptr); // out of bounds + + // test collect_tokens() + euf::snode_vector tokens; + abcx->collect_tokens(tokens); + SASSERT(tokens.size() == 4); + SASSERT(tokens[0] == a); + SASSERT(tokens[1] == b); + SASSERT(tokens[2] == c); + SASSERT(tokens[3] == x); + + // single token: at(0) is self + SASSERT(a->at(0) == a); + SASSERT(a->at(1) == nullptr); + + // empty: at(0) is nullptr + euf::snode* e = sg.mk_empty(); + SASSERT(e->at(0) == nullptr); + euf::snode_vector empty_tokens; + e->collect_tokens(empty_tokens); + SASSERT(empty_tokens.empty()); +} + +// test sgraph drop operations +static void test_sgraph_drop() { + std::cout << "test_sgraph_drop\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + euf::snode* d = sg.mk_char('D'); + + // build concat(concat(a, b), concat(c, d)) => [A, B, C, D] + euf::snode* ab = sg.mk_concat(a, b); + euf::snode* cd = sg.mk_concat(c, d); + euf::snode* abcd = sg.mk_concat(ab, cd); + + SASSERT(abcd->length() == 4); + + // drop_first: [A, B, C, D] => [B, C, D] + euf::snode* bcd = sg.drop_first(abcd); + SASSERT(bcd->length() == 3); + SASSERT(bcd->first() == b); + SASSERT(bcd->last() == d); + + // drop_last: [A, B, C, D] => [A, B, C] + euf::snode* abc = sg.drop_last(abcd); + SASSERT(abc->length() == 3); + SASSERT(abc->first() == a); + SASSERT(abc->last() == c); + + // drop_left(2): [A, B, C, D] => [C, D] + euf::snode* cd2 = sg.drop_left(abcd, 2); + SASSERT(cd2->length() == 2); + SASSERT(cd2->first() == c); + + // drop_right(2): [A, B, C, D] => [A, B] + euf::snode* ab2 = sg.drop_right(abcd, 2); + SASSERT(ab2->length() == 2); + SASSERT(ab2->last() == b); + + // drop all: [A, B, C, D] => empty + euf::snode* empty = sg.drop_left(abcd, 4); + SASSERT(empty->is_empty()); + + // drop from single token: [A] => empty + euf::snode* e = sg.drop_first(a); + SASSERT(e->is_empty()); + + // drop from empty: no change + euf::snode* ee = sg.drop_first(sg.mk_empty()); + SASSERT(ee->is_empty()); +} + +// test sgraph substitution +static void test_sgraph_subst() { + std::cout << "test_sgraph_subst\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // concat(x, concat(a, x)) with x -> b gives concat(b, concat(a, b)) + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* xax = sg.mk_concat(x, ax); + SASSERT(xax->length() == 3); + + euf::snode* result = sg.subst(xax, x, b); + SASSERT(result->length() == 3); + SASSERT(result->first() == b); + SASSERT(result->last() == b); + SASSERT(result->at(1) == a); // middle is still 'A' + + // substitution of non-occurring variable is identity + euf::snode* same = sg.subst(xax, y, b); + SASSERT(same == xax); + + // substitution of variable with empty + euf::snode* e = sg.mk_empty(); + euf::snode* collapsed = sg.subst(xax, x, e); + SASSERT(collapsed->length() == 1); // just 'a' remains + SASSERT(collapsed == a); +} + +// test complex concatenation creation, merging and simplification +static void test_sgraph_complex_concat() { + std::cout << "test_sgraph_complex_concat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + // build a string "HELLO" = concat(H, concat(E, concat(L, concat(L, O)))) + euf::snode* h = sg.mk_char('H'); + euf::snode* e = sg.mk_char('E'); + euf::snode* l = sg.mk_char('L'); + euf::snode* o = sg.mk_char('O'); + + euf::snode* lo = sg.mk_concat(l, o); + euf::snode* llo = sg.mk_concat(l, lo); + euf::snode* ello = sg.mk_concat(e, llo); + euf::snode* hello = sg.mk_concat(h, ello); + + SASSERT(hello->length() == 5); + SASSERT(hello->is_ground()); + SASSERT(hello->first() == h); + SASSERT(hello->last() == o); + + // index into "HELLO" + SASSERT(hello->at(0) == h); + SASSERT(hello->at(1) == e); + SASSERT(hello->at(2) == l); + SASSERT(hello->at(3) == l); + SASSERT(hello->at(4) == o); + + // drop first 2 from "HELLO" => "LLO" + euf::snode* llo2 = sg.drop_left(hello, 2); + SASSERT(llo2->length() == 3); + SASSERT(llo2->first() == l); + + // drop last 3 from "HELLO" => "HE" + euf::snode* he = sg.drop_right(hello, 3); + SASSERT(he->length() == 2); + SASSERT(he->first() == h); + SASSERT(he->last() == e); + + // mixed variables and characters: concat(x, "AB", y) + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* ab = sg.mk_concat(a, b); + euf::snode* xab = sg.mk_concat(x, ab); + euf::snode* xaby = sg.mk_concat(xab, y); + + SASSERT(xaby->length() == 4); + SASSERT(!xaby->is_ground()); + SASSERT(xaby->at(0) == x); + SASSERT(xaby->at(1) == a); + SASSERT(xaby->at(2) == b); + SASSERT(xaby->at(3) == y); +} + +// test Brzozowski derivative computation +static void test_sgraph_brzozowski() { + std::cout << "test_sgraph_brzozowski\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + // derivative of re.star(to_re("a")) w.r.t. 'a' + // d/da (a*) = a* + expr_ref ch_a(seq.str.mk_char('a'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref star_a(seq.re.mk_star(to_re_a), m); + + euf::snode* s_star_a = sg.mk(star_a); + euf::snode* s_unit_a = sg.mk(unit_a); + + euf::snode* deriv = sg.brzozowski_deriv(s_star_a, s_unit_a); + SASSERT(deriv != nullptr); + std::cout << " d/da(a*) kind: " << (int)deriv->kind() << "\n"; + + // derivative of re.empty w.r.t. 'a' should be re.empty + sort_ref re_sort(seq.re.mk_re(str_sort), m); + expr_ref re_empty(seq.re.mk_empty(re_sort), m); + euf::snode* s_empty = sg.mk(re_empty); + euf::snode* deriv_empty = sg.brzozowski_deriv(s_empty, s_unit_a); + SASSERT(deriv_empty != nullptr); + SASSERT(deriv_empty->is_fail()); // derivative of empty set is empty set + std::cout << " d/da(empty) kind: " << (int)deriv_empty->kind() << "\n"; + + sg.display(std::cout); +} + +// test minterm computation +static void test_sgraph_minterms() { + std::cout << "test_sgraph_minterms\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + // simple regex with no character predicates: re.all (.*) + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* s_re_all = sg.mk(re_all); + + euf::snode_vector minterms; + sg.compute_minterms(s_re_all, minterms); + // no predicates => single minterm (full_char) + SASSERT(minterms.size() == 1); + std::cout << " re.all minterms: " << minterms.size() << "\n"; +} + +void tst_euf_sgraph() { + test_sgraph_classify(); + test_sgraph_regex(); + test_sgraph_power(); + test_sgraph_push_pop(); + test_sgraph_nested_scopes(); + test_sgraph_find_idempotent(); + test_sgraph_mk_concat(); + test_sgraph_mk_power(); + test_sgraph_first_last(); + test_sgraph_concat_metadata(); + test_sgraph_display(); + test_sgraph_factory(); + test_sgraph_indexing(); + test_sgraph_drop(); + test_sgraph_subst(); + test_sgraph_complex_concat(); + test_sgraph_brzozowski(); + test_sgraph_minterms(); +} diff --git a/src/test/main.cpp b/src/test/main.cpp index c5d55ebe1..734dfe4b7 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -281,9 +281,12 @@ int main(int argc, char ** argv) { TST(distribution); TST(euf_bv_plugin); TST(euf_arith_plugin); + TST(euf_sgraph); + TST(euf_seq_plugin); TST(sls_test); TST(scoped_vector); TST(sls_seq_plugin); + TST(seq_nielsen); TST(ho_matcher); TST(finite_set); TST(finite_set_rewriter); diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp new file mode 100644 index 000000000..f994f5a70 --- /dev/null +++ b/src/test/seq_nielsen.cpp @@ -0,0 +1,479 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_nielsen.cpp + +Abstract: + + Unit tests for the Nielsen graph framework (seq_nielsen.h). + Tests constraint types, node/edge construction, substitution + application, and graph population. + +--*/ + +#include "util/util.h" +#include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/seq/seq_nielsen.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include + +// test dep_tracker basic operations +static void test_dep_tracker() { + std::cout << "test_dep_tracker\n"; + + // empty tracker + seq::dep_tracker d0; + SASSERT(d0.empty()); + + // tracker with one bit set + seq::dep_tracker d1(8, 3); + SASSERT(!d1.empty()); + + // tracker with another bit + seq::dep_tracker d2(8, 5); + SASSERT(!d2.empty()); + + // merge + seq::dep_tracker d3 = d1; + d3.merge(d2); + SASSERT(!d3.empty()); + SASSERT(d3.is_superset(d1)); + SASSERT(d3.is_superset(d2)); + SASSERT(!d1.is_superset(d2)); + + // equality + seq::dep_tracker d4(8, 3); + SASSERT(d1 == d4); + SASSERT(d1 != d2); +} + +// test str_eq constraint creation and operations +static void test_str_eq() { + std::cout << "test_str_eq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* e = sg.mk_empty(); + + seq::dep_tracker dep(4, 0); + + // basic equality + seq::str_eq eq1(x, y, dep); + SASSERT(!eq1.is_trivial()); + SASSERT(eq1.contains_var(x)); + SASSERT(eq1.contains_var(y)); + SASSERT(!eq1.contains_var(a)); + + // trivial equality: same node + seq::str_eq eq2(x, x, dep); + SASSERT(eq2.is_trivial()); + + // trivial equality: both empty + seq::str_eq eq3(e, e, dep); + SASSERT(eq3.is_trivial()); + + // sorting: lower id first + seq::str_eq eq4(y, x, dep); + eq4.sort(); + SASSERT(eq4.m_lhs->id() <= eq4.m_rhs->id()); + + // contains_var with concat + euf::snode* xa = sg.mk_concat(x, a); + seq::str_eq eq5(xa, y, dep); + SASSERT(eq5.contains_var(x)); + SASSERT(eq5.contains_var(y)); + SASSERT(!eq5.contains_var(e)); +} + +// test str_mem constraint creation and operations +static void test_str_mem() { + std::cout << "test_str_mem\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* e = sg.mk_empty(); + + // create a regex: re.all (.*) + expr_ref star_fc(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(star_fc); + + seq::dep_tracker dep(4, 1); + seq::str_mem mem(x, regex, e, 0, dep); + + // x in regex is primitive (x is a single variable) + SASSERT(mem.is_primitive()); + SASSERT(mem.contains_var(x)); + + // concatenation is not primitive + euf::snode* a = sg.mk_char('A'); + euf::snode* xa = sg.mk_concat(x, a); + seq::str_mem mem2(xa, regex, e, 1, dep); + SASSERT(!mem2.is_primitive()); + SASSERT(mem2.contains_var(x)); +} + +// test nielsen_subst +static void test_nielsen_subst() { + std::cout << "test_nielsen_subst\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* e = sg.mk_empty(); + + seq::dep_tracker dep; + + // eliminating substitution: x -> A (x does not appear in A) + seq::nielsen_subst s1(x, a, dep); + SASSERT(s1.is_eliminating()); + + // eliminating substitution: x -> empty + seq::nielsen_subst s2(x, e, dep); + SASSERT(s2.is_eliminating()); + + // non-eliminating substitution: x -> concat(A, x) + euf::snode* ax = sg.mk_concat(a, x); + seq::nielsen_subst s3(x, ax, dep); + SASSERT(!s3.is_eliminating()); + + // eliminating substitution: x -> y (x not in y) + seq::nielsen_subst s4(x, y, dep); + SASSERT(s4.is_eliminating()); +} + +// test nielsen_node creation and constraint management +static void test_nielsen_node() { + std::cout << "test_nielsen_node\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + seq::nielsen_node* root = ng.mk_node(); + SASSERT(root->id() == 0); + SASSERT(root->str_eqs().empty()); + SASSERT(root->str_mems().empty()); + SASSERT(root->is_progress()); + SASSERT(root->reason() == seq::backtrack_reason::unevaluated); + + // add constraints + seq::dep_tracker dep; + root->add_str_eq(seq::str_eq(x, y, dep)); + root->add_str_eq(seq::str_eq(sg.mk_concat(x, a), sg.mk_concat(a, y), dep)); + SASSERT(root->str_eqs().size() == 2); + + // regex membership + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(re_all); + euf::snode* empty = sg.mk_empty(); + root->add_str_mem(seq::str_mem(x, regex, empty, 0, dep)); + SASSERT(root->str_mems().size() == 1); + + // clone from parent + seq::nielsen_node* child = ng.mk_node(); + child->clone_from(*root); + SASSERT(child->str_eqs().size() == 2); + SASSERT(child->str_mems().size() == 1); + SASSERT(child->id() == 1); +} + +// test nielsen_edge creation +static void test_nielsen_edge() { + std::cout << "test_nielsen_edge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + // create parent and child nodes + seq::nielsen_node* parent = ng.mk_node(); + seq::dep_tracker dep; + parent->add_str_eq(seq::str_eq(x, y, dep)); + + seq::nielsen_node* child = ng.mk_child(parent); + + // create edge with substitution x -> A + seq::nielsen_edge* edge = ng.mk_edge(parent, child, true); + edge->add_subst(seq::nielsen_subst(x, a, dep)); + + SASSERT(edge->src() == parent); + SASSERT(edge->tgt() == child); + SASSERT(edge->is_progress()); + SASSERT(edge->subst().size() == 1); + SASSERT(parent->outgoing().size() == 1); + SASSERT(parent->outgoing()[0] == edge); +} + +// test nielsen_graph population from external constraints +static void test_nielsen_graph_populate() { + std::cout << "test_nielsen_graph_populate\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + // add string equality: x = y + ng.add_str_eq(x, y); + SASSERT(ng.root() != nullptr); + SASSERT(ng.root()->str_eqs().size() == 1); + SASSERT(ng.num_nodes() == 1); + + // add regex membership: x in .* + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(re_all); + ng.add_str_mem(x, regex); + SASSERT(ng.root()->str_mems().size() == 1); + SASSERT(ng.root()->str_mems()[0].m_id == 0); + + // add another equality: concat(x, A) = concat(A, y) + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* ay = sg.mk_concat(a, y); + ng.add_str_eq(xa, ay); + SASSERT(ng.root()->str_eqs().size() == 2); + + // display for visual inspection + ng.display(std::cout); +} + +// test substitution application on nielsen_node +static void test_nielsen_subst_apply() { + std::cout << "test_nielsen_subst_apply\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* e = sg.mk_empty(); + + // create node with constraint: concat(x, A) = concat(B, y) + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep; + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* by = sg.mk_concat(b, y); + node->add_str_eq(seq::str_eq(xa, by, dep)); + + // apply substitution x -> empty + seq::nielsen_subst s(x, e, dep); + node->apply_subst(sg, s); + + // after x -> empty: lhs should be just A, rhs still concat(B, y) + SASSERT(node->str_eqs().size() == 1); + auto const& eq = node->str_eqs()[0]; + // a should remain (after x replaced with empty, concat(empty, A) = A) + std::cout << " lhs len=" << eq.m_lhs->length() << " rhs len=" << eq.m_rhs->length() << "\n"; +} + +// test Nielsen graph reset +static void test_nielsen_graph_reset() { + std::cout << "test_nielsen_graph_reset\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(x, y); + SASSERT(ng.num_nodes() == 1); + SASSERT(ng.root() != nullptr); + + ng.reset(); + SASSERT(ng.num_nodes() == 0); + SASSERT(ng.root() == nullptr); +} + +// test constructing a basic Nielsen expansion tree +// x = Ay: split into x -> eps (progress) or x -> Ax (non-progress) +static void test_nielsen_expansion() { + std::cout << "test_nielsen_expansion\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* ay = sg.mk_concat(a, y); + + // root: x = Ay + ng.add_str_eq(x, ay); + seq::nielsen_node* root = ng.root(); + SASSERT(root->str_eqs().size() == 1); + + seq::dep_tracker dep; + + // branch 1: x -> eps (eliminating, progress) + euf::snode* e = sg.mk_empty(); + seq::nielsen_node* child1 = ng.mk_child(root); + seq::nielsen_subst s1(x, e, dep); + child1->apply_subst(sg, s1); + seq::nielsen_edge* edge1 = ng.mk_edge(root, child1, true); + edge1->add_subst(s1); + + // branch 2: x -> Ax (non-eliminating, non-progress) + euf::snode* ax = sg.mk_concat(a, x); + seq::nielsen_node* child2 = ng.mk_child(root); + seq::nielsen_subst s2(x, ax, dep); + child2->apply_subst(sg, s2); + seq::nielsen_edge* edge2 = ng.mk_edge(root, child2, false); + edge2->add_subst(s2); + + SASSERT(ng.num_nodes() == 3); + SASSERT(root->outgoing().size() == 2); + SASSERT(edge1->is_progress()); + SASSERT(!edge2->is_progress()); + + // verify substitution effects on child1: eps = Ay becomes empty = Ay + SASSERT(child1->str_eqs().size() == 1); + + ng.display(std::cout); +} + +// test run index management +static void test_run_idx() { + std::cout << "test_run_idx\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + SASSERT(ng.run_idx() == 0); + + ng.inc_run_idx(); + SASSERT(ng.run_idx() == 1); + + ng.inc_run_idx(); + SASSERT(ng.run_idx() == 2); +} + +// test multiple regex memberships +static void test_multiple_memberships() { + std::cout << "test_multiple_memberships\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + // x in .* + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex1 = sg.mk(re_all); + ng.add_str_mem(x, regex1); + + // x in re.union(to_re("A"), to_re("B")) + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref to_re_b(seq.re.mk_to_re(unit_b), m); + expr_ref re_union(seq.re.mk_union(to_re_a, to_re_b), m); + euf::snode* regex2 = sg.mk(re_union); + ng.add_str_mem(x, regex2); + + SASSERT(ng.root() != nullptr); + SASSERT(ng.root()->str_mems().size() == 2); + SASSERT(ng.root()->str_mems()[0].m_id == 0); + SASSERT(ng.root()->str_mems()[1].m_id == 1); + + ng.display(std::cout); +} + +// test backedge setting (cycle detection support) +static void test_backedge() { + std::cout << "test_backedge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + seq::nielsen_node* child = ng.mk_child(root); + + // set backedge from child to root (cycle) + child->set_backedge(root); + SASSERT(child->backedge() == root); + SASSERT(root->backedge() == nullptr); +} + +void tst_seq_nielsen() { + test_dep_tracker(); + test_str_eq(); + test_str_mem(); + test_nielsen_subst(); + test_nielsen_node(); + test_nielsen_edge(); + test_nielsen_graph_populate(); + test_nielsen_subst_apply(); + test_nielsen_graph_reset(); + test_nielsen_expansion(); + test_run_idx(); + test_multiple_memberships(); + test_backedge(); +}