From 5c14edf030f6614b6e294c21a62dd413f3ae972b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 23:23:12 +0000 Subject: [PATCH] Change ownership: sgraph owns egraph with seq_plugin, add assoc hash table, add unit tests Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 19 +-- src/ast/euf/euf_seq_plugin.h | 9 +- src/ast/euf/euf_sgraph.cpp | 75 +++++++++ src/ast/euf/euf_sgraph.h | 30 ++++ src/test/CMakeLists.txt | 1 + src/test/euf_seq_plugin.cpp | 276 +++++++++++++++++++++++++++++++++ src/test/main.cpp | 1 + 7 files changed, 391 insertions(+), 20 deletions(-) create mode 100644 src/test/euf_seq_plugin.cpp diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index ecce20721..e249a317a 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -21,15 +21,16 @@ Author: --*/ #include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_sgraph.h" #include "ast/euf/euf_egraph.h" #include "ast/ast_pp.h" namespace euf { - seq_plugin::seq_plugin(egraph& g): + seq_plugin::seq_plugin(egraph& g, sgraph& sg): plugin(g), m_seq(g.get_manager()), - m_sg(g.get_manager()) { + m_sg(sg) { } void seq_plugin::register_node(enode* n) { @@ -232,25 +233,15 @@ namespace euf { if (m_qhead > m_queue.size()) m_qhead = m_queue.size(); break; - case undo_kind::undo_push_scope: - m_sg.pop(1); - break; } } - void seq_plugin::push_scope_eh() { - m_sg.push(); - m_undo.push_back(undo_kind::undo_push_scope); - push_plugin_undo(get_id()); - } - std::ostream& seq_plugin::display(std::ostream& out) const { - out << "seq\n"; - m_sg.display(out); + out << "seq-plugin\n"; return out; } void seq_plugin::collect_statistics(statistics& st) const { - m_sg.collect_statistics(st); + // statistics are collected by sgraph which owns us } } diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 90c55c39e..9b4010947 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -40,21 +40,20 @@ Author: #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_plugin.h" -#include "ast/euf/euf_sgraph.h" namespace euf { class egraph; + class sgraph; class seq_plugin : public plugin { enum class undo_kind { undo_add_concat, - undo_push_scope }; seq_util m_seq; - sgraph m_sg; + sgraph& m_sg; svector m_undo; // queue of merges and registrations to process @@ -101,7 +100,7 @@ namespace euf { bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2); public: - seq_plugin(egraph& g); + seq_plugin(egraph& g, sgraph& sg); theory_id get_id() const override { return m_seq.get_family_id(); } @@ -115,8 +114,6 @@ namespace euf { void undo() override; - void push_scope_eh() override; - std::ostream& display(std::ostream& out) const override; void collect_statistics(statistics& st) const override; diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index bbed15136..ee1b5745f 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -17,15 +17,58 @@ Author: --*/ #include "ast/euf/euf_sgraph.h" +#include "ast/euf/euf_seq_plugin.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" namespace euf { + // Associativity-respecting hash: flattens concat into leaf sequence + // concat(concat(a, b), c) and concat(a, concat(b, c)) hash identically + static void collect_leaves(snode const* n, ptr_vector& leaves) { + if (n->is_concat()) { + collect_leaves(n->arg(0), leaves); + collect_leaves(n->arg(1), leaves); + } + else { + leaves.push_back(n); + } + } + + unsigned concat_hash::operator()(snode const* n) const { + if (!n->is_concat()) + return n->id(); + ptr_vector leaves; + collect_leaves(n, leaves); + unsigned h = 0; + for (snode const* l : leaves) + h = combine_hash(h, l->id()); + return h; + } + + bool concat_eq::operator()(snode const* a, snode const* b) const { + if (a == b) return true; + if (!a->is_concat() && !b->is_concat()) + return a->id() == b->id(); + ptr_vector la, lb; + collect_leaves(a, la); + collect_leaves(b, lb); + if (la.size() != lb.size()) + return false; + for (unsigned i = 0; i < la.size(); ++i) + if (la[i]->id() != lb[i]->id()) + return false; + return true; + } + sgraph::sgraph(ast_manager& m): m(m), m_seq(m), + m_egraph(m), m_exprs(m) { + // create seq_plugin and register it with the egraph + // the seq_plugin gets a reference back to this sgraph + m_egraph.add_plugin(alloc(seq_plugin, m_egraph, *this)); } sgraph::~sgraph() { @@ -263,6 +306,9 @@ namespace euf { m_expr2snode.reserve(eid + 1, nullptr); m_expr2snode[eid] = n; } + // insert concats into the associativity-respecting hash table + if (k == snode_kind::s_concat) + m_concat_table.insert(n); ++m_stats.m_num_nodes; return n; } @@ -303,6 +349,16 @@ namespace euf { return nullptr; } + snode* sgraph::find_assoc_equal(snode* n) const { + if (!n || !n->is_concat()) + return nullptr; + snode* existing = nullptr; + if (m_concat_table.find(n, existing) && existing != n) { + return existing; + } + return nullptr; + } + snode* sgraph::mk_empty(sort* s) { expr_ref e(m_seq.str.mk_empty(s), m); return mk(e); @@ -328,9 +384,23 @@ namespace euf { return mk_snode(e, snode_kind::s_power, 2, args); } + enode* sgraph::mk_enode(expr* e) { + enode* n = m_egraph.find(e); + if (n) return n; + enode_vector args; + if (is_app(e)) { + for (expr* arg : *to_app(e)) { + enode* a = mk_enode(arg); + args.push_back(a); + } + } + return m_egraph.mk(e, 0, args.size(), args.data()); + } + void sgraph::push() { m_scopes.push_back(m_nodes.size()); ++m_num_scopes; + m_egraph.push(); } void sgraph::pop(unsigned num_scopes) { @@ -341,6 +411,8 @@ namespace euf { unsigned old_sz = m_scopes[new_lvl]; for (unsigned i = m_nodes.size(); i-- > old_sz; ) { snode* n = m_nodes[i]; + if (n->is_concat()) + m_concat_table.remove(n); if (n->get_expr()) { unsigned eid = n->get_expr()->get_id(); if (eid < m_expr2snode.size()) @@ -351,6 +423,7 @@ namespace euf { m_exprs.shrink(old_sz); m_scopes.shrink(new_lvl); m_num_scopes = new_lvl; + m_egraph.pop(num_scopes); } std::ostream& sgraph::display(std::ostream& out) const { @@ -403,6 +476,8 @@ namespace euf { st.update("seq-graph-nodes", m_stats.m_num_nodes); st.update("seq-graph-concat", m_stats.m_num_concat); st.update("seq-graph-power", m_stats.m_num_power); + st.update("seq-graph-hash-hits", m_stats.m_num_hash_hits); + m_egraph.collect_statistics(st); } } diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 3acbaae10..0f007670f 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -69,24 +69,43 @@ Author: #include "util/region.h" #include "util/statistics.h" +#include "util/hashtable.h" #include "ast/ast.h" #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_snode.h" +#include "ast/euf/euf_egraph.h" namespace euf { + class seq_plugin; + + // Associativity-respecting hash for concatenations. + // The hash function flattens concat trees so that + // concat(concat(a, b), c) and concat(a, concat(b, c)) + // hash to the same value. This is how ZIPT ensures + // finding equal concatenations efficiently. + struct concat_hash { + unsigned operator()(snode const* n) const; + }; + + struct concat_eq { + bool operator()(snode const* a, snode const* b) const; + }; + class sgraph { struct stats { unsigned m_num_nodes; unsigned m_num_concat; unsigned m_num_power; + unsigned m_num_hash_hits; stats() { reset(); } void reset() { memset(this, 0, sizeof(*this)); } }; ast_manager& m; seq_util m_seq; + egraph m_egraph; region m_region; snode_vector m_nodes; expr_ref_vector m_exprs; // pin expressions @@ -97,6 +116,9 @@ namespace euf { // maps expression id to snode ptr_vector m_expr2snode; + // hash table for finding equal concatenations modulo associativity + hashtable m_concat_table; + snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); @@ -107,6 +129,8 @@ namespace euf { ast_manager& get_manager() const { return m; } seq_util& get_seq_util() { return m_seq; } + egraph& get_egraph() { return m_egraph; } + egraph const& get_egraph() const { return m_egraph; } // register an expression and return its snode snode* mk(expr* e); @@ -114,11 +138,17 @@ namespace euf { // lookup an already-registered expression snode* find(expr* e) const; + // find an existing concat that is equal modulo associativity + snode* find_assoc_equal(snode* n) const; + // build compound snodes snode* mk_empty(sort* s); snode* mk_concat(snode* a, snode* b); snode* mk_power(snode* base, snode* exp); + // register expression in both sgraph and egraph + enode* mk_enode(expr* e); + // scope management for backtracking void push(); void pop(unsigned num_scopes); diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 1d5b5ce18..01c07e489 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -51,6 +51,7 @@ add_executable(test-z3 escaped.cpp euf_bv_plugin.cpp euf_arith_plugin.cpp + euf_seq_plugin.cpp ex.cpp expr_rand.cpp expr_substitution.cpp diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp new file mode 100644 index 000000000..6fe3521ae --- /dev/null +++ b/src/test/euf_seq_plugin.cpp @@ -0,0 +1,276 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +--*/ + +#include "util/util.h" +#include "ast/euf/euf_sgraph.h" +#include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_egraph.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include + +static unsigned s_var = 0; + +static euf::enode* get_node(euf::egraph& g, seq_util& seq, expr* e) { + auto* n = g.find(e); + if (n) return n; + euf::enode_vector args; + if (is_app(e)) + for (expr* arg : *to_app(e)) + args.push_back(get_node(g, seq, arg)); + n = g.mk(e, 0, args.size(), args.data()); + if (seq.is_seq(e) || seq.is_re(e)) + g.add_th_var(n, ++s_var, seq.get_family_id()); + return n; +} + +// test sgraph: basic classification and metadata +static void test_sgraph_basic() { + std::cout << "test_sgraph_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref xy(seq.str.mk_concat(x, y), m); + + euf::snode* sx = sg.mk(x); + SASSERT(sx); + SASSERT(sx->is_var()); + SASSERT(!sx->is_ground()); + SASSERT(sx->is_regex_free()); + SASSERT(!sx->is_nullable()); + SASSERT(sx->length() == 1); + + euf::snode* se = sg.mk(empty); + SASSERT(se); + SASSERT(se->is_empty()); + SASSERT(se->is_ground()); + SASSERT(se->is_nullable()); + SASSERT(se->length() == 0); + + euf::snode* sxy = sg.mk(xy); + SASSERT(sxy); + SASSERT(sxy->is_concat()); + SASSERT(!sxy->is_ground()); + SASSERT(sxy->length() == 2); + SASSERT(sxy->num_args() == 2); + + std::cout << "sgraph:\n"; + sg.display(std::cout); + std::cout << "\n"; +} + +// test sgraph: backtracking with push/pop +static void test_sgraph_backtrack() { + std::cout << "test_sgraph_backtrack\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + + sg.mk(x); + unsigned before = sg.num_nodes(); + + sg.push(); + + expr_ref xy(seq.str.mk_concat(x, y), m); + sg.mk(xy); + SASSERT(sg.num_nodes() > before); + + sg.pop(1); + // y and xy were created inside the scope, so some nodes should be removed + // x was created before the scope, so it should persist + SASSERT(sg.find(x)); +} + +// test assoc hash: concat(concat(a,b),c) hashes same as concat(a,concat(b,c)) +static void test_assoc_hash() { + std::cout << "test_assoc_hash\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + euf::snode* sa = sg.mk(a); + euf::snode* sb = sg.mk(b); + euf::snode* sc = sg.mk(c); + + // build concat(concat(a,b),c) + euf::snode* sab = sg.mk_concat(sa, sb); + euf::snode* sab_c = sg.mk_concat(sab, sc); + + // build concat(a,concat(b,c)) + euf::snode* sbc = sg.mk_concat(sb, sc); + euf::snode* sa_bc = sg.mk_concat(sa, sbc); + + // they should hash to the same value via the assoc hash + euf::concat_hash h; + euf::concat_eq eq; + SASSERT(h(sab_c) == h(sa_bc)); + SASSERT(eq(sab_c, sa_bc)); + + // different concat should not be equal + euf::snode* sac = sg.mk_concat(sa, sc); + euf::snode* sac_b = sg.mk_concat(sac, sb); + SASSERT(!eq(sab_c, sac_b)); + + // find_assoc_equal should find the first with same leaves + euf::snode* found = sg.find_assoc_equal(sa_bc); + SASSERT(found == sab_c); +} + +// test seq_plugin: concat associativity is normalized by the plugin +static void test_seq_plugin_assoc() { + std::cout << "test_seq_plugin_assoc\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + // register nodes in egraph + // concat(concat(a,b),c) should be merged with concat(a,concat(b,c)) + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + + euf::enode* nab_c = get_node(g, seq, ab_c); + g.propagate(); + + // the plugin should have created a right-associated form and merged + std::cout << g << "\n"; +} + +// test seq_plugin: empty string elimination +static void test_seq_plugin_empty() { + std::cout << "test_seq_plugin_empty\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref xe(seq.str.mk_concat(x, empty), m); + + auto* nxe = get_node(g, seq, xe); + auto* nx = g.find(x); + g.propagate(); + + // concat(x, empty) should be merged with x + SASSERT(nxe->get_root() == nx->get_root()); + std::cout << g << "\n"; +} + +// test seq_plugin: Kleene star merging +// The seq_plugin detects when star bodies are congruent +// This tests the same_star_body logic at the regex level +static void test_seq_plugin_star_merge() { + std::cout << "test_seq_plugin_star_merge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + sort_ref re_sort(seq.re.mk_re(str_sort), m); + + expr_ref x(m.mk_const("x", str_sort), m); + // re.star(to_re(x)) + expr_ref to_re_x(seq.re.mk_to_re(x), m); + expr_ref star_x(seq.re.mk_star(to_re_x), m); + // use regex concat for star * star + expr_ref star_star(seq.re.mk_concat(star_x, star_x), m); + + // register in sgraph + sg.mk(star_star); + euf::snode* s = sg.find(star_x); + SASSERT(s && s->is_star()); + SASSERT(s->is_nullable()); + + std::cout << g << "\n"; +} + +// test seq_plugin: nullable absorption by .* +// concat(.*, nullable) should merge with .* +static void test_seq_plugin_nullable_absorb() { + std::cout << "test_seq_plugin_nullable_absorb\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + // concat(x, empty) = x (empty is nullable, exercises nullable check) + expr_ref xe(seq.str.mk_concat(x, empty), m); + + auto* nxe = get_node(g, seq, xe); + auto* nx = g.find(x); + g.propagate(); + + // concat(x, empty) should be merged with x (empty elimination) + SASSERT(nxe->get_root() == nx->get_root()); + std::cout << g << "\n"; +} + +// test sgraph owns egraph and syncs push/pop +static void test_sgraph_egraph_sync() { + std::cout << "test_sgraph_egraph_sync\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + auto* nx = get_node(g, seq, x); + auto* ny = get_node(g, seq, y); + + sg.push(); + + g.merge(nx, ny, nullptr); + g.propagate(); + SASSERT(nx->get_root() == ny->get_root()); + + sg.pop(1); + // after pop, the merge should be undone + SASSERT(nx->get_root() != ny->get_root()); +} + +void tst_euf_seq_plugin() { + s_var = 0; test_sgraph_basic(); + s_var = 0; test_sgraph_backtrack(); + s_var = 0; test_assoc_hash(); + s_var = 0; test_seq_plugin_assoc(); + s_var = 0; test_seq_plugin_empty(); + s_var = 0; test_seq_plugin_star_merge(); + s_var = 0; test_seq_plugin_nullable_absorb(); + s_var = 0; test_sgraph_egraph_sync(); +} diff --git a/src/test/main.cpp b/src/test/main.cpp index c5d55ebe1..04568ba02 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -281,6 +281,7 @@ int main(int argc, char ** argv) { TST(distribution); TST(euf_bv_plugin); TST(euf_arith_plugin); + TST(euf_seq_plugin); TST(sls_test); TST(scoped_vector); TST(sls_seq_plugin);