3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-09 06:44:53 +00:00

Change ownership: sgraph owns egraph with seq_plugin, add assoc hash table, add unit tests

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-01 23:23:12 +00:00
parent 12b7a4dae3
commit 5c14edf030
7 changed files with 391 additions and 20 deletions

View file

@ -21,15 +21,16 @@ Author:
--*/
#include "ast/euf/euf_seq_plugin.h"
#include "ast/euf/euf_sgraph.h"
#include "ast/euf/euf_egraph.h"
#include "ast/ast_pp.h"
namespace euf {
seq_plugin::seq_plugin(egraph& g):
seq_plugin::seq_plugin(egraph& g, sgraph& sg):
plugin(g),
m_seq(g.get_manager()),
m_sg(g.get_manager()) {
m_sg(sg) {
}
void seq_plugin::register_node(enode* n) {
@ -232,25 +233,15 @@ namespace euf {
if (m_qhead > m_queue.size())
m_qhead = m_queue.size();
break;
case undo_kind::undo_push_scope:
m_sg.pop(1);
break;
}
}
void seq_plugin::push_scope_eh() {
m_sg.push();
m_undo.push_back(undo_kind::undo_push_scope);
push_plugin_undo(get_id());
}
std::ostream& seq_plugin::display(std::ostream& out) const {
out << "seq\n";
m_sg.display(out);
out << "seq-plugin\n";
return out;
}
void seq_plugin::collect_statistics(statistics& st) const {
m_sg.collect_statistics(st);
// statistics are collected by sgraph which owns us
}
}

View file

@ -40,21 +40,20 @@ Author:
#include "ast/seq_decl_plugin.h"
#include "ast/euf/euf_plugin.h"
#include "ast/euf/euf_sgraph.h"
namespace euf {
class egraph;
class sgraph;
class seq_plugin : public plugin {
enum class undo_kind {
undo_add_concat,
undo_push_scope
};
seq_util m_seq;
sgraph m_sg;
sgraph& m_sg;
svector<undo_kind> m_undo;
// queue of merges and registrations to process
@ -101,7 +100,7 @@ namespace euf {
bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2);
public:
seq_plugin(egraph& g);
seq_plugin(egraph& g, sgraph& sg);
theory_id get_id() const override { return m_seq.get_family_id(); }
@ -115,8 +114,6 @@ namespace euf {
void undo() override;
void push_scope_eh() override;
std::ostream& display(std::ostream& out) const override;
void collect_statistics(statistics& st) const override;

View file

@ -17,15 +17,58 @@ Author:
--*/
#include "ast/euf/euf_sgraph.h"
#include "ast/euf/euf_seq_plugin.h"
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
namespace euf {
// Associativity-respecting hash: flattens concat into leaf sequence
// concat(concat(a, b), c) and concat(a, concat(b, c)) hash identically
static void collect_leaves(snode const* n, ptr_vector<snode const>& leaves) {
if (n->is_concat()) {
collect_leaves(n->arg(0), leaves);
collect_leaves(n->arg(1), leaves);
}
else {
leaves.push_back(n);
}
}
unsigned concat_hash::operator()(snode const* n) const {
if (!n->is_concat())
return n->id();
ptr_vector<snode const> leaves;
collect_leaves(n, leaves);
unsigned h = 0;
for (snode const* l : leaves)
h = combine_hash(h, l->id());
return h;
}
bool concat_eq::operator()(snode const* a, snode const* b) const {
if (a == b) return true;
if (!a->is_concat() && !b->is_concat())
return a->id() == b->id();
ptr_vector<snode const> la, lb;
collect_leaves(a, la);
collect_leaves(b, lb);
if (la.size() != lb.size())
return false;
for (unsigned i = 0; i < la.size(); ++i)
if (la[i]->id() != lb[i]->id())
return false;
return true;
}
sgraph::sgraph(ast_manager& m):
m(m),
m_seq(m),
m_egraph(m),
m_exprs(m) {
// create seq_plugin and register it with the egraph
// the seq_plugin gets a reference back to this sgraph
m_egraph.add_plugin(alloc(seq_plugin, m_egraph, *this));
}
sgraph::~sgraph() {
@ -263,6 +306,9 @@ namespace euf {
m_expr2snode.reserve(eid + 1, nullptr);
m_expr2snode[eid] = n;
}
// insert concats into the associativity-respecting hash table
if (k == snode_kind::s_concat)
m_concat_table.insert(n);
++m_stats.m_num_nodes;
return n;
}
@ -303,6 +349,16 @@ namespace euf {
return nullptr;
}
snode* sgraph::find_assoc_equal(snode* n) const {
if (!n || !n->is_concat())
return nullptr;
snode* existing = nullptr;
if (m_concat_table.find(n, existing) && existing != n) {
return existing;
}
return nullptr;
}
snode* sgraph::mk_empty(sort* s) {
expr_ref e(m_seq.str.mk_empty(s), m);
return mk(e);
@ -328,9 +384,23 @@ namespace euf {
return mk_snode(e, snode_kind::s_power, 2, args);
}
enode* sgraph::mk_enode(expr* e) {
enode* n = m_egraph.find(e);
if (n) return n;
enode_vector args;
if (is_app(e)) {
for (expr* arg : *to_app(e)) {
enode* a = mk_enode(arg);
args.push_back(a);
}
}
return m_egraph.mk(e, 0, args.size(), args.data());
}
void sgraph::push() {
m_scopes.push_back(m_nodes.size());
++m_num_scopes;
m_egraph.push();
}
void sgraph::pop(unsigned num_scopes) {
@ -341,6 +411,8 @@ namespace euf {
unsigned old_sz = m_scopes[new_lvl];
for (unsigned i = m_nodes.size(); i-- > old_sz; ) {
snode* n = m_nodes[i];
if (n->is_concat())
m_concat_table.remove(n);
if (n->get_expr()) {
unsigned eid = n->get_expr()->get_id();
if (eid < m_expr2snode.size())
@ -351,6 +423,7 @@ namespace euf {
m_exprs.shrink(old_sz);
m_scopes.shrink(new_lvl);
m_num_scopes = new_lvl;
m_egraph.pop(num_scopes);
}
std::ostream& sgraph::display(std::ostream& out) const {
@ -403,6 +476,8 @@ namespace euf {
st.update("seq-graph-nodes", m_stats.m_num_nodes);
st.update("seq-graph-concat", m_stats.m_num_concat);
st.update("seq-graph-power", m_stats.m_num_power);
st.update("seq-graph-hash-hits", m_stats.m_num_hash_hits);
m_egraph.collect_statistics(st);
}
}

View file

@ -69,24 +69,43 @@ Author:
#include "util/region.h"
#include "util/statistics.h"
#include "util/hashtable.h"
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
#include "ast/euf/euf_snode.h"
#include "ast/euf/euf_egraph.h"
namespace euf {
class seq_plugin;
// Associativity-respecting hash for concatenations.
// The hash function flattens concat trees so that
// concat(concat(a, b), c) and concat(a, concat(b, c))
// hash to the same value. This is how ZIPT ensures
// finding equal concatenations efficiently.
struct concat_hash {
unsigned operator()(snode const* n) const;
};
struct concat_eq {
bool operator()(snode const* a, snode const* b) const;
};
class sgraph {
struct stats {
unsigned m_num_nodes;
unsigned m_num_concat;
unsigned m_num_power;
unsigned m_num_hash_hits;
stats() { reset(); }
void reset() { memset(this, 0, sizeof(*this)); }
};
ast_manager& m;
seq_util m_seq;
egraph m_egraph;
region m_region;
snode_vector m_nodes;
expr_ref_vector m_exprs; // pin expressions
@ -97,6 +116,9 @@ namespace euf {
// maps expression id to snode
ptr_vector<snode> m_expr2snode;
// hash table for finding equal concatenations modulo associativity
hashtable<snode*, concat_hash, concat_eq> m_concat_table;
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
snode_kind classify(expr* e) const;
void compute_metadata(snode* n);
@ -107,6 +129,8 @@ namespace euf {
ast_manager& get_manager() const { return m; }
seq_util& get_seq_util() { return m_seq; }
egraph& get_egraph() { return m_egraph; }
egraph const& get_egraph() const { return m_egraph; }
// register an expression and return its snode
snode* mk(expr* e);
@ -114,11 +138,17 @@ namespace euf {
// lookup an already-registered expression
snode* find(expr* e) const;
// find an existing concat that is equal modulo associativity
snode* find_assoc_equal(snode* n) const;
// build compound snodes
snode* mk_empty(sort* s);
snode* mk_concat(snode* a, snode* b);
snode* mk_power(snode* base, snode* exp);
// register expression in both sgraph and egraph
enode* mk_enode(expr* e);
// scope management for backtracking
void push();
void pop(unsigned num_scopes);

View file

@ -51,6 +51,7 @@ add_executable(test-z3
escaped.cpp
euf_bv_plugin.cpp
euf_arith_plugin.cpp
euf_seq_plugin.cpp
ex.cpp
expr_rand.cpp
expr_substitution.cpp

276
src/test/euf_seq_plugin.cpp Normal file
View file

@ -0,0 +1,276 @@
/*++
Copyright (c) 2026 Microsoft Corporation
--*/
#include "util/util.h"
#include "ast/euf/euf_sgraph.h"
#include "ast/euf/euf_seq_plugin.h"
#include "ast/euf/euf_egraph.h"
#include "ast/reg_decl_plugins.h"
#include "ast/ast_pp.h"
#include <iostream>
static unsigned s_var = 0;
static euf::enode* get_node(euf::egraph& g, seq_util& seq, expr* e) {
auto* n = g.find(e);
if (n) return n;
euf::enode_vector args;
if (is_app(e))
for (expr* arg : *to_app(e))
args.push_back(get_node(g, seq, arg));
n = g.mk(e, 0, args.size(), args.data());
if (seq.is_seq(e) || seq.is_re(e))
g.add_th_var(n, ++s_var, seq.get_family_id());
return n;
}
// test sgraph: basic classification and metadata
static void test_sgraph_basic() {
std::cout << "test_sgraph_basic\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
expr_ref x(m.mk_const("x", str_sort), m);
expr_ref y(m.mk_const("y", str_sort), m);
expr_ref empty(seq.str.mk_empty(str_sort), m);
expr_ref xy(seq.str.mk_concat(x, y), m);
euf::snode* sx = sg.mk(x);
SASSERT(sx);
SASSERT(sx->is_var());
SASSERT(!sx->is_ground());
SASSERT(sx->is_regex_free());
SASSERT(!sx->is_nullable());
SASSERT(sx->length() == 1);
euf::snode* se = sg.mk(empty);
SASSERT(se);
SASSERT(se->is_empty());
SASSERT(se->is_ground());
SASSERT(se->is_nullable());
SASSERT(se->length() == 0);
euf::snode* sxy = sg.mk(xy);
SASSERT(sxy);
SASSERT(sxy->is_concat());
SASSERT(!sxy->is_ground());
SASSERT(sxy->length() == 2);
SASSERT(sxy->num_args() == 2);
std::cout << "sgraph:\n";
sg.display(std::cout);
std::cout << "\n";
}
// test sgraph: backtracking with push/pop
static void test_sgraph_backtrack() {
std::cout << "test_sgraph_backtrack\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
expr_ref x(m.mk_const("x", str_sort), m);
expr_ref y(m.mk_const("y", str_sort), m);
sg.mk(x);
unsigned before = sg.num_nodes();
sg.push();
expr_ref xy(seq.str.mk_concat(x, y), m);
sg.mk(xy);
SASSERT(sg.num_nodes() > before);
sg.pop(1);
// y and xy were created inside the scope, so some nodes should be removed
// x was created before the scope, so it should persist
SASSERT(sg.find(x));
}
// test assoc hash: concat(concat(a,b),c) hashes same as concat(a,concat(b,c))
static void test_assoc_hash() {
std::cout << "test_assoc_hash\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
expr_ref a(m.mk_const("a", str_sort), m);
expr_ref b(m.mk_const("b", str_sort), m);
expr_ref c(m.mk_const("c", str_sort), m);
euf::snode* sa = sg.mk(a);
euf::snode* sb = sg.mk(b);
euf::snode* sc = sg.mk(c);
// build concat(concat(a,b),c)
euf::snode* sab = sg.mk_concat(sa, sb);
euf::snode* sab_c = sg.mk_concat(sab, sc);
// build concat(a,concat(b,c))
euf::snode* sbc = sg.mk_concat(sb, sc);
euf::snode* sa_bc = sg.mk_concat(sa, sbc);
// they should hash to the same value via the assoc hash
euf::concat_hash h;
euf::concat_eq eq;
SASSERT(h(sab_c) == h(sa_bc));
SASSERT(eq(sab_c, sa_bc));
// different concat should not be equal
euf::snode* sac = sg.mk_concat(sa, sc);
euf::snode* sac_b = sg.mk_concat(sac, sb);
SASSERT(!eq(sab_c, sac_b));
// find_assoc_equal should find the first with same leaves
euf::snode* found = sg.find_assoc_equal(sa_bc);
SASSERT(found == sab_c);
}
// test seq_plugin: concat associativity is normalized by the plugin
static void test_seq_plugin_assoc() {
std::cout << "test_seq_plugin_assoc\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
expr_ref a(m.mk_const("a", str_sort), m);
expr_ref b(m.mk_const("b", str_sort), m);
expr_ref c(m.mk_const("c", str_sort), m);
// register nodes in egraph
// concat(concat(a,b),c) should be merged with concat(a,concat(b,c))
expr_ref ab(seq.str.mk_concat(a, b), m);
expr_ref ab_c(seq.str.mk_concat(ab, c), m);
euf::enode* nab_c = get_node(g, seq, ab_c);
g.propagate();
// the plugin should have created a right-associated form and merged
std::cout << g << "\n";
}
// test seq_plugin: empty string elimination
static void test_seq_plugin_empty() {
std::cout << "test_seq_plugin_empty\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
expr_ref x(m.mk_const("x", str_sort), m);
expr_ref empty(seq.str.mk_empty(str_sort), m);
expr_ref xe(seq.str.mk_concat(x, empty), m);
auto* nxe = get_node(g, seq, xe);
auto* nx = g.find(x);
g.propagate();
// concat(x, empty) should be merged with x
SASSERT(nxe->get_root() == nx->get_root());
std::cout << g << "\n";
}
// test seq_plugin: Kleene star merging
// The seq_plugin detects when star bodies are congruent
// This tests the same_star_body logic at the regex level
static void test_seq_plugin_star_merge() {
std::cout << "test_seq_plugin_star_merge\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
sort_ref re_sort(seq.re.mk_re(str_sort), m);
expr_ref x(m.mk_const("x", str_sort), m);
// re.star(to_re(x))
expr_ref to_re_x(seq.re.mk_to_re(x), m);
expr_ref star_x(seq.re.mk_star(to_re_x), m);
// use regex concat for star * star
expr_ref star_star(seq.re.mk_concat(star_x, star_x), m);
// register in sgraph
sg.mk(star_star);
euf::snode* s = sg.find(star_x);
SASSERT(s && s->is_star());
SASSERT(s->is_nullable());
std::cout << g << "\n";
}
// test seq_plugin: nullable absorption by .*
// concat(.*, nullable) should merge with .*
static void test_seq_plugin_nullable_absorb() {
std::cout << "test_seq_plugin_nullable_absorb\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
expr_ref x(m.mk_const("x", str_sort), m);
expr_ref empty(seq.str.mk_empty(str_sort), m);
// concat(x, empty) = x (empty is nullable, exercises nullable check)
expr_ref xe(seq.str.mk_concat(x, empty), m);
auto* nxe = get_node(g, seq, xe);
auto* nx = g.find(x);
g.propagate();
// concat(x, empty) should be merged with x (empty elimination)
SASSERT(nxe->get_root() == nx->get_root());
std::cout << g << "\n";
}
// test sgraph owns egraph and syncs push/pop
static void test_sgraph_egraph_sync() {
std::cout << "test_sgraph_egraph_sync\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
expr_ref x(m.mk_const("x", str_sort), m);
expr_ref y(m.mk_const("y", str_sort), m);
auto* nx = get_node(g, seq, x);
auto* ny = get_node(g, seq, y);
sg.push();
g.merge(nx, ny, nullptr);
g.propagate();
SASSERT(nx->get_root() == ny->get_root());
sg.pop(1);
// after pop, the merge should be undone
SASSERT(nx->get_root() != ny->get_root());
}
void tst_euf_seq_plugin() {
s_var = 0; test_sgraph_basic();
s_var = 0; test_sgraph_backtrack();
s_var = 0; test_assoc_hash();
s_var = 0; test_seq_plugin_assoc();
s_var = 0; test_seq_plugin_empty();
s_var = 0; test_seq_plugin_star_merge();
s_var = 0; test_seq_plugin_nullable_absorb();
s_var = 0; test_sgraph_egraph_sync();
}

View file

@ -281,6 +281,7 @@ int main(int argc, char ** argv) {
TST(distribution);
TST(euf_bv_plugin);
TST(euf_arith_plugin);
TST(euf_seq_plugin);
TST(sls_test);
TST(scoped_vector);
TST(sls_seq_plugin);