3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 05:44:51 +00:00

Update sgraph and seq_plugin: egraph ref, hash matrix, subst cache, sgraph pointer

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-02 21:14:34 +00:00
parent 1885753690
commit cfea2db8bf
8 changed files with 174 additions and 42 deletions

1
.gitignore vendored
View file

@ -117,3 +117,4 @@ genaisrc/genblogpost.genai.mts
bazel-*
# Local issue tracking
.beads
build/

View file

@ -22,6 +22,7 @@ Author:
#include "ast/euf/euf_seq_plugin.h"
#include "ast/euf/euf_egraph.h"
#include "ast/euf/euf_sgraph.h"
#include "ast/ast_pp.h"
namespace euf {
@ -46,6 +47,12 @@ namespace euf {
}
unsigned enode_concat_hash::operator()(enode* n) const {
sgraph* sg = sg_ptr ? *sg_ptr : nullptr;
if (sg) {
snode* sn = sg->find(n->get_expr());
if (sn && sn->has_cached_hash())
return sn->assoc_hash();
}
if (!is_any_concat(n, seq))
return n->get_id();
enode_vector leaves;
@ -71,13 +78,22 @@ namespace euf {
return true;
}
seq_plugin::seq_plugin(egraph& g):
seq_plugin::seq_plugin(egraph& g, sgraph* sg):
plugin(g),
m_seq(g.get_manager()),
m_rewriter(g.get_manager()),
m_concat_hash(m_seq),
m_sg(sg),
m_sg_owned(sg == nullptr),
m_concat_hash(m_seq, &m_sg),
m_concat_eq(m_seq),
m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) {
if (!m_sg)
m_sg = alloc(sgraph, g.get_manager(), g, false);
}
seq_plugin::~seq_plugin() {
if (m_sg_owned && m_sg)
dealloc(m_sg);
}
void seq_plugin::register_node(enode* n) {

View file

@ -47,14 +47,17 @@ Author:
namespace euf {
class egraph;
class sgraph;
// Associativity-respecting hash for enode concat trees.
// Flattens concat(concat(a,b),c) and concat(a,concat(b,c))
// to the same leaf sequence [a,b,c] before hashing.
// Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT).
// When an sgraph is available, uses cached snode hash matrices.
struct enode_concat_hash {
seq_util const& seq;
enode_concat_hash(seq_util const& s) : seq(s) {}
sgraph* const* sg_ptr; // pointer to the sgraph* member in seq_plugin
enode_concat_hash(seq_util const& s, sgraph* const* sg = nullptr) : seq(s), sg_ptr(sg) {}
unsigned operator()(enode* n) const;
};
@ -75,6 +78,8 @@ namespace euf {
seq_util m_seq;
seq_rewriter m_rewriter;
sgraph* m_sg = nullptr; // sgraph (may or may not be owned)
bool m_sg_owned = false; // whether we own the sgraph
svector<undo_kind> m_undo;
// queue of merges and registrations to process
@ -150,7 +155,8 @@ namespace euf {
bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2);
public:
seq_plugin(egraph& g);
seq_plugin(egraph& g, sgraph* sg = nullptr);
~seq_plugin() override;
theory_id get_id() const override { return m_seq.get_family_id(); }

View file

@ -23,15 +23,35 @@ Author:
namespace euf {
sgraph::sgraph(ast_manager& m):
// substitution cache stored on snode for ZIPT-style optimization
struct snode_subst_cache {
struct entry {
unsigned var_id;
unsigned repl_id;
snode* result;
};
svector<entry> m_entries;
snode* find(unsigned var_id, unsigned repl_id) const {
for (auto const& e : m_entries)
if (e.var_id == var_id && e.repl_id == repl_id)
return e.result;
return nullptr;
}
void insert(unsigned var_id, unsigned repl_id, snode* result) {
m_entries.push_back({var_id, repl_id, result});
}
};
sgraph::sgraph(ast_manager& m, egraph& eg, bool add_plugin):
m(m),
m_seq(m),
m_rewriter(m),
m_egraph(m),
m_exprs(m),
m_str_sort(m_seq.str.mk_string_sort(), m) {
m_egraph(eg),
m_str_sort(m_seq.str.mk_string_sort(), m),
m_add_plugin(add_plugin) {
// create seq_plugin and register it with the egraph
m_egraph.add_plugin(alloc(seq_plugin, m_egraph));
if (add_plugin)
m_egraph.add_plugin(alloc(seq_plugin, m_egraph, this));
// register on_make callback so sgraph creates snodes for new enodes
std::function<void(enode*)> on_make = [this](enode* n) {
expr* e = n->get_expr();
@ -42,6 +62,8 @@ namespace euf {
}
sgraph::~sgraph() {
for (auto* c : m_subst_caches)
dealloc(c);
}
snode_kind sgraph::classify(expr* e) const {
@ -265,16 +287,49 @@ namespace euf {
}
}
static const unsigned HASH_BASE = 31;
void sgraph::compute_hash_matrix(snode* n) {
if (n->is_empty()) {
// identity matrix: concat with empty is identity
n->m_hash_matrix[0][0] = 1;
n->m_hash_matrix[0][1] = 0;
n->m_hash_matrix[1][0] = 0;
n->m_hash_matrix[1][1] = 1;
}
else if (n->is_concat()) {
snode* l = n->arg(0);
snode* r = n->arg(1);
if (l->has_cached_hash() && r->has_cached_hash()) {
// 2x2 matrix multiplication: M(L) * M(R)
n->m_hash_matrix[0][0] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][0];
n->m_hash_matrix[0][1] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][1];
n->m_hash_matrix[1][0] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][0];
n->m_hash_matrix[1][1] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][1];
}
}
else {
// leaf/token: [[HASH_BASE, value], [0, 1]]
unsigned v = n->get_expr() ? n->get_expr()->get_id() + 1 : n->id() + 1;
n->m_hash_matrix[0][0] = HASH_BASE;
n->m_hash_matrix[0][1] = v;
n->m_hash_matrix[1][0] = 0;
n->m_hash_matrix[1][1] = 1;
}
}
snode* sgraph::mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args) {
unsigned id = m_nodes.size();
snode* n = snode::mk(m_region, e, k, id, num_args, args);
compute_metadata(n);
compute_hash_matrix(n);
m_nodes.push_back(n);
m_exprs.push_back(e);
if (e) {
unsigned eid = e->get_id();
m_expr2snode.reserve(eid + 1, nullptr);
m_expr2snode[eid] = n;
// pin expression via egraph (the egraph has an expr trail)
mk_enode(e);
}
++m_stats.m_num_nodes;
return n;
@ -350,7 +405,6 @@ namespace euf {
}
}
m_nodes.shrink(old_sz);
m_exprs.shrink(old_sz);
m_scopes.shrink(new_lvl);
m_num_scopes = new_lvl;
m_egraph.pop(num_scopes);
@ -418,9 +472,23 @@ namespace euf {
return replacement;
if (n->is_empty() || n->is_char())
return n;
if (n->is_concat())
return mk_concat(subst(n->arg(0), var, replacement),
subst(n->arg(1), var, replacement));
if (n->is_concat()) {
// check substitution cache (ZIPT-style optimization)
if (n->m_subst_cache) {
snode* cached = n->m_subst_cache->find(var->id(), replacement->id());
if (cached)
return cached;
}
snode* result = mk_concat(subst(n->arg(0), var, replacement),
subst(n->arg(1), var, replacement));
// cache the result
if (!n->m_subst_cache) {
n->m_subst_cache = alloc(snode_subst_cache);
m_subst_caches.push_back(n->m_subst_cache);
}
n->m_subst_cache->insert(var->id(), replacement->id(), result);
return result;
}
// for non-concat compound nodes (power, star, etc.), no substitution into children
return n;
}

View file

@ -82,14 +82,17 @@ namespace euf {
ast_manager& m;
seq_util m_seq;
seq_rewriter m_rewriter;
egraph m_egraph;
egraph& m_egraph;
region m_region;
snode_vector m_nodes;
expr_ref_vector m_exprs; // pin expressions
sort_ref m_str_sort; // cached string sort
unsigned_vector m_scopes;
unsigned m_num_scopes = 0;
stats m_stats;
bool m_add_plugin; // whether sgraph created the seq_plugin
// tracks allocated subst caches for cleanup
ptr_vector<snode_subst_cache> m_subst_caches;
// maps expression id to snode
ptr_vector<snode> m_expr2snode;
@ -97,10 +100,11 @@ namespace euf {
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
snode_kind classify(expr* e) const;
void compute_metadata(snode* n);
void compute_hash_matrix(snode* n);
void collect_re_predicates(snode* re, expr_ref_vector& preds);
public:
sgraph(ast_manager& m);
sgraph(ast_manager& m, egraph& eg, bool add_plugin = true);
~sgraph();
ast_manager& get_manager() const { return m; }

View file

@ -32,6 +32,7 @@ namespace euf {
class sgraph;
class snode;
struct snode_subst_cache;
typedef ptr_vector<snode> snode_vector;
@ -68,6 +69,13 @@ namespace euf {
unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons)
unsigned m_length = 0; // token count, number of leaf tokens in the tree
// hash matrix for associativity-respecting hashing (2x2 polynomial hash matrix)
// all zeros means not cached, non-zero means cached
unsigned m_hash_matrix[2][2] = {{0,0},{0,0}};
// substitution cache (lazy-initialized, owned by sgraph)
snode_subst_cache* m_subst_cache = nullptr;
snode* m_args[0]; // variable-length array, allocated via get_snode_size(num_args)
friend class sgraph;
@ -101,6 +109,10 @@ namespace euf {
unsigned level() const { return m_level; }
unsigned length() const { return m_length; }
// associativity-respecting hash: cached if the 2x2 matrix is non-zero
bool has_cached_hash() const { return m_hash_matrix[0][0] != 0; }
unsigned assoc_hash() const { return m_hash_matrix[0][1]; }
bool is_empty() const { return m_kind == snode_kind::s_empty; }
bool is_char() const { return m_kind == snode_kind::s_char; }
bool is_var() const { return m_kind == snode_kind::s_var; }

View file

@ -31,7 +31,8 @@ static void test_sgraph_basic() {
std::cout << "test_sgraph_basic\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -72,7 +73,8 @@ static void test_sgraph_backtrack() {
std::cout << "test_sgraph_backtrack\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -99,7 +101,8 @@ static void test_seq_plugin_assoc() {
std::cout << "test_seq_plugin_assoc\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -125,7 +128,8 @@ static void test_seq_plugin_empty() {
std::cout << "test_seq_plugin_empty\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -150,7 +154,8 @@ static void test_seq_plugin_star_merge() {
std::cout << "test_seq_plugin_star_merge\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -178,7 +183,8 @@ static void test_seq_plugin_nullable_absorb() {
std::cout << "test_seq_plugin_nullable_absorb\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -202,7 +208,8 @@ static void test_sgraph_egraph_sync() {
std::cout << "test_sgraph_egraph_sync\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::egraph& g = sg.get_egraph();
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);

View file

@ -27,7 +27,8 @@ static void test_sgraph_classify() {
std::cout << "test_sgraph_classify\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -85,7 +86,8 @@ static void test_sgraph_regex() {
std::cout << "test_sgraph_regex\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -160,7 +162,8 @@ static void test_sgraph_power() {
std::cout << "test_sgraph_power\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
arith_util arith(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -185,7 +188,8 @@ static void test_sgraph_push_pop() {
std::cout << "test_sgraph_push_pop\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -221,7 +225,8 @@ static void test_sgraph_nested_scopes() {
std::cout << "test_sgraph_nested_scopes\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -260,7 +265,8 @@ static void test_sgraph_find_idempotent() {
std::cout << "test_sgraph_find_idempotent\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -276,7 +282,8 @@ static void test_sgraph_mk_concat() {
std::cout << "test_sgraph_mk_concat\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -310,7 +317,8 @@ static void test_sgraph_mk_power() {
std::cout << "test_sgraph_mk_power\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
arith_util arith(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -335,7 +343,8 @@ static void test_sgraph_first_last() {
std::cout << "test_sgraph_first_last\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -372,7 +381,8 @@ static void test_sgraph_concat_metadata() {
std::cout << "test_sgraph_concat_metadata\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -414,7 +424,8 @@ static void test_sgraph_display() {
std::cout << "test_sgraph_display\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -436,7 +447,8 @@ static void test_sgraph_factory() {
std::cout << "test_sgraph_factory\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
// mk_var
euf::snode* x = sg.mk_var(symbol("x"));
@ -485,7 +497,8 @@ static void test_sgraph_indexing() {
std::cout << "test_sgraph_indexing\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
@ -532,7 +545,8 @@ static void test_sgraph_drop() {
std::cout << "test_sgraph_drop\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
@ -586,7 +600,8 @@ static void test_sgraph_subst() {
std::cout << "test_sgraph_subst\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
@ -620,7 +635,8 @@ static void test_sgraph_complex_concat() {
std::cout << "test_sgraph_complex_concat\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
// build a string "HELLO" = concat(H, concat(E, concat(L, concat(L, O))))
euf::snode* h = sg.mk_char('H');
@ -678,7 +694,8 @@ static void test_sgraph_brzozowski() {
std::cout << "test_sgraph_brzozowski\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);
@ -713,7 +730,8 @@ static void test_sgraph_minterms() {
std::cout << "test_sgraph_minterms\n";
ast_manager m;
reg_decl_plugins(m);
euf::sgraph sg(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
sort_ref str_sort(seq.str.mk_string_sort(), m);