From 5ca200ba2c7e18f62a0c158a463bb102c4f8dbb6 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Mon, 2 Mar 2026 12:32:32 -0800 Subject: [PATCH 1/7] Update ZIPT link in euf_sgraph.h documentation --- src/ast/euf/euf_sgraph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 2eedb3c75..9ef7b86d1 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -10,7 +10,7 @@ Abstract: Sequence/string graph layer Encapsulates string and regex expressions for the string solver. - Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT). + Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT). The sgraph maps Z3 sequence/regex AST expressions to snode structures organized as binary concatenation trees with metadata, and owns an egraph with a seq_plugin for congruence closure. From 1885753690c4ebf47022d8c65159d52e518ed37a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:39:17 +0000 Subject: [PATCH 2/7] Initial plan From cfea2db8bfe878d21d4dfbbb45331514624d4738 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:14:34 +0000 Subject: [PATCH 3/7] Update sgraph and seq_plugin: egraph ref, hash matrix, subst cache, sgraph pointer Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- .gitignore | 1 + src/ast/euf/euf_seq_plugin.cpp | 20 +++++++- src/ast/euf/euf_seq_plugin.h | 10 +++- src/ast/euf/euf_sgraph.cpp | 88 ++++++++++++++++++++++++++++++---- src/ast/euf/euf_sgraph.h | 10 ++-- src/ast/euf/euf_snode.h | 12 +++++ src/test/euf_seq_plugin.cpp | 21 +++++--- src/test/euf_sgraph.cpp | 54 ++++++++++++++------- 8 files changed, 174 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index ee5df58ff..bf7762077 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,4 @@ genaisrc/genblogpost.genai.mts bazel-* # Local issue tracking .beads +build/ diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index da366eb1f..2a711a765 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -22,6 +22,7 @@ Author: #include "ast/euf/euf_seq_plugin.h" #include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" #include "ast/ast_pp.h" namespace euf { @@ -46,6 +47,12 @@ namespace euf { } unsigned enode_concat_hash::operator()(enode* n) const { + sgraph* sg = sg_ptr ? *sg_ptr : nullptr; + if (sg) { + snode* sn = sg->find(n->get_expr()); + if (sn && sn->has_cached_hash()) + return sn->assoc_hash(); + } if (!is_any_concat(n, seq)) return n->get_id(); enode_vector leaves; @@ -71,13 +78,22 @@ namespace euf { return true; } - seq_plugin::seq_plugin(egraph& g): + seq_plugin::seq_plugin(egraph& g, sgraph* sg): plugin(g), m_seq(g.get_manager()), m_rewriter(g.get_manager()), - m_concat_hash(m_seq), + m_sg(sg), + m_sg_owned(sg == nullptr), + m_concat_hash(m_seq, &m_sg), m_concat_eq(m_seq), m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) { + if (!m_sg) + m_sg = alloc(sgraph, g.get_manager(), g, false); + } + + seq_plugin::~seq_plugin() { + if (m_sg_owned && m_sg) + dealloc(m_sg); } void seq_plugin::register_node(enode* n) { diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 8fc435892..b6f2637ba 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -47,14 +47,17 @@ Author: namespace euf { class egraph; + class sgraph; // Associativity-respecting hash for enode concat trees. // Flattens concat(concat(a,b),c) and concat(a,concat(b,c)) // to the same leaf sequence [a,b,c] before hashing. // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). + // When an sgraph is available, uses cached snode hash matrices. struct enode_concat_hash { seq_util const& seq; - enode_concat_hash(seq_util const& s) : seq(s) {} + sgraph* const* sg_ptr; // pointer to the sgraph* member in seq_plugin + enode_concat_hash(seq_util const& s, sgraph* const* sg = nullptr) : seq(s), sg_ptr(sg) {} unsigned operator()(enode* n) const; }; @@ -75,6 +78,8 @@ namespace euf { seq_util m_seq; seq_rewriter m_rewriter; + sgraph* m_sg = nullptr; // sgraph (may or may not be owned) + bool m_sg_owned = false; // whether we own the sgraph svector m_undo; // queue of merges and registrations to process @@ -150,7 +155,8 @@ namespace euf { bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2); public: - seq_plugin(egraph& g); + seq_plugin(egraph& g, sgraph* sg = nullptr); + ~seq_plugin() override; theory_id get_id() const override { return m_seq.get_family_id(); } diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 3168c2263..a080ab3fa 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -23,15 +23,35 @@ Author: namespace euf { - sgraph::sgraph(ast_manager& m): + // substitution cache stored on snode for ZIPT-style optimization + struct snode_subst_cache { + struct entry { + unsigned var_id; + unsigned repl_id; + snode* result; + }; + svector m_entries; + snode* find(unsigned var_id, unsigned repl_id) const { + for (auto const& e : m_entries) + if (e.var_id == var_id && e.repl_id == repl_id) + return e.result; + return nullptr; + } + void insert(unsigned var_id, unsigned repl_id, snode* result) { + m_entries.push_back({var_id, repl_id, result}); + } + }; + + sgraph::sgraph(ast_manager& m, egraph& eg, bool add_plugin): m(m), m_seq(m), m_rewriter(m), - m_egraph(m), - m_exprs(m), - m_str_sort(m_seq.str.mk_string_sort(), m) { + m_egraph(eg), + m_str_sort(m_seq.str.mk_string_sort(), m), + m_add_plugin(add_plugin) { // create seq_plugin and register it with the egraph - m_egraph.add_plugin(alloc(seq_plugin, m_egraph)); + if (add_plugin) + m_egraph.add_plugin(alloc(seq_plugin, m_egraph, this)); // register on_make callback so sgraph creates snodes for new enodes std::function on_make = [this](enode* n) { expr* e = n->get_expr(); @@ -42,6 +62,8 @@ namespace euf { } sgraph::~sgraph() { + for (auto* c : m_subst_caches) + dealloc(c); } snode_kind sgraph::classify(expr* e) const { @@ -265,16 +287,49 @@ namespace euf { } } + static const unsigned HASH_BASE = 31; + + void sgraph::compute_hash_matrix(snode* n) { + if (n->is_empty()) { + // identity matrix: concat with empty is identity + n->m_hash_matrix[0][0] = 1; + n->m_hash_matrix[0][1] = 0; + n->m_hash_matrix[1][0] = 0; + n->m_hash_matrix[1][1] = 1; + } + else if (n->is_concat()) { + snode* l = n->arg(0); + snode* r = n->arg(1); + if (l->has_cached_hash() && r->has_cached_hash()) { + // 2x2 matrix multiplication: M(L) * M(R) + n->m_hash_matrix[0][0] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][0]; + n->m_hash_matrix[0][1] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][1]; + n->m_hash_matrix[1][0] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][0]; + n->m_hash_matrix[1][1] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][1]; + } + } + else { + // leaf/token: [[HASH_BASE, value], [0, 1]] + unsigned v = n->get_expr() ? n->get_expr()->get_id() + 1 : n->id() + 1; + n->m_hash_matrix[0][0] = HASH_BASE; + n->m_hash_matrix[0][1] = v; + n->m_hash_matrix[1][0] = 0; + n->m_hash_matrix[1][1] = 1; + } + } + snode* sgraph::mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args) { unsigned id = m_nodes.size(); snode* n = snode::mk(m_region, e, k, id, num_args, args); compute_metadata(n); + compute_hash_matrix(n); m_nodes.push_back(n); - m_exprs.push_back(e); if (e) { unsigned eid = e->get_id(); m_expr2snode.reserve(eid + 1, nullptr); m_expr2snode[eid] = n; + // pin expression via egraph (the egraph has an expr trail) + mk_enode(e); } ++m_stats.m_num_nodes; return n; @@ -350,7 +405,6 @@ namespace euf { } } m_nodes.shrink(old_sz); - m_exprs.shrink(old_sz); m_scopes.shrink(new_lvl); m_num_scopes = new_lvl; m_egraph.pop(num_scopes); @@ -418,9 +472,23 @@ namespace euf { return replacement; if (n->is_empty() || n->is_char()) return n; - if (n->is_concat()) - return mk_concat(subst(n->arg(0), var, replacement), - subst(n->arg(1), var, replacement)); + if (n->is_concat()) { + // check substitution cache (ZIPT-style optimization) + if (n->m_subst_cache) { + snode* cached = n->m_subst_cache->find(var->id(), replacement->id()); + if (cached) + return cached; + } + snode* result = mk_concat(subst(n->arg(0), var, replacement), + subst(n->arg(1), var, replacement)); + // cache the result + if (!n->m_subst_cache) { + n->m_subst_cache = alloc(snode_subst_cache); + m_subst_caches.push_back(n->m_subst_cache); + } + n->m_subst_cache->insert(var->id(), replacement->id(), result); + return result; + } // for non-concat compound nodes (power, star, etc.), no substitution into children return n; } diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 9ef7b86d1..4d7231924 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -82,14 +82,17 @@ namespace euf { ast_manager& m; seq_util m_seq; seq_rewriter m_rewriter; - egraph m_egraph; + egraph& m_egraph; region m_region; snode_vector m_nodes; - expr_ref_vector m_exprs; // pin expressions sort_ref m_str_sort; // cached string sort unsigned_vector m_scopes; unsigned m_num_scopes = 0; stats m_stats; + bool m_add_plugin; // whether sgraph created the seq_plugin + + // tracks allocated subst caches for cleanup + ptr_vector m_subst_caches; // maps expression id to snode ptr_vector m_expr2snode; @@ -97,10 +100,11 @@ namespace euf { snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); + void compute_hash_matrix(snode* n); void collect_re_predicates(snode* re, expr_ref_vector& preds); public: - sgraph(ast_manager& m); + sgraph(ast_manager& m, egraph& eg, bool add_plugin = true); ~sgraph(); ast_manager& get_manager() const { return m; } diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 26de2d3ec..bf9ce293e 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -32,6 +32,7 @@ namespace euf { class sgraph; class snode; + struct snode_subst_cache; typedef ptr_vector snode_vector; @@ -68,6 +69,13 @@ namespace euf { unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons) unsigned m_length = 0; // token count, number of leaf tokens in the tree + // hash matrix for associativity-respecting hashing (2x2 polynomial hash matrix) + // all zeros means not cached, non-zero means cached + unsigned m_hash_matrix[2][2] = {{0,0},{0,0}}; + + // substitution cache (lazy-initialized, owned by sgraph) + snode_subst_cache* m_subst_cache = nullptr; + snode* m_args[0]; // variable-length array, allocated via get_snode_size(num_args) friend class sgraph; @@ -101,6 +109,10 @@ namespace euf { unsigned level() const { return m_level; } unsigned length() const { return m_length; } + // associativity-respecting hash: cached if the 2x2 matrix is non-zero + bool has_cached_hash() const { return m_hash_matrix[0][0] != 0; } + unsigned assoc_hash() const { return m_hash_matrix[0][1]; } + bool is_empty() const { return m_kind == snode_kind::s_empty; } bool is_char() const { return m_kind == snode_kind::s_char; } bool is_var() const { return m_kind == snode_kind::s_var; } diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp index e933d87c4..4beaa012e 100644 --- a/src/test/euf_seq_plugin.cpp +++ b/src/test/euf_seq_plugin.cpp @@ -31,7 +31,8 @@ static void test_sgraph_basic() { std::cout << "test_sgraph_basic\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -72,7 +73,8 @@ static void test_sgraph_backtrack() { std::cout << "test_sgraph_backtrack\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -99,7 +101,8 @@ static void test_seq_plugin_assoc() { std::cout << "test_seq_plugin_assoc\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::egraph& g = sg.get_egraph(); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -125,7 +128,8 @@ static void test_seq_plugin_empty() { std::cout << "test_seq_plugin_empty\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::egraph& g = sg.get_egraph(); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -150,7 +154,8 @@ static void test_seq_plugin_star_merge() { std::cout << "test_seq_plugin_star_merge\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::egraph& g = sg.get_egraph(); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -178,7 +183,8 @@ static void test_seq_plugin_nullable_absorb() { std::cout << "test_seq_plugin_nullable_absorb\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::egraph& g = sg.get_egraph(); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -202,7 +208,8 @@ static void test_sgraph_egraph_sync() { std::cout << "test_sgraph_egraph_sync\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::egraph& g = sg.get_egraph(); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp index 028573bec..569e8fd75 100644 --- a/src/test/euf_sgraph.cpp +++ b/src/test/euf_sgraph.cpp @@ -27,7 +27,8 @@ static void test_sgraph_classify() { std::cout << "test_sgraph_classify\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -85,7 +86,8 @@ static void test_sgraph_regex() { std::cout << "test_sgraph_regex\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -160,7 +162,8 @@ static void test_sgraph_power() { std::cout << "test_sgraph_power\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); arith_util arith(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -185,7 +188,8 @@ static void test_sgraph_push_pop() { std::cout << "test_sgraph_push_pop\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -221,7 +225,8 @@ static void test_sgraph_nested_scopes() { std::cout << "test_sgraph_nested_scopes\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -260,7 +265,8 @@ static void test_sgraph_find_idempotent() { std::cout << "test_sgraph_find_idempotent\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -276,7 +282,8 @@ static void test_sgraph_mk_concat() { std::cout << "test_sgraph_mk_concat\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -310,7 +317,8 @@ static void test_sgraph_mk_power() { std::cout << "test_sgraph_mk_power\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); arith_util arith(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -335,7 +343,8 @@ static void test_sgraph_first_last() { std::cout << "test_sgraph_first_last\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -372,7 +381,8 @@ static void test_sgraph_concat_metadata() { std::cout << "test_sgraph_concat_metadata\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -414,7 +424,8 @@ static void test_sgraph_display() { std::cout << "test_sgraph_display\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -436,7 +447,8 @@ static void test_sgraph_factory() { std::cout << "test_sgraph_factory\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); // mk_var euf::snode* x = sg.mk_var(symbol("x")); @@ -485,7 +497,8 @@ static void test_sgraph_indexing() { std::cout << "test_sgraph_indexing\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -532,7 +545,8 @@ static void test_sgraph_drop() { std::cout << "test_sgraph_drop\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -586,7 +600,8 @@ static void test_sgraph_subst() { std::cout << "test_sgraph_subst\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -620,7 +635,8 @@ static void test_sgraph_complex_concat() { std::cout << "test_sgraph_complex_concat\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); // build a string "HELLO" = concat(H, concat(E, concat(L, concat(L, O)))) euf::snode* h = sg.mk_char('H'); @@ -678,7 +694,8 @@ static void test_sgraph_brzozowski() { std::cout << "test_sgraph_brzozowski\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); @@ -713,7 +730,8 @@ static void test_sgraph_minterms() { std::cout << "test_sgraph_minterms\n"; ast_manager m; reg_decl_plugins(m); - euf::sgraph sg(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); From fbd3d445e28464cda5de89fd1eb125641ccc3ab8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:16:46 +0000 Subject: [PATCH 4/7] Add clarifying comments for hash matrix overflow safety and cache validity Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_sgraph.cpp | 5 +++++ src/ast/euf/euf_snode.h | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index a080ab3fa..eff926004 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -289,6 +289,10 @@ namespace euf { static const unsigned HASH_BASE = 31; + // Compute a 2x2 polynomial hash matrix for associativity-respecting hashing. + // Unsigned overflow is intentional and well-defined (mod 2^32). + // M[0][0] tracks HASH_BASE^(num_leaves), which is always nonzero since + // HASH_BASE is odd. M[0][1] is the actual hash value. void sgraph::compute_hash_matrix(snode* n) { if (n->is_empty()) { // identity matrix: concat with empty is identity @@ -310,6 +314,7 @@ namespace euf { } else { // leaf/token: [[HASH_BASE, value], [0, 1]] + // +1 avoids zero hash values; wraps safely on unsigned overflow unsigned v = n->get_expr() ? n->get_expr()->get_id() + 1 : n->id() + 1; n->m_hash_matrix[0][0] = HASH_BASE; n->m_hash_matrix[0][1] = v; diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index bf9ce293e..955dd3788 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -109,7 +109,9 @@ namespace euf { unsigned level() const { return m_level; } unsigned length() const { return m_length; } - // associativity-respecting hash: cached if the 2x2 matrix is non-zero + // associativity-respecting hash: cached if the 2x2 matrix is non-zero. + // M[0][0] = HASH_BASE^(num_leaves) which is always nonzero since HASH_BASE + // is odd and gcd(odd, 2^32) = 1, so the check is safe. bool has_cached_hash() const { return m_hash_matrix[0][0] != 0; } unsigned assoc_hash() const { return m_hash_matrix[0][1]; } From 67f4d305775e209fe173d1c2c0d54ff1c13fea9c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 23:55:15 +0000 Subject: [PATCH 5/7] Initial plan From 28d1c4a27f40dd10819bfd17d2f8f4da607cc47a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 00:00:36 +0000 Subject: [PATCH 6/7] Add ZIPT code reviewer agentic workflow (runs 4x daily) Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- .github/workflows/zipt-code-reviewer.lock.yml | 1139 +++++++++++++++++ .github/workflows/zipt-code-reviewer.md | 253 ++++ 2 files changed, 1392 insertions(+) create mode 100644 .github/workflows/zipt-code-reviewer.lock.yml create mode 100644 .github/workflows/zipt-code-reviewer.md diff --git a/.github/workflows/zipt-code-reviewer.lock.yml b/.github/workflows/zipt-code-reviewer.lock.yml new file mode 100644 index 000000000..0de5fc256 --- /dev/null +++ b/.github/workflows/zipt-code-reviewer.lock.yml @@ -0,0 +1,1139 @@ +# +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ +# | _ |/ _` |/ _ \ '_ \| __| |/ __| +# | | | | (_| | __/ | | | |_| | (__ +# \_| |_/\__, |\___|_| |_|\__|_|\___| +# __/ | +# _ _ |___/ +# | | | | / _| | +# | | | | ___ _ __ _ __| |_| | _____ ____ +# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| +# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ +# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ +# +# This file was automatically generated by gh-aw (v0.51.6). DO NOT EDIT. +# +# To update this file, edit the corresponding .md file and run: +# gh aw compile +# Not all edits will cause changes to this file. +# +# For more information: https://github.github.com/gh-aw/introduction/overview/ +# +# Reviews Z3 string/sequence graph implementation (euf_sgraph, euf_seq_plugin, src/smt/seq) by comparing with the ZIPT reference implementation and reporting improvements as git diffs in GitHub issues +# +# gh-aw-metadata: {"schema_version":"v1","frontmatter_hash":"adecdddc8c5555c7d326638cfa13674b67a5ef94e37a23c4c4d84824ab82ad9c","compiler_version":"v0.51.6"} + +name: "ZIPT Code Reviewer" +"on": + schedule: + - cron: "0 0,6,12,18 * * *" + workflow_dispatch: + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "ZIPT Code Reviewer" + +jobs: + activation: + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + comment_id: "" + comment_repo: "" + model: ${{ steps.generate_aw_info.outputs.model }} + secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.51.6 + with: + destination: /opt/gh-aw/actions + - name: Generate agentic run info + id: generate_aw_info + env: + GH_AW_INFO_ENGINE_ID: "copilot" + GH_AW_INFO_ENGINE_NAME: "GitHub Copilot CLI" + GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} + GH_AW_INFO_VERSION: "" + GH_AW_INFO_AGENT_VERSION: "0.0.420" + GH_AW_INFO_CLI_VERSION: "v0.51.6" + GH_AW_INFO_WORKFLOW_NAME: "ZIPT Code Reviewer" + GH_AW_INFO_EXPERIMENTAL: "false" + GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" + GH_AW_INFO_STAGED: "false" + GH_AW_INFO_ALLOWED_DOMAINS: '["defaults","github"]' + GH_AW_INFO_FIREWALL_ENABLED: "true" + GH_AW_INFO_AWF_VERSION: "v0.23.0" + GH_AW_INFO_AWMG_VERSION: "" + GH_AW_INFO_FIREWALL_TYPE: "squid" + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { main } = require('/opt/gh-aw/actions/generate_aw_info.cjs'); + await main(core, context); + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Checkout .github and .agents folders + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + sparse-checkout: | + .github + .agents + sparse-checkout-cone-mode: true + fetch-depth: 1 + persist-credentials: false + - name: Check workflow file timestamps + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_WORKFLOW_FILE: "zipt-code-reviewer.lock.yml" + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/check_workflow_timestamp_api.cjs'); + await main(); + - name: Create prompt with built-in context + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + bash /opt/gh-aw/actions/create_prompt_first.sh + { + cat << 'GH_AW_PROMPT_EOF' + + GH_AW_PROMPT_EOF + cat "/opt/gh-aw/prompts/xpia.md" + cat "/opt/gh-aw/prompts/temp_folder_prompt.md" + cat "/opt/gh-aw/prompts/markdown.md" + cat "/opt/gh-aw/prompts/cache_memory_prompt.md" + cat "/opt/gh-aw/prompts/safe_outputs_prompt.md" + cat << 'GH_AW_PROMPT_EOF' + + Tools: create_issue, missing_tool, missing_data, noop + + + The following GitHub context information is available for this workflow: + {{#if __GH_AW_GITHUB_ACTOR__ }} + - **actor**: __GH_AW_GITHUB_ACTOR__ + {{/if}} + {{#if __GH_AW_GITHUB_REPOSITORY__ }} + - **repository**: __GH_AW_GITHUB_REPOSITORY__ + {{/if}} + {{#if __GH_AW_GITHUB_WORKSPACE__ }} + - **workspace**: __GH_AW_GITHUB_WORKSPACE__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} + - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} + - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} + - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} + - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ + {{/if}} + {{#if __GH_AW_GITHUB_RUN_ID__ }} + - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ + {{/if}} + + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' + {{#runtime-import .github/workflows/zipt-code-reviewer.md}} + GH_AW_PROMPT_EOF + } > "$GH_AW_PROMPT" + - name: Interpolate variables and render templates + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/interpolate_prompt.cjs'); + await main(); + - name: Substitute placeholders + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_ALLOWED_EXTENSIONS: '' + GH_AW_CACHE_DESCRIPTION: '' + GH_AW_CACHE_DIR: '/tmp/gh-aw/cache-memory/' + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + + const substitutePlaceholders = require('/opt/gh-aw/actions/substitute_placeholders.cjs'); + + // Call the substitution function + return await substitutePlaceholders({ + file: process.env.GH_AW_PROMPT, + substitutions: { + GH_AW_ALLOWED_EXTENSIONS: process.env.GH_AW_ALLOWED_EXTENSIONS, + GH_AW_CACHE_DESCRIPTION: process.env.GH_AW_CACHE_DESCRIPTION, + GH_AW_CACHE_DIR: process.env.GH_AW_CACHE_DIR, + GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, + GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, + GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, + GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + } + }); + - name: Validate prompt placeholders + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/validate_prompt_placeholders.sh + - name: Print prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/print_prompt_summary.sh + - name: Upload activation artifact + if: success() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: activation + path: | + /tmp/gh-aw/aw_info.json + /tmp/gh-aw/aw-prompts/prompt.txt + retention-days: 1 + + agent: + needs: activation + runs-on: ubuntu-latest + permissions: read-all + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GH_AW_ASSETS_ALLOWED_EXTS: "" + GH_AW_ASSETS_BRANCH: "" + GH_AW_ASSETS_MAX_SIZE_KB: 0 + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + GH_AW_SAFE_OUTPUTS: /opt/gh-aw/safeoutputs/outputs.jsonl + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + GH_AW_WORKFLOW_ID_SANITIZED: ziptcodereviewer + outputs: + checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} + detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} + detection_success: ${{ steps.detection_conclusion.outputs.success }} + has_patch: ${{ steps.collect_output.outputs.has_patch }} + model: ${{ needs.activation.outputs.model }} + output: ${{ steps.collect_output.outputs.output }} + output_types: ${{ steps.collect_output.outputs.output_types }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.51.6 + with: + destination: /opt/gh-aw/actions + - name: Create gh-aw temp directory + run: bash /opt/gh-aw/actions/create_gh_aw_tmp_dir.sh + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v5 + with: + persist-credentials: false + + # Cache memory file share configuration from frontmatter processed below + - name: Create cache-memory directory + run: bash /opt/gh-aw/actions/create_cache_memory_dir.sh + - name: Restore cache-memory file share data + uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + key: memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}-${{ github.run_id }} + path: /tmp/gh-aw/cache-memory + restore-keys: | + memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}- + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Checkout PR branch + id: checkout-pr + if: | + (github.event.pull_request) || (github.event.issue.pull_request) + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/checkout_pr_branch.cjs'); + await main(); + - name: Install GitHub Copilot CLI + run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.420 + - name: Install awf binary + run: bash /opt/gh-aw/actions/install_awf_binary.sh v0.23.0 + - name: Determine automatic lockdown mode for GitHub MCP Server + id: determine-automatic-lockdown + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + with: + script: | + const determineAutomaticLockdown = require('/opt/gh-aw/actions/determine_automatic_lockdown.cjs'); + await determineAutomaticLockdown(github, context, core); + - name: Download container images + run: bash /opt/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.23.0 ghcr.io/github/gh-aw-firewall/api-proxy:0.23.0 ghcr.io/github/gh-aw-firewall/squid:0.23.0 ghcr.io/github/gh-aw-mcpg:v0.1.6 ghcr.io/github/github-mcp-server:v0.31.0 node:lts-alpine + - name: Write Safe Outputs Config + run: | + mkdir -p /opt/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs + cat > /opt/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' + {"create_issue":{"max":3},"create_missing_tool_issue":{"max":1,"title_prefix":"[missing tool]"},"missing_data":{},"missing_tool":{},"noop":{"max":1}} + GH_AW_SAFE_OUTPUTS_CONFIG_EOF + cat > /opt/gh-aw/safeoutputs/tools.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_EOF' + [ + { + "description": "Create a new GitHub issue for tracking bugs, feature requests, or tasks. Use this for actionable work items that need assignment, labeling, and status tracking. For reports, announcements, or status updates that don't require task tracking, use create_discussion instead. CONSTRAINTS: Maximum 3 issue(s) can be created. Title will be prefixed with \"[zipt-review] \". Labels [code-quality automated string-solver] will be automatically added.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "body": { + "description": "Detailed issue description in Markdown. Do NOT repeat the title as a heading since it already appears as the issue's h1. Include context, reproduction steps, or acceptance criteria as appropriate.", + "type": "string" + }, + "labels": { + "description": "Labels to categorize the issue (e.g., 'bug', 'enhancement'). Labels must exist in the repository.", + "items": { + "type": "string" + }, + "type": "array" + }, + "parent": { + "description": "Parent issue number for creating sub-issues. This is the numeric ID from the GitHub URL (e.g., 42 in github.com/owner/repo/issues/42). Can also be a temporary_id (e.g., 'aw_abc123', 'aw_Test123') from a previously created issue in the same workflow run.", + "type": [ + "number", + "string" + ] + }, + "temporary_id": { + "description": "Unique temporary identifier for referencing this issue before it's created. Format: 'aw_' followed by 3 to 8 alphanumeric characters (e.g., 'aw_abc1', 'aw_Test123'). Use '#aw_ID' in body text to reference other issues by their temporary_id; these are replaced with actual issue numbers after creation.", + "pattern": "^aw_[A-Za-z0-9]{3,8}$", + "type": "string" + }, + "title": { + "description": "Concise issue title summarizing the bug, feature, or task. The title appears as the main heading, so keep it brief and descriptive.", + "type": "string" + } + }, + "required": [ + "title", + "body" + ], + "type": "object" + }, + "name": "create_issue" + }, + { + "description": "Report that a tool or capability needed to complete the task is not available, or share any information you deem important about missing functionality or limitations. Use this when you cannot accomplish what was requested because the required functionality is missing or access is restricted.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "reason": { + "description": "Explanation of why this tool is needed or what information you want to share about the limitation (max 256 characters).", + "type": "string" + }, + "tool": { + "description": "Optional: Name or description of the missing tool or capability (max 128 characters). Be specific about what functionality is needed.", + "type": "string" + } + }, + "required": [ + "reason" + ], + "type": "object" + }, + "name": "missing_tool" + }, + { + "description": "Log a transparency message when no significant actions are needed. Use this to confirm workflow completion and provide visibility when analysis is complete but no changes or outputs are required (e.g., 'No issues found', 'All checks passed'). This ensures the workflow produces human-visible output even when no other actions are taken.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "message": { + "description": "Status or completion message to log. Should explain what was analyzed and the outcome (e.g., 'Code review complete - no issues found', 'Analysis complete - all tests passing').", + "type": "string" + } + }, + "required": [ + "message" + ], + "type": "object" + }, + "name": "noop" + }, + { + "description": "Report that data or information needed to complete the task is not available. Use this when you cannot accomplish what was requested because required data, context, or information is missing.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "context": { + "description": "Additional context about the missing data or where it should come from (max 256 characters).", + "type": "string" + }, + "data_type": { + "description": "Type or description of the missing data or information (max 128 characters). Be specific about what data is needed.", + "type": "string" + }, + "reason": { + "description": "Explanation of why this data is needed to complete the task (max 256 characters).", + "type": "string" + } + }, + "required": [], + "type": "object" + }, + "name": "missing_data" + } + ] + GH_AW_SAFE_OUTPUTS_TOOLS_EOF + cat > /opt/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF' + { + "create_issue": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "labels": { + "type": "array", + "itemType": "string", + "itemSanitize": true, + "itemMaxLength": 128 + }, + "parent": { + "issueOrPRNumber": true + }, + "repo": { + "type": "string", + "maxLength": 256 + }, + "temporary_id": { + "type": "string" + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "missing_data": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "context": { + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "data_type": { + "type": "string", + "sanitize": true, + "maxLength": 128 + }, + "reason": { + "type": "string", + "sanitize": true, + "maxLength": 256 + } + } + }, + "missing_tool": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 512 + }, + "reason": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "tool": { + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "noop": { + "defaultMax": 1, + "fields": { + "message": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + } + } + } + } + GH_AW_SAFE_OUTPUTS_VALIDATION_EOF + - name: Generate Safe Outputs MCP Server Config + id: safe-outputs-config + run: | + # Generate a secure random API key (360 bits of entropy, 40+ chars) + # Mask immediately to prevent timing vulnerabilities + API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${API_KEY}" + + PORT=3001 + + # Set outputs for next steps + { + echo "safe_outputs_api_key=${API_KEY}" + echo "safe_outputs_port=${PORT}" + } >> "$GITHUB_OUTPUT" + + echo "Safe Outputs MCP server will run on port ${PORT}" + + - name: Start Safe Outputs MCP HTTP Server + id: safe-outputs-start + env: + DEBUG: '*' + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }} + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + run: | + # Environment variables are set above to prevent template injection + export DEBUG + export GH_AW_SAFE_OUTPUTS_PORT + export GH_AW_SAFE_OUTPUTS_API_KEY + export GH_AW_SAFE_OUTPUTS_TOOLS_PATH + export GH_AW_SAFE_OUTPUTS_CONFIG_PATH + export GH_AW_MCP_LOG_DIR + + bash /opt/gh-aw/actions/start_safe_outputs_server.sh + + - name: Start MCP Gateway + id: start-mcp-gateway + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }} + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }} + GITHUB_MCP_LOCKDOWN: ${{ steps.determine-automatic-lockdown.outputs.lockdown == 'true' && '1' || '0' }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + set -eo pipefail + mkdir -p /tmp/gh-aw/mcp-config + + # Export gateway environment variables for MCP config and gateway script + export MCP_GATEWAY_PORT="80" + export MCP_GATEWAY_DOMAIN="host.docker.internal" + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${MCP_GATEWAY_API_KEY}" + export MCP_GATEWAY_API_KEY + export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads" + mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" + export MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD="524288" + export DEBUG="*" + + export GH_AW_ENGINE="copilot" + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_LOCKDOWN -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.1.6' + + mkdir -p /home/runner/.copilot + cat << GH_AW_MCP_CONFIG_EOF | bash /opt/gh-aw/actions/start_mcp_gateway.sh + { + "mcpServers": { + "github": { + "type": "stdio", + "container": "ghcr.io/github/github-mcp-server:v0.31.0", + "env": { + "GITHUB_LOCKDOWN_MODE": "$GITHUB_MCP_LOCKDOWN", + "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", + "GITHUB_READ_ONLY": "1", + "GITHUB_TOOLSETS": "context,repos,issues,pull_requests" + } + }, + "safeoutputs": { + "type": "http", + "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", + "headers": { + "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}" + } + } + }, + "gateway": { + "port": $MCP_GATEWAY_PORT, + "domain": "${MCP_GATEWAY_DOMAIN}", + "apiKey": "${MCP_GATEWAY_API_KEY}", + "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" + } + } + GH_AW_MCP_CONFIG_EOF + - name: Download activation artifact + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8 + with: + name: activation + path: /tmp/gh-aw + - name: Clean git credentials + run: bash /opt/gh-aw/actions/clean_git_credentials.sh + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool github + # --allow-tool safeoutputs + # --allow-tool shell(cat) + # --allow-tool shell(clang-format:*) + # --allow-tool shell(date) + # --allow-tool shell(echo) + # --allow-tool shell(git diff:*) + # --allow-tool shell(git log:*) + # --allow-tool shell(git show:*) + # --allow-tool shell(git status) + # --allow-tool shell(grep) + # --allow-tool shell(head) + # --allow-tool shell(ls) + # --allow-tool shell(pwd) + # --allow-tool shell(sort) + # --allow-tool shell(tail) + # --allow-tool shell(uniq) + # --allow-tool shell(wc) + # --allow-tool shell(yq) + # --allow-tool web_fetch + # --allow-tool write + timeout-minutes: 30 + run: | + set -o pipefail + # shellcheck disable=SC1003 + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --allow-domains "*.githubusercontent.com,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,codeload.github.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,lfs.github.com,objects.githubusercontent.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.23.0 --skip-pull --enable-api-proxy \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool github --allow-tool safeoutputs --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(clang-format:*)'\'' --allow-tool '\''shell(date)'\'' --allow-tool '\''shell(echo)'\'' --allow-tool '\''shell(git diff:*)'\'' --allow-tool '\''shell(git log:*)'\'' --allow-tool '\''shell(git show:*)'\'' --allow-tool '\''shell(git status)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(pwd)'\'' --allow-tool '\''shell(sort)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(uniq)'\'' --allow-tool '\''shell(wc)'\'' --allow-tool '\''shell(yq)'\'' --allow-tool web_fetch --allow-tool write --add-dir /tmp/gh-aw/cache-memory/ --allow-all-paths --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_AGENT_COPILOT:+ --model "$GH_AW_MODEL_AGENT_COPILOT"}' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json + GH_AW_MODEL_AGENT_COPILOT: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GITHUB_API_URL: ${{ github.api_url }} + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + git config --global am.keepcr true + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Copy Copilot session state files to logs + if: always() + continue-on-error: true + run: | + # Copy Copilot session state files to logs folder for artifact collection + # This ensures they are in /tmp/gh-aw/ where secret redaction can scan them + SESSION_STATE_DIR="$HOME/.copilot/session-state" + LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs" + + if [ -d "$SESSION_STATE_DIR" ]; then + echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR" + mkdir -p "$LOGS_DIR" + cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true + echo "Session state files copied successfully" + else + echo "No session-state directory found at $SESSION_STATE_DIR" + fi + - name: Stop MCP Gateway + if: always() + continue-on-error: true + env: + MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }} + MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }} + GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }} + run: | + bash /opt/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID" + - name: Redact secrets in logs + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/redact_secrets.cjs'); + await main(); + env: + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Upload Safe Outputs + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: safe-output + path: ${{ env.GH_AW_SAFE_OUTPUTS }} + if-no-files-found: warn + - name: Ingest agent output + id: collect_output + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_ALLOWED_DOMAINS: "*.githubusercontent.com,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,codeload.github.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,lfs.github.com,objects.githubusercontent.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/collect_ndjson_output.cjs'); + await main(); + - name: Upload sanitized agent output + if: always() && env.GH_AW_AGENT_OUTPUT + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: agent-output + path: ${{ env.GH_AW_AGENT_OUTPUT }} + if-no-files-found: warn + - name: Upload engine output files + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: agent_outputs + path: | + /tmp/gh-aw/sandbox/agent/logs/ + /tmp/gh-aw/redacted-urls.log + if-no-files-found: ignore + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_copilot_log.cjs'); + await main(); + - name: Parse MCP Gateway logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_mcp_gateway_log.cjs'); + await main(); + - name: Print firewall logs + if: always() + continue-on-error: true + env: + AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs + run: | + # Fix permissions on firewall logs so they can be uploaded as artifacts + # AWF runs with sudo, creating files owned by root + sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true + # Only run awf logs summary if awf command exists (it may not be installed if workflow failed before install step) + if command -v awf &> /dev/null; then + awf logs summary | tee -a "$GITHUB_STEP_SUMMARY" + else + echo 'AWF binary not installed, skipping firewall log summary' + fi + - name: Upload cache-memory data as artifact + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + if: always() + with: + name: cache-memory + path: /tmp/gh-aw/cache-memory + - name: Upload agent artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: agent-artifacts + path: | + /tmp/gh-aw/aw-prompts/prompt.txt + /tmp/gh-aw/mcp-logs/ + /tmp/gh-aw/sandbox/firewall/logs/ + /tmp/gh-aw/agent-stdio.log + /tmp/gh-aw/agent/ + if-no-files-found: ignore + # --- Threat Detection (inline) --- + - name: Check if detection needed + id: detection_guard + if: always() + env: + OUTPUT_TYPES: ${{ steps.collect_output.outputs.output_types }} + HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + run: | + if [[ -n "$OUTPUT_TYPES" || "$HAS_PATCH" == "true" ]]; then + echo "run_detection=true" >> "$GITHUB_OUTPUT" + echo "Detection will run: output_types=$OUTPUT_TYPES, has_patch=$HAS_PATCH" + else + echo "run_detection=false" >> "$GITHUB_OUTPUT" + echo "Detection skipped: no agent outputs or patches to analyze" + fi + - name: Clear MCP configuration for detection + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + rm -f /tmp/gh-aw/mcp-config/mcp-servers.json + rm -f /home/runner/.copilot/mcp-config.json + rm -f "$GITHUB_WORKSPACE/.gemini/settings.json" + - name: Prepare threat detection files + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + mkdir -p /tmp/gh-aw/threat-detection/aw-prompts + cp /tmp/gh-aw/aw-prompts/prompt.txt /tmp/gh-aw/threat-detection/aw-prompts/prompt.txt 2>/dev/null || true + cp /tmp/gh-aw/agent_output.json /tmp/gh-aw/threat-detection/agent_output.json 2>/dev/null || true + for f in /tmp/gh-aw/aw-*.patch; do + [ -f "$f" ] && cp "$f" /tmp/gh-aw/threat-detection/ 2>/dev/null || true + done + echo "Prepared threat detection files:" + ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true + - name: Setup threat detection + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + WORKFLOW_NAME: "ZIPT Code Reviewer" + WORKFLOW_DESCRIPTION: "Reviews Z3 string/sequence graph implementation (euf_sgraph, euf_seq_plugin, src/smt/seq) by comparing with the ZIPT reference implementation and reporting improvements as git diffs in GitHub issues" + HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/setup_threat_detection.cjs'); + await main(); + - name: Ensure threat-detection directory and log + if: always() && steps.detection_guard.outputs.run_detection == 'true' + run: | + mkdir -p /tmp/gh-aw/threat-detection + touch /tmp/gh-aw/threat-detection/detection.log + - name: Execute GitHub Copilot CLI + if: always() && steps.detection_guard.outputs.run_detection == 'true' + id: detection_agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool shell(cat) + # --allow-tool shell(grep) + # --allow-tool shell(head) + # --allow-tool shell(jq) + # --allow-tool shell(ls) + # --allow-tool shell(tail) + # --allow-tool shell(wc) + timeout-minutes: 20 + run: | + set -o pipefail + # shellcheck disable=SC1003 + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,raw.githubusercontent.com,registry.npmjs.org,telemetry.enterprise.githubcopilot.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.23.0 --skip-pull --enable-api-proxy \ + -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(jq)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(wc)'\'' --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_DETECTION_COPILOT:+ --model "$GH_AW_MODEL_DETECTION_COPILOT"}' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MODEL_DETECTION_COPILOT: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GITHUB_API_URL: ${{ github.api_url }} + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Parse threat detection results + id: parse_detection_results + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_threat_detection_results.cjs'); + await main(); + - name: Upload threat detection log + if: always() && steps.detection_guard.outputs.run_detection == 'true' + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: threat-detection.log + path: /tmp/gh-aw/threat-detection/detection.log + if-no-files-found: ignore + - name: Set detection conclusion + id: detection_conclusion + if: always() + env: + RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }} + DETECTION_SUCCESS: ${{ steps.parse_detection_results.outputs.success }} + run: | + if [[ "$RUN_DETECTION" != "true" ]]; then + echo "conclusion=skipped" >> "$GITHUB_OUTPUT" + echo "success=true" >> "$GITHUB_OUTPUT" + echo "Detection was not needed, marking as skipped" + elif [[ "$DETECTION_SUCCESS" == "true" ]]; then + echo "conclusion=success" >> "$GITHUB_OUTPUT" + echo "success=true" >> "$GITHUB_OUTPUT" + echo "Detection passed successfully" + else + echo "conclusion=failure" >> "$GITHUB_OUTPUT" + echo "success=false" >> "$GITHUB_OUTPUT" + echo "Detection found issues" + fi + + conclusion: + needs: + - activation + - agent + - safe_outputs + - update_cache_memory + if: (always()) && (needs.agent.result != 'skipped') + runs-on: ubuntu-slim + permissions: + contents: read + issues: write + outputs: + noop_message: ${{ steps.noop.outputs.noop_message }} + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.51.6 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Process No-Op Messages + id: noop + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_NOOP_MAX: "1" + GH_AW_WORKFLOW_NAME: "ZIPT Code Reviewer" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/noop.cjs'); + await main(); + - name: Record Missing Tool + id: missing_tool + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_MISSING_TOOL_CREATE_ISSUE: "true" + GH_AW_MISSING_TOOL_TITLE_PREFIX: "[missing tool]" + GH_AW_WORKFLOW_NAME: "ZIPT Code Reviewer" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/missing_tool.cjs'); + await main(); + - name: Handle Agent Failure + id: handle_agent_failure + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "ZIPT Code Reviewer" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_WORKFLOW_ID: "zipt-code-reviewer" + GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} + GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} + GH_AW_GROUP_REPORTS: "false" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_agent_failure.cjs'); + await main(); + - name: Handle No-Op Message + id: handle_noop_message + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "ZIPT Code Reviewer" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} + GH_AW_NOOP_REPORT_AS_ISSUE: "true" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_noop_message.cjs'); + await main(); + + safe_outputs: + needs: agent + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (needs.agent.outputs.detection_success == 'true') + runs-on: ubuntu-slim + permissions: + contents: read + issues: write + timeout-minutes: 15 + env: + GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/${{ github.workflow }}" + GH_AW_ENGINE_ID: "copilot" + GH_AW_WORKFLOW_ID: "zipt-code-reviewer" + GH_AW_WORKFLOW_NAME: "ZIPT Code Reviewer" + outputs: + code_push_failure_count: ${{ steps.process_safe_outputs.outputs.code_push_failure_count }} + code_push_failure_errors: ${{ steps.process_safe_outputs.outputs.code_push_failure_errors }} + create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} + create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} + created_issue_number: ${{ steps.process_safe_outputs.outputs.created_issue_number }} + created_issue_url: ${{ steps.process_safe_outputs.outputs.created_issue_url }} + process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} + process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.51.6 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Process Safe Outputs + id: process_safe_outputs + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_ALLOWED_DOMAINS: "*.githubusercontent.com,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,codeload.github.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,lfs.github.com,objects.githubusercontent.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_issue\":{\"labels\":[\"code-quality\",\"automated\",\"string-solver\"],\"max\":3,\"title_prefix\":\"[zipt-review] \"},\"missing_data\":{},\"missing_tool\":{}}" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/safe_output_handler_manager.cjs'); + await main(); + - name: Upload safe output items manifest + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + with: + name: safe-output-items + path: /tmp/safe-output-items.jsonl + if-no-files-found: warn + + update_cache_memory: + needs: agent + if: always() && needs.agent.outputs.detection_success == 'true' + runs-on: ubuntu-latest + permissions: {} + env: + GH_AW_WORKFLOW_ID_SANITIZED: ziptcodereviewer + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@v0.51.6 + with: + destination: /opt/gh-aw/actions + - name: Download cache-memory artifact (default) + id: download_cache_default + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8 + continue-on-error: true + with: + name: cache-memory + path: /tmp/gh-aw/cache-memory + - name: Check if cache-memory folder has content (default) + id: check_cache_default + shell: bash + run: | + if [ -d "/tmp/gh-aw/cache-memory" ] && [ "$(ls -A /tmp/gh-aw/cache-memory 2>/dev/null)" ]; then + echo "has_content=true" >> "$GITHUB_OUTPUT" + else + echo "has_content=false" >> "$GITHUB_OUTPUT" + fi + - name: Save cache-memory to cache (default) + if: steps.check_cache_default.outputs.has_content == 'true' + uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + key: memory-${{ env.GH_AW_WORKFLOW_ID_SANITIZED }}-${{ github.run_id }} + path: /tmp/gh-aw/cache-memory + diff --git a/.github/workflows/zipt-code-reviewer.md b/.github/workflows/zipt-code-reviewer.md new file mode 100644 index 000000000..08c44a980 --- /dev/null +++ b/.github/workflows/zipt-code-reviewer.md @@ -0,0 +1,253 @@ +--- +description: Reviews Z3 string/sequence graph implementation (euf_sgraph, euf_seq_plugin, src/smt/seq) by comparing with the ZIPT reference implementation and reporting improvements as git diffs in GitHub issues + +on: + schedule: + - cron: "0 0,6,12,18 * * *" + workflow_dispatch: + +permissions: read-all + +network: + allowed: + - defaults + - github + +tools: + cache-memory: true + github: + toolsets: [default] + view: {} + glob: {} + edit: {} + web-fetch: {} + bash: + - "git diff:*" + - "git log:*" + - "git show:*" + - "git status" + - "clang-format:*" + +safe-outputs: + create-issue: + title-prefix: "[zipt-review] " + labels: [code-quality, automated, string-solver] + max: 3 + missing-tool: + create-issue: true + +timeout-minutes: 30 + +steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + persist-credentials: false + +--- + +# ZIPT Code Reviewer + +You are an expert C++ code reviewer specializing in string constraint solvers and the Z3 theorem prover. Your mission is to compare Z3's string/sequence graph implementation with the reference ZIPT implementation, identify concrete code improvements, and present them as git diffs in a GitHub issue. + +## Current Context + +- **Repository**: ${{ github.repository }} +- **Workspace**: ${{ github.workspace }} +- **ZIPT Reference**: https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT + +## Phase 1: Read Z3 Source Files + +Read each of the following Z3 source files in full: + +### String Graph (euf_sgraph / euf_snode) +- `src/ast/euf/euf_snode.h` +- `src/ast/euf/euf_sgraph.h` +- `src/ast/euf/euf_sgraph.cpp` + +### Sequence Plugin (euf_seq_plugin) +- `src/ast/euf/euf_seq_plugin.h` +- `src/ast/euf/euf_seq_plugin.cpp` + +### SMT Sequence Theory (src/smt/seq*) +Use the glob tool to find all relevant files: +``` +src/smt/seq*.h +src/smt/seq*.cpp +``` +Read each matched file. + +## Phase 2: Fetch ZIPT Reference Implementation + +The ZIPT project (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT) is the reference C# implementation that the Z3 string solver is ported from. Fetch the relevant source files to understand the reference algorithms. + +### Step 2.1: Discover ZIPT File Structure + +Fetch the ZIPT repository tree to understand the structure: + +``` +https://raw.githubusercontent.com/CEisenhofer/ZIPT/parikh/ZIPT/ +``` + +Try fetching these likely ZIPT source directories and files: + +1. Repository root listing: `https://api.github.com/repos/CEisenhofer/ZIPT/git/trees/parikh?recursive=1` +2. Key ZIPT source files (fetch the ones you find relevant from the tree): + - Look for files related to: string graphs, sequence plugins, Nielsen graph, Parikh constraints, polynomial hashing, substitution caching + - The ZIPT project is written in C#; the Z3 implementation is a C++ port + +When fetching files, use the raw content URL pattern: +``` +https://raw.githubusercontent.com/CEisenhofer/ZIPT/parikh/ZIPT/ +``` + +### Step 2.2: Identify Corresponding ZIPT Files + +For each Z3 file you read in Phase 1, identify the ZIPT file(s) that implement the same functionality. Focus on: +- String/sequence graph data structures (snode, sgraph equivalents) +- Concat associativity propagation +- Nullable computation +- Kleene star / regex handling +- Polynomial hash matrix computation +- Substitution caching + +## Phase 3: Analyze and Identify Improvements + +Compare the Z3 C++ implementation against the ZIPT C# reference. For each file pair, look for: + +### 3.1 Algorithmic Improvements +- Missing algorithms or edge cases present in ZIPT but absent from Z3 +- More efficient data structures used in ZIPT +- Better asymptotic complexity in ZIPT for key operations +- Missing optimizations (e.g., short-circuit evaluations, caching strategies) + +### 3.2 Correctness Issues +- Logic discrepancies between Z3 and ZIPT for the same algorithm +- Missing null/empty checks present in ZIPT +- Incorrect handling of edge cases (empty strings, epsilon, absorbing elements) +- Off-by-one errors or boundary condition mistakes + +### 3.3 Code Quality Improvements +- Functions in ZIPT that are cleaner or more modular than the Z3 port +- Missing early-exit conditions +- Redundant computations that ZIPT avoids +- Better naming or structure in ZIPT that could improve Z3 readability + +### 3.4 Missing Features +- ZIPT functionality not yet ported to Z3 +- Incomplete ports where only part of the ZIPT logic was transferred + +## Phase 4: Implement Improvements as Code Changes + +For each improvement identified in Phase 3: + +1. **Assess feasibility**: Only implement improvements that are: + - Self-contained (don't require large architectural changes) + - Verifiable (you can confirm correctness by reading the code) + - Safe (don't change public API signatures) + +2. **Apply the change** using the edit tool to modify the Z3 source file + +3. **Track each change**: Note the file, line range, and rationale + +Focus on at most **5 concrete, high-value improvements** per run to keep changes focused and reviewable. + +## Phase 5: Generate Git Diff + +After applying all changes: + +```bash +# Check what was modified +git status + +# Generate a unified diff of all changes +git diff > /tmp/zipt-improvements.diff + +# Read the diff +cat /tmp/zipt-improvements.diff +``` + +If no changes were made because no improvements were found or all were too risky, exit gracefully: + +``` +✅ ZIPT code review complete. No concrete improvements found in this run. +Files examined: [list files] +ZIPT files compared: [list files] +``` + +## Phase 6: Create GitHub Issue + +If improvements were found and changes were applied, create a GitHub issue using the safe-outputs configuration. + +Structure the issue body as follows: + +```markdown +## ZIPT Code Review: Improvements from Reference Implementation + +**Date**: [today's date] +**Files Reviewed**: [list of Z3 files examined] +**ZIPT Reference**: https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT + +### Summary + +[2-3 sentence summary of what was found and changed] + +### Improvements Applied + +For each improvement: + +#### Improvement N: [Short title] + +**File**: `path/to/z3/file.cpp` +**Rationale**: [Why this improves the code, with reference to the ZIPT equivalent] +**ZIPT Reference**: [URL or file path of the corresponding ZIPT code] + +### Git Diff + +The following diff can be applied with `git apply`: + +```diff +[FULL GIT DIFF OUTPUT HERE] +``` + +To apply: +```bash +git apply - << 'EOF' +[FULL GIT DIFF OUTPUT HERE] +EOF +``` + +### Testing + +After applying this diff, build and test with: +```bash +mkdir -p build && cd build +cmake .. +make -j$(nproc) +make test-z3 +./test-z3 euf_sgraph +./test-z3 euf_seq_plugin +``` + +--- +*Generated by ZIPT Code Reviewer agent — comparing Z3 implementation with CEisenhofer/ZIPT@parikh* +``` + +## Important Guidelines + +### Scope +- **Only** examine the files listed in Phase 1 +- **Only** compare against the ZIPT reference at https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT +- Do **not** modify test files +- Do **not** change public API signatures + +### Quality Bar +- Every change must be demonstrably better than the current code +- Cite the specific ZIPT file and function for each improvement +- Prefer small, surgical changes over large refactors + +### Exit Conditions +Exit without creating an issue if: +- ZIPT repository is unreachable +- No concrete, safe improvements can be identified +- All identified improvements require architectural changes beyond the scope of a single diff From 7c8f2d93bbc5eb870fef95a5b0fbb69d9e38e7e0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 00:07:53 +0000 Subject: [PATCH 7/7] Change seq_plugin m_sg to sgraph& reference, enode_concat_hash takes sgraph& directly Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 19 +++++++------------ src/ast/euf/euf_seq_plugin.h | 10 ++++------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index 2a711a765..618b1699a 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -47,12 +47,9 @@ namespace euf { } unsigned enode_concat_hash::operator()(enode* n) const { - sgraph* sg = sg_ptr ? *sg_ptr : nullptr; - if (sg) { - snode* sn = sg->find(n->get_expr()); - if (sn && sn->has_cached_hash()) - return sn->assoc_hash(); - } + snode* sn = sg.find(n->get_expr()); + if (sn && sn->has_cached_hash()) + return sn->assoc_hash(); if (!is_any_concat(n, seq)) return n->get_id(); enode_vector leaves; @@ -82,18 +79,16 @@ namespace euf { plugin(g), m_seq(g.get_manager()), m_rewriter(g.get_manager()), - m_sg(sg), + m_sg(sg ? *sg : *alloc(sgraph, g.get_manager(), g, false)), m_sg_owned(sg == nullptr), - m_concat_hash(m_seq, &m_sg), + m_concat_hash(m_seq, m_sg), m_concat_eq(m_seq), m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) { - if (!m_sg) - m_sg = alloc(sgraph, g.get_manager(), g, false); } seq_plugin::~seq_plugin() { - if (m_sg_owned && m_sg) - dealloc(m_sg); + if (m_sg_owned) + dealloc(&m_sg); } void seq_plugin::register_node(enode* n) { diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index b6f2637ba..8f72a6837 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -50,14 +50,12 @@ namespace euf { class sgraph; // Associativity-respecting hash for enode concat trees. - // Flattens concat(concat(a,b),c) and concat(a,concat(b,c)) - // to the same leaf sequence [a,b,c] before hashing. + // Uses cached snode hash matrices from the sgraph for O(1) hashing. // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). - // When an sgraph is available, uses cached snode hash matrices. struct enode_concat_hash { seq_util const& seq; - sgraph* const* sg_ptr; // pointer to the sgraph* member in seq_plugin - enode_concat_hash(seq_util const& s, sgraph* const* sg = nullptr) : seq(s), sg_ptr(sg) {} + sgraph& sg; + enode_concat_hash(seq_util const& s, sgraph& sg) : seq(s), sg(sg) {} unsigned operator()(enode* n) const; }; @@ -78,7 +76,7 @@ namespace euf { seq_util m_seq; seq_rewriter m_rewriter; - sgraph* m_sg = nullptr; // sgraph (may or may not be owned) + sgraph& m_sg; bool m_sg_owned = false; // whether we own the sgraph svector m_undo;