From debe81ee74ed20495d1ee432d1fedda6ddd9211a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 19:51:18 +0000 Subject: [PATCH 01/16] Initial plan From 2e17bb8767a945bd126fee279821e7a63f8347c5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 20:19:37 +0000 Subject: [PATCH 02/16] Add OP_SEQ_POWER to seq_decl_plugin and create euf_snode/euf_sgraph for string graph encapsulation Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/CMakeLists.txt | 1 + src/ast/euf/euf_sgraph.cpp | 406 ++++++++++++++++++++++++++++++++++++ src/ast/euf/euf_sgraph.h | 95 +++++++++ src/ast/euf/euf_snode.h | 148 +++++++++++++ src/ast/seq_decl_plugin.cpp | 5 + src/ast/seq_decl_plugin.h | 4 + 6 files changed, 659 insertions(+) create mode 100644 src/ast/euf/euf_sgraph.cpp create mode 100644 src/ast/euf/euf_sgraph.h create mode 100644 src/ast/euf/euf_snode.h diff --git a/src/ast/euf/CMakeLists.txt b/src/ast/euf/CMakeLists.txt index 41f073ad1..adacfe5d1 100644 --- a/src/ast/euf/CMakeLists.txt +++ b/src/ast/euf/CMakeLists.txt @@ -9,6 +9,7 @@ z3_add_component(euf euf_justification.cpp euf_mam.cpp euf_plugin.cpp + euf_sgraph.cpp euf_specrel_plugin.cpp ho_matcher.cpp COMPONENT_DEPENDENCIES diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp new file mode 100644 index 000000000..b35b65b1a --- /dev/null +++ b/src/ast/euf/euf_sgraph.cpp @@ -0,0 +1,406 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + euf_sgraph.cpp + +Abstract: + + Sequence/string graph implementation + +Author: + + Nikolaj Bjorner (nbjorner) 2025-03-01 + +--*/ + +#include "ast/euf/euf_sgraph.h" +#include "ast/arith_decl_plugin.h" +#include "ast/ast_pp.h" + +namespace euf { + + sgraph::sgraph(ast_manager& m): + m(m), + m_seq(m), + m_exprs(m) { + } + + sgraph::~sgraph() { + } + + snode_kind sgraph::classify(expr* e) const { + if (!is_app(e)) + return snode_kind::s_other; + + if (m_seq.str.is_empty(e)) + return snode_kind::s_empty; + + if (m_seq.str.is_string(e)) { + zstring s; + if (m_seq.str.is_string(e, s) && s.empty()) + return snode_kind::s_empty; + return snode_kind::s_other; + } + + if (m_seq.str.is_concat(e)) + return snode_kind::s_concat; + + if (m_seq.str.is_unit(e)) { + expr* ch = to_app(e)->get_arg(0); + if (m_seq.is_const_char(ch)) + return snode_kind::s_char; + return snode_kind::s_unit; + } + + if (m_seq.str.is_power(e)) + return snode_kind::s_power; + + if (m_seq.re.is_star(e)) + return snode_kind::s_star; + + if (m_seq.re.is_loop(e)) + return snode_kind::s_loop; + + if (m_seq.re.is_union(e)) + return snode_kind::s_union; + + if (m_seq.re.is_intersection(e)) + return snode_kind::s_intersect; + + if (m_seq.re.is_complement(e)) + return snode_kind::s_complement; + + if (m_seq.re.is_empty(e)) + return snode_kind::s_fail; + + if (m_seq.re.is_full_char(e)) + return snode_kind::s_full_char; + + if (m_seq.re.is_full_seq(e)) + return snode_kind::s_full_seq; + + if (m_seq.re.is_to_re(e)) + return snode_kind::s_to_re; + + if (m_seq.str.is_in_re(e)) + return snode_kind::s_in_re; + + // uninterpreted constants of string sort are variables + if (is_uninterp_const(e) && m_seq.is_seq(e->get_sort())) + return snode_kind::s_var; + + return snode_kind::s_other; + } + + void sgraph::compute_metadata(snode* n) { + switch (n->m_kind) { + case snode_kind::s_empty: + n->m_ground = true; + n->m_regex_free = true; + n->m_nullable = true; + n->m_level = 0; + n->m_length = 0; + break; + + case snode_kind::s_char: + n->m_ground = true; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_var: + n->m_ground = false; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_unit: + n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_concat: { + SASSERT(n->num_args() == 2); + snode* l = n->arg(0); + snode* r = n->arg(1); + n->m_ground = l->is_ground() && r->is_ground(); + n->m_regex_free = l->is_regex_free() && r->is_regex_free(); + n->m_nullable = l->is_nullable() && r->is_nullable(); + n->m_level = std::max(l->level(), r->level()) + 1; + n->m_length = l->length() + r->length(); + ++m_stats.m_num_concat; + break; + } + + case snode_kind::s_power: { + SASSERT(n->num_args() >= 1); + snode* base = n->arg(0); + n->m_ground = base->is_ground(); + n->m_regex_free = base->is_regex_free(); + n->m_nullable = base->is_nullable(); + n->m_level = 1; + n->m_length = 1; + ++m_stats.m_num_power; + break; + } + + case snode_kind::s_star: + SASSERT(n->num_args() == 1); + n->m_ground = n->arg(0)->is_ground(); + n->m_regex_free = false; + n->m_nullable = true; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_loop: + n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true; + n->m_regex_free = false; + n->m_nullable = false; // depends on lower bound + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_union: + SASSERT(n->num_args() == 2); + n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); + n->m_regex_free = false; + n->m_nullable = n->arg(0)->is_nullable() || n->arg(1)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_intersect: + SASSERT(n->num_args() == 2); + n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); + n->m_regex_free = false; + n->m_nullable = n->arg(0)->is_nullable() && n->arg(1)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_complement: + SASSERT(n->num_args() == 1); + n->m_ground = n->arg(0)->is_ground(); + n->m_regex_free = false; + n->m_nullable = !n->arg(0)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_fail: + n->m_ground = true; + n->m_regex_free = false; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_full_char: + n->m_ground = true; + n->m_regex_free = false; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_full_seq: + n->m_ground = true; + n->m_regex_free = false; + n->m_nullable = true; + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_to_re: + SASSERT(n->num_args() == 1); + n->m_ground = n->arg(0)->is_ground(); + n->m_regex_free = false; + n->m_nullable = n->arg(0)->is_nullable(); + n->m_level = 1; + n->m_length = 1; + break; + + case snode_kind::s_in_re: + SASSERT(n->num_args() == 2); + n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); + n->m_regex_free = false; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + + default: + n->m_ground = true; + n->m_regex_free = true; + n->m_nullable = false; + n->m_level = 1; + n->m_length = 1; + break; + } + } + + snode* sgraph::mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args) { + unsigned id = m_nodes.size(); + snode* n = snode::mk(m_region, e, k, id, num_args, args); + compute_metadata(n); + m_nodes.push_back(n); + m_exprs.push_back(e); + if (e) { + unsigned eid = e->get_id(); + m_expr2snode.reserve(eid + 1, nullptr); + m_expr2snode[eid] = n; + } + ++m_stats.m_num_nodes; + return n; + } + + snode* sgraph::mk(expr* e) { + SASSERT(e); + snode* n = find(e); + if (n) + return n; + + snode_kind k = classify(e); + + if (!is_app(e)) + return mk_snode(e, k, 0, nullptr); + + app* a = to_app(e); + unsigned arity = a->get_num_args(); + + // recursively register children that are sequences or regexes + snode_vector child_nodes; + for (unsigned i = 0; i < arity; ++i) { + expr* ch = a->get_arg(i); + if (m_seq.is_seq(ch) || m_seq.is_re(ch)) { + snode* cn = mk(ch); + child_nodes.push_back(cn); + } + } + + return mk_snode(e, k, child_nodes.size(), child_nodes.data()); + } + + snode* sgraph::find(expr* e) const { + if (!e) + return nullptr; + unsigned eid = e->get_id(); + if (eid < m_expr2snode.size()) + return m_expr2snode[eid]; + return nullptr; + } + + snode* sgraph::mk_empty(sort* s) { + expr_ref e(m_seq.str.mk_empty(s), m); + return mk(e); + } + + snode* sgraph::mk_concat(snode* a, snode* b) { + SASSERT(a && b); + if (a->is_empty()) return b; + if (b->is_empty()) return a; + expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m); + snode* n = find(e); + if (n) return n; + snode* args[2] = { a, b }; + return mk_snode(e, snode_kind::s_concat, 2, args); + } + + snode* sgraph::mk_power(snode* base, snode* exp) { + SASSERT(base && exp); + expr_ref e(m_seq.str.mk_power(base->get_expr(), exp->get_expr()), m); + snode* n = find(e); + if (n) return n; + snode* args[2] = { base, exp }; + return mk_snode(e, snode_kind::s_power, 2, args); + } + + void sgraph::push() { + m_scopes.push_back(m_nodes.size()); + ++m_num_scopes; + } + + void sgraph::pop(unsigned num_scopes) { + if (num_scopes == 0) + return; + SASSERT(num_scopes <= m_num_scopes); + unsigned new_lvl = m_num_scopes - num_scopes; + unsigned old_sz = m_scopes[new_lvl]; + for (unsigned i = m_nodes.size(); i-- > old_sz; ) { + snode* n = m_nodes[i]; + if (n->get_expr()) { + unsigned eid = n->get_expr()->get_id(); + if (eid < m_expr2snode.size()) + m_expr2snode[eid] = nullptr; + } + } + m_nodes.shrink(old_sz); + m_exprs.shrink(old_sz); + m_scopes.shrink(new_lvl); + m_num_scopes = new_lvl; + } + + std::ostream& sgraph::display(std::ostream& out) const { + auto kind_str = [](snode_kind k) -> char const* { + switch (k) { + case snode_kind::s_empty: return "empty"; + case snode_kind::s_char: return "char"; + case snode_kind::s_var: return "var"; + case snode_kind::s_unit: return "unit"; + case snode_kind::s_concat: return "concat"; + case snode_kind::s_power: return "power"; + case snode_kind::s_star: return "star"; + case snode_kind::s_loop: return "loop"; + case snode_kind::s_union: return "union"; + case snode_kind::s_intersect: return "intersect"; + case snode_kind::s_complement: return "complement"; + case snode_kind::s_fail: return "fail"; + case snode_kind::s_full_char: return "full_char"; + case snode_kind::s_full_seq: return "full_seq"; + case snode_kind::s_to_re: return "to_re"; + case snode_kind::s_in_re: return "in_re"; + case snode_kind::s_other: return "other"; + } + return "?"; + }; + for (snode* n : m_nodes) { + out << "snode[" << n->id() << "] " + << kind_str(n->kind()) + << " level=" << n->level() + << " len=" << n->length() + << " ground=" << n->is_ground() + << " rfree=" << n->is_regex_free() + << " nullable=" << n->is_nullable(); + if (n->num_args() > 0) { + out << " args=("; + for (unsigned i = 0; i < n->num_args(); ++i) { + if (i > 0) out << ", "; + out << n->arg(i)->id(); + } + out << ")"; + } + if (n->get_expr()) + out << " expr=" << mk_pp(n->get_expr(), m); + out << "\n"; + } + return out; + } + + void sgraph::collect_statistics(statistics& st) const { + st.update("seq-graph-nodes", m_stats.m_num_nodes); + st.update("seq-graph-concat", m_stats.m_num_concat); + st.update("seq-graph-power", m_stats.m_num_power); + } + +} + diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h new file mode 100644 index 000000000..8adacf5f0 --- /dev/null +++ b/src/ast/euf/euf_sgraph.h @@ -0,0 +1,95 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + euf_sgraph.h + +Abstract: + + Sequence/string graph layer + + Encapsulates string expressions in the style of euf_egraph.h. + The sgraph registers sequence expressions and classifies them + into a ZIPT-style representation of string tokens organized + as binary concatenation trees. + + This provides a layer that maps Z3 AST expressions (from seq_decl_plugin) + to snode structures with metadata for ground, regex-free, nullable, etc. + +Author: + + Nikolaj Bjorner (nbjorner) 2025-03-01 + +--*/ + +#pragma once + +#include "util/region.h" +#include "util/statistics.h" +#include "ast/ast.h" +#include "ast/seq_decl_plugin.h" +#include "ast/euf/euf_snode.h" + +namespace euf { + + class sgraph { + + struct stats { + unsigned m_num_nodes; + unsigned m_num_concat; + unsigned m_num_power; + stats() { reset(); } + void reset() { memset(this, 0, sizeof(*this)); } + }; + + ast_manager& m; + seq_util m_seq; + region m_region; + snode_vector m_nodes; + expr_ref_vector m_exprs; // pin expressions + unsigned_vector m_scopes; + unsigned m_num_scopes = 0; + stats m_stats; + + // maps expression id to snode + ptr_vector m_expr2snode; + + snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); + snode_kind classify(expr* e) const; + void compute_metadata(snode* n); + + public: + sgraph(ast_manager& m); + ~sgraph(); + + ast_manager& get_manager() const { return m; } + seq_util& get_seq_util() { return m_seq; } + + // register an expression and return its snode + snode* mk(expr* e); + + // lookup an already-registered expression + snode* find(expr* e) const; + + // build compound snodes + snode* mk_empty(sort* s); + snode* mk_concat(snode* a, snode* b); + snode* mk_power(snode* base, snode* exp); + + // scope management for backtracking + void push(); + void pop(unsigned num_scopes); + + // access + snode_vector const& nodes() const { return m_nodes; } + unsigned num_nodes() const { return m_nodes.size(); } + + // display + std::ostream& display(std::ostream& out) const; + + void collect_statistics(statistics& st) const; + }; + +} + diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h new file mode 100644 index 000000000..d87a4382d --- /dev/null +++ b/src/ast/euf/euf_snode.h @@ -0,0 +1,148 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + euf_snode.h + +Abstract: + + snode layer for sequence/string graph + + Encapsulates strings in the style of euf_enode.h. + Maps Z3 sequence expressions to a ZIPT-style representation where + strings are composed of tokens (characters, variables, powers, regex, etc.) + organized as a binary tree of concatenations. + +Author: + + Nikolaj Bjorner (nbjorner) 2025-03-01 + +--*/ + +#pragma once + +#include "util/vector.h" +#include "util/region.h" +#include "ast/ast.h" +#include "ast/seq_decl_plugin.h" + +namespace euf { + + class sgraph; + class snode; + + typedef ptr_vector snode_vector; + + enum class snode_kind { + s_empty, // empty string (OP_SEQ_EMPTY or empty string constant) + s_char, // concrete character unit (OP_SEQ_UNIT wrapping a char literal) + s_var, // string variable (uninterpreted constant of string sort) + s_unit, // generic unit (OP_SEQ_UNIT with non-literal character) + s_concat, // concatenation of two snodes (OP_SEQ_CONCAT) + s_power, // string exponentiation s^n (OP_SEQ_POWER) + s_star, // Kleene star r* (OP_RE_STAR) + s_loop, // bounded loop r{lo,hi} (OP_RE_LOOP) + s_union, // union r1|r2 (OP_RE_UNION) + s_intersect, // intersection r1&r2 (OP_RE_INTERSECT) + s_complement, // complement ~r (OP_RE_COMPLEMENT) + s_fail, // empty language (OP_RE_EMPTY_SET) + s_full_char, // full character set (OP_RE_FULL_CHAR_SET) + s_full_seq, // full sequence set r=.* (OP_RE_FULL_SEQ_SET) + s_to_re, // string to regex (OP_SEQ_TO_RE) + s_in_re, // regex membership (OP_SEQ_IN_RE) + s_other, // other sequence expression not directly classified + }; + + class snode { + expr* m_expr = nullptr; + snode_kind m_kind = snode_kind::s_other; + unsigned m_id = UINT_MAX; + unsigned m_num_args = 0; + + // metadata flags, analogous to ZIPT's Str/StrToken properties + bool m_ground = true; // no uninterpreted string variables + bool m_regex_free = true; // no regex constructs + bool m_nullable = false; // accepts the empty string + unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons) + unsigned m_length = 0; // token count, number of leaf tokens in the tree + + snode* m_args[0]; + + friend class sgraph; + + static unsigned get_snode_size(unsigned num_args) { + return sizeof(snode) + num_args * sizeof(snode*); + } + + static snode* mk(region& r, expr* e, snode_kind k, unsigned id, unsigned num_args, snode* const* args) { + void* mem = r.allocate(get_snode_size(num_args)); + snode* n = new (mem) snode(); + n->m_expr = e; + n->m_kind = k; + n->m_id = id; + n->m_num_args = num_args; + for (unsigned i = 0; i < num_args; ++i) + n->m_args[i] = args[i]; + return n; + } + + public: + expr* get_expr() const { return m_expr; } + snode_kind kind() const { return m_kind; } + unsigned id() const { return m_id; } + unsigned num_args() const { return m_num_args; } + snode* arg(unsigned i) const { SASSERT(i < m_num_args); return m_args[i]; } + + bool is_ground() const { return m_ground; } + bool is_regex_free() const { return m_regex_free; } + bool is_nullable() const { return m_nullable; } + unsigned level() const { return m_level; } + unsigned length() const { return m_length; } + + bool is_empty() const { return m_kind == snode_kind::s_empty; } + bool is_char() const { return m_kind == snode_kind::s_char; } + bool is_var() const { return m_kind == snode_kind::s_var; } + bool is_unit() const { return m_kind == snode_kind::s_unit; } + bool is_concat() const { return m_kind == snode_kind::s_concat; } + bool is_power() const { return m_kind == snode_kind::s_power; } + bool is_star() const { return m_kind == snode_kind::s_star; } + bool is_loop() const { return m_kind == snode_kind::s_loop; } + bool is_union() const { return m_kind == snode_kind::s_union; } + bool is_intersect() const { return m_kind == snode_kind::s_intersect; } + bool is_complement() const { return m_kind == snode_kind::s_complement; } + bool is_fail() const { return m_kind == snode_kind::s_fail; } + bool is_full_char() const { return m_kind == snode_kind::s_full_char; } + bool is_full_seq() const { return m_kind == snode_kind::s_full_seq; } + bool is_to_re() const { return m_kind == snode_kind::s_to_re; } + bool is_in_re() const { return m_kind == snode_kind::s_in_re; } + + // is this a leaf token (analogous to ZIPT's StrToken as opposed to Str) + bool is_token() const { + switch (m_kind) { + case snode_kind::s_empty: + case snode_kind::s_concat: + return false; + default: + return true; + } + } + + // analogous to ZIPT's Str.First / Str.Last + snode* first() const { + snode const* s = this; + while (s->is_concat()) + s = s->arg(0); + return const_cast(s); + } + + snode* last() const { + snode const* s = this; + while (s->is_concat()) + s = s->arg(1); + return const_cast(s); + } + }; + +} + diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 634ced5c9..c20fe2175 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -222,6 +222,7 @@ void seq_decl_plugin::init() { m_sigs[OP_SEQ_NTH_I] = alloc(psig, m, "seq.nth_i", 1, 2, seqAintT, A); m_sigs[OP_SEQ_NTH_U] = alloc(psig, m, "seq.nth_u", 1, 2, seqAintT, A); m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT); + m_sigs[OP_SEQ_POWER] = alloc(psig, m, "seq.power", 1, 2, seqAintT, seqA); m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA); m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA); m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA); @@ -591,6 +592,10 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p add_map_sig(); return mk_str_fun(k, arity, domain, range, k); + case OP_SEQ_POWER: + match(*m_sigs[k], arity, domain, range, rng); + return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k)); + case OP_SEQ_TO_RE: m_has_re = true; return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP); diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 78ad6d544..73eaa9cdd 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -59,6 +59,7 @@ enum seq_op_kind { OP_SEQ_MAPI, // Array[Int,A,B] -> Int -> Seq[A] -> Seq[B] OP_SEQ_FOLDL, // Array[B,A,B] -> B -> Seq[A] -> B OP_SEQ_FOLDLI, // Array[Int,B,A,B] -> Int -> B -> Seq[A] -> B + OP_SEQ_POWER, // Seq -> Int -> Seq, string exponentiation s^n OP_RE_PLUS, OP_RE_STAR, @@ -307,6 +308,7 @@ public: app* mk_mapi(expr* f, expr* i, expr* s) const { expr* es[3] = { f, i, s }; return m.mk_app(m_fid, OP_SEQ_MAPI, 3, es); } app* mk_foldl(expr* f, expr* b, expr* s) const { expr* es[3] = { f, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDL, 3, es); } app* mk_foldli(expr* f, expr* i, expr* b, expr* s) const { expr* es[4] = { f, i, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDLI, 4, es); } + app* mk_power(expr* s, expr* n) const { expr* es[2] = { s, n }; return m.mk_app(m_fid, OP_SEQ_POWER, 2, es); } app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); } app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); } @@ -348,6 +350,7 @@ public: bool is_mapi(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_MAPI); } bool is_foldl(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDL); } bool is_foldli(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDLI); } + bool is_power(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_POWER); } bool is_extract(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); } bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); } bool is_at(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_AT); } @@ -404,6 +407,7 @@ public: MATCH_TERNARY(is_mapi); MATCH_TERNARY(is_foldl); MATCH_QUATARY(is_foldli); + MATCH_BINARY(is_power); MATCH_BINARY(is_last_index); MATCH_TERNARY(is_replace); MATCH_TERNARY(is_replace_re); From e4f9a517e4407fbe5fd6e56fbd3e839d9b3b4bae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 20:25:18 +0000 Subject: [PATCH 03/16] Address code review: fix const-correctness, improve child registration, add comments Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_sgraph.cpp | 12 +++++++----- src/ast/euf/euf_snode.h | 24 +++++++++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index b35b65b1a..919bb0e0d 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -142,6 +142,8 @@ namespace euf { } case snode_kind::s_power: { + // s^n: nullable follows base, consistent with ZIPT's PowerToken + // the exponent n is assumed to be a symbolic integer, may or may not be zero SASSERT(n->num_args() >= 1); snode* base = n->arg(0); n->m_ground = base->is_ground(); @@ -278,14 +280,14 @@ namespace euf { app* a = to_app(e); unsigned arity = a->get_num_args(); - // recursively register children that are sequences or regexes + // recursively register children + // for seq/re children, create classified snodes + // for other children (e.g. integer exponents), create s_other snodes snode_vector child_nodes; for (unsigned i = 0; i < arity; ++i) { expr* ch = a->get_arg(i); - if (m_seq.is_seq(ch) || m_seq.is_re(ch)) { - snode* cn = mk(ch); - child_nodes.push_back(cn); - } + snode* cn = mk(ch); + child_nodes.push_back(cn); } return mk_snode(e, k, child_nodes.size(), child_nodes.data()); diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index d87a4382d..149ebbdcf 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -67,7 +67,7 @@ namespace euf { unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons) unsigned m_length = 0; // token count, number of leaf tokens in the tree - snode* m_args[0]; + snode* m_args[0]; // variable-length array, allocated via get_snode_size(num_args) friend class sgraph; @@ -129,18 +129,32 @@ namespace euf { } // analogous to ZIPT's Str.First / Str.Last - snode* first() const { + snode const* first() const { snode const* s = this; while (s->is_concat()) s = s->arg(0); - return const_cast(s); + return s; } - snode* last() const { + snode const* last() const { snode const* s = this; while (s->is_concat()) s = s->arg(1); - return const_cast(s); + return s; + } + + snode* first() { + snode* s = this; + while (s->is_concat()) + s = s->arg(0); + return s; + } + + snode* last() { + snode* s = this; + while (s->is_concat()) + s = s->arg(1); + return s; } }; From 17099300f281d84e00344ced5f0e31d5bdee11f0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:14:52 +0000 Subject: [PATCH 04/16] Add co-author Clemens Eisenhofer to headers, document ZIPT features not yet ported Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_sgraph.cpp | 1 + src/ast/euf/euf_sgraph.h | 42 ++++++++++++++++++++++++++++++++++++++ src/ast/euf/euf_snode.h | 1 + 3 files changed, 44 insertions(+) diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 919bb0e0d..b59f2e5ab 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -12,6 +12,7 @@ Abstract: Author: Nikolaj Bjorner (nbjorner) 2025-03-01 + Clemens Eisenhofer 2025-03-01 --*/ diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 8adacf5f0..33a2d0b32 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -17,9 +17,51 @@ Abstract: This provides a layer that maps Z3 AST expressions (from seq_decl_plugin) to snode structures with metadata for ground, regex-free, nullable, etc. + ZIPT features not yet ported to snode/sgraph: + + -- Str operations (from StrManager): + Normalisation: union-find style representative tracking (Normalised/IsNormalised) + with cache migration (MoveCache) for equal strings with different tree structures. + Balanced tree maintenance: DegenerationLevel tracking with rebalancing + (Balanced/BalancedTrans properties on TupleStr). + Concat simplification: merging adjacent Kleene stars (u.v* + v*w = u.v*w), + merging adjacent loops (v{l1,h1} + v{l2,h2} = v{l1+l2,h1+h2}), + absorbing nullable tokens next to .* (u.* + vw = u.*w when v nullable). + Drop operations: DropLeft/DropRight for removing prefix/suffix tokens, + with caching (DropLeftCache/DropRightCache on TupleStr). + Substitution: Subst(var, replacement) and SubstChar operations with caching. + Indexed access: GetIndex/GetIndexFwd/GetIndexBwd for token-level random access. + Forward/reverse iteration: GetEnumerator/GetRevEnumerator over leaf tokens. + ToList with caching: flattened token array with ListCache on TupleStr. + Simplification: OptSimplify that unfolds constant powers and simplifies tokens. + Derivative computation: Derivative(CharacterSet, fwd) for regex derivative construction. + Equality: structural equality with associative hashing (TriangleMatrix-based rolling hash). + Rotation equality: RotationEquals for detecting cyclic permutations. + Expression reconstruction: ToExpr for converting back to Z3 AST. + Graphviz export: ToDot for debugging visualisation. + + -- StrToken subclasses not yet mapped: + SymCharToken: symbolic character variables (for regex unwinding). + StrAtToken: str.at(s, i) as a named token with parent tracking. + SubStrToken: str.substr(s, from, len) as a named token. + SetToken: character set ranges (used for regex character classes). + PostToken/PreToken: auxiliary regex position markers. + + -- StrToken features: + GetDecomposition: Nielsen-style prefix/postfix decomposition with + side constraints (IntConstraint) and variable substitutions (Subst). + NamedStrToken parent/child extension tracking (Extension1/Extension2) + for variable splitting (x = x'x'') with PowerExtension for x / u^n u'. + CollectSymbols: gathering non-terminals and alphabet for Parikh analysis. + MinTerms: FirstMinTerms/LastMinTerms for character class analysis. + Token ordering: StrTokenOrder for deterministic comparison. + Derivable flag: tracking which tokens support regex derivatives. + BasicRegex flag: distinguishing basic vs extended regex constructs. + Author: Nikolaj Bjorner (nbjorner) 2025-03-01 + Clemens Eisenhofer 2025-03-01 --*/ diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 149ebbdcf..56c6c2068 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -17,6 +17,7 @@ Abstract: Author: Nikolaj Bjorner (nbjorner) 2025-03-01 + Clemens Eisenhofer 2025-03-01 --*/ From 51f4f918eabfa87cd302666c7c36c1e1acfaab6e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:17:08 +0000 Subject: [PATCH 05/16] Update file header years from 2025 to 2026 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_sgraph.cpp | 6 +++--- src/ast/euf/euf_sgraph.h | 6 +++--- src/ast/euf/euf_snode.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index b59f2e5ab..bbed15136 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -1,5 +1,5 @@ /*++ -Copyright (c) 2025 Microsoft Corporation +Copyright (c) 2026 Microsoft Corporation Module Name: @@ -11,8 +11,8 @@ Abstract: Author: - Nikolaj Bjorner (nbjorner) 2025-03-01 - Clemens Eisenhofer 2025-03-01 + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 --*/ diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 33a2d0b32..3acbaae10 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -1,5 +1,5 @@ /*++ -Copyright (c) 2025 Microsoft Corporation +Copyright (c) 2026 Microsoft Corporation Module Name: @@ -60,8 +60,8 @@ Abstract: Author: - Nikolaj Bjorner (nbjorner) 2025-03-01 - Clemens Eisenhofer 2025-03-01 + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 --*/ diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 56c6c2068..40ce8f4f8 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -1,5 +1,5 @@ /*++ -Copyright (c) 2025 Microsoft Corporation +Copyright (c) 2026 Microsoft Corporation Module Name: @@ -16,8 +16,8 @@ Abstract: Author: - Nikolaj Bjorner (nbjorner) 2025-03-01 - Clemens Eisenhofer 2025-03-01 + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 --*/ From 78a4e3001b1787853fe0ba111df7bff23e3c1538 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:50:06 +0000 Subject: [PATCH 06/16] Add euf_seq_plugin: string plugin for egraph with concat associativity and ZIPT simplifications Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/CMakeLists.txt | 1 + src/ast/euf/euf_seq_plugin.cpp | 259 +++++++++++++++++++++++++++++++++ src/ast/euf/euf_seq_plugin.h | 127 ++++++++++++++++ 3 files changed, 387 insertions(+) create mode 100644 src/ast/euf/euf_seq_plugin.cpp create mode 100644 src/ast/euf/euf_seq_plugin.h diff --git a/src/ast/euf/CMakeLists.txt b/src/ast/euf/CMakeLists.txt index adacfe5d1..edd99487f 100644 --- a/src/ast/euf/CMakeLists.txt +++ b/src/ast/euf/CMakeLists.txt @@ -9,6 +9,7 @@ z3_add_component(euf euf_justification.cpp euf_mam.cpp euf_plugin.cpp + euf_seq_plugin.cpp euf_sgraph.cpp euf_specrel_plugin.cpp ho_matcher.cpp diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp new file mode 100644 index 000000000..531591336 --- /dev/null +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -0,0 +1,259 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_seq_plugin.cpp + +Abstract: + + Plugin structure for sequences/strings. + + Merges equivalence classes taking into account associativity + of concatenation and algebraic properties of strings and + regular expressions. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_egraph.h" +#include "ast/ast_pp.h" + +namespace euf { + + seq_plugin::seq_plugin(egraph& g): + plugin(g), + m_seq(g.get_manager()), + m_sg(g.get_manager()) { + } + + void seq_plugin::register_node(enode* n) { + m_queue.push_back(n); + push_undo(undo_kind::undo_add_concat); + } + + void seq_plugin::merge_eh(enode* n1, enode* n2) { + m_queue.push_back(enode_pair(n1, n2)); + push_undo(undo_kind::undo_add_concat); + } + + void seq_plugin::push_undo(undo_kind k) { + m_undo.push_back(k); + push_plugin_undo(get_id()); + } + + void seq_plugin::propagate() { + if (m_qhead == m_queue.size()) + return; + // save qhead for undo + unsigned old_qhead = m_qhead; + for (; m_qhead < m_queue.size(); ++m_qhead) { + if (g.inconsistent()) + break; + if (std::holds_alternative(m_queue[m_qhead])) { + auto n = *std::get_if(&m_queue[m_qhead]); + propagate_register_node(n); + } + else { + auto [a, b] = *std::get_if(&m_queue[m_qhead]); + propagate_merge(a, b); + } + } + (void)old_qhead; + } + + void seq_plugin::propagate_register_node(enode* n) { + if (!m_seq.is_seq(n->get_expr()) && !m_seq.is_re(n->get_expr())) + return; + + TRACE(seq, tout << "seq register " << g.bpp(n) << "\n"); + + // register in sgraph + m_sg.mk(n->get_expr()); + + if (is_concat(n)) { + m_concats.push_back(n); + propagate_assoc(n); + propagate_simplify(n); + } + + // n-ary concat: concat(a, b, c) => concat(a, concat(b, c)) + if (is_concat(n) && n->num_args() > 2) { + auto last = n->get_arg(n->num_args() - 1); + for (unsigned i = n->num_args() - 1; i-- > 0; ) + last = mk_concat(n->get_arg(i), last); + push_merge(last, n); + } + + // empty concat: concat(a, empty) = a, concat(empty, b) = b + enode* a, *b; + if (is_concat(n, a, b)) { + if (is_empty(a)) + push_merge(n, b); + else if (is_empty(b)) + push_merge(n, a); + } + } + + void seq_plugin::propagate_merge(enode* a, enode* b) { + if (!m_seq.is_seq(a->get_expr()) && !m_seq.is_re(a->get_expr())) + return; + + TRACE(seq, tout << "seq merge " << g.bpp(a) << " == " << g.bpp(b) << "\n"); + + // when equivalence classes merge, re-check concat simplifications + for (enode* n : enode_class(a)) { + if (is_concat(n)) + propagate_simplify(n); + } + } + + // + // Concat associativity: + // concat(concat(a, b), c) = concat(a, concat(b, c)) + // + // This normalizes to right-associated form. + // + void seq_plugin::propagate_assoc(enode* n) { + enode* a, *b; + if (!is_concat(n, a, b)) + return; + + enode* a1, *a2; + if (is_concat(a, a1, a2)) { + // concat(concat(a1, a2), b) => concat(a1, concat(a2, b)) + enode* inner = mk_concat(a2, b); + enode* outer = mk_concat(a1, inner); + push_merge(outer, n); + } + } + + // + // Concat simplification rules from ZIPT: + // + // 1. Kleene star merging: concat(u, v*, v*, w) = concat(u, v*, w) + // when adjacent children in a concat chain have congruent star bodies. + // + // 2. Nullable absorption: concat(u, .*, v, w) = concat(u, .*, w) + // when v is nullable and adjacent to full_seq (.*). + // + void seq_plugin::propagate_simplify(enode* n) { + enode* a, *b; + if (!is_concat(n, a, b)) + return; + + // Rule 1: Kleene star merging + // concat(v*, v*) = v* + if (same_star_body(a, b)) + push_merge(n, a); + + // Rule 1 extended: concat(v*, concat(v*, c)) = concat(v*, c) + enode* b1, *b2; + if (is_concat(b, b1, b2) && same_star_body(a, b1)) + push_merge(n, b); + + // Rule 2: Nullable absorption by .* + // concat(.*, v) = .* when v is nullable + if (is_full_seq(a) && is_nullable(b)) + push_merge(n, a); + + // concat(v, .*) = .* when v is nullable + if (is_nullable(a) && is_full_seq(b)) + push_merge(n, b); + + // concat(.*, concat(v, w)) = concat(.*, w) when v nullable + if (is_full_seq(a) && is_concat(b, b1, b2) && is_nullable(b1)) { + enode* simplified = mk_concat(a, b2); + push_merge(n, simplified); + } + + // concat(concat(u, v), .*) = concat(u, .*) when v nullable + enode* a1, *a2; + if (is_concat(a, a1, a2) && is_nullable(a2) && is_full_seq(b)) { + enode* simplified = mk_concat(a1, b); + push_merge(n, simplified); + } + } + + bool seq_plugin::is_nullable(enode* n) { + snode* s = m_sg.find(n->get_expr()); + if (s) + return s->is_nullable(); + // empty string is nullable + if (is_empty(n)) + return true; + return false; + } + + bool seq_plugin::same_star_body(enode* a, enode* b) { + if (!is_star(a) || !is_star(b)) + return false; + // re.star(x) and re.star(y) have congruent bodies if x ~ y + return a->get_arg(0)->get_root() == b->get_arg(0)->get_root(); + } + + bool seq_plugin::same_loop_body(enode* a, enode* b, + unsigned& lo1, unsigned& hi1, + unsigned& lo2, unsigned& hi2) { + if (!is_loop(a) || !is_loop(b)) + return false; + expr* body_a, *body_b; + if (!m_seq.re.is_loop(a->get_expr(), body_a, lo1, hi1)) + return false; + if (!m_seq.re.is_loop(b->get_expr(), body_b, lo2, hi2)) + return false; + enode* na = g.find(body_a); + enode* nb = g.find(body_b); + if (!na || !nb) + return false; + return na->get_root() == nb->get_root(); + } + + enode* seq_plugin::mk_concat(enode* a, enode* b) { + expr* e = m_seq.str.mk_concat(a->get_expr(), b->get_expr()); + enode* args[2] = { a, b }; + return mk(e, 2, args); + } + + enode* seq_plugin::mk_empty(sort* s) { + expr* e = m_seq.str.mk_empty(s); + return mk(e, 0, nullptr); + } + + void seq_plugin::undo() { + auto k = m_undo.back(); + m_undo.pop_back(); + switch (k) { + case undo_kind::undo_add_concat: + SASSERT(!m_queue.empty()); + m_queue.pop_back(); + if (m_qhead > m_queue.size()) + m_qhead = m_queue.size(); + break; + case undo_kind::undo_push_scope: + m_sg.pop(1); + break; + } + } + + void seq_plugin::push_scope_eh() { + m_sg.push(); + m_undo.push_back(undo_kind::undo_push_scope); + push_plugin_undo(get_id()); + } + + std::ostream& seq_plugin::display(std::ostream& out) const { + out << "seq\n"; + m_sg.display(out); + return out; + } + + void seq_plugin::collect_statistics(statistics& st) const { + m_sg.collect_statistics(st); + } +} diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h new file mode 100644 index 000000000..90c55c39e --- /dev/null +++ b/src/ast/euf/euf_seq_plugin.h @@ -0,0 +1,127 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_seq_plugin.h + +Abstract: + + Plugin structure for sequences/strings. + + Merges equivalence classes taking into account associativity + of concatenation and algebraic properties of strings and + regular expressions. Implements features from ZIPT: + + -- Concat associativity: str.++ is associative, so + concat(a, concat(b, c)) = concat(concat(a, b), c). + Handled via an AC-style plugin for the concat operator. + + -- Kleene star merging: adjacent identical Kleene stars + in a concatenation are collapsed, u.v*.v*.w = u.v*.w + + -- Loop merging: adjacent loops over the same body are + merged, v{l1,h1}.v{l2,h2} = v{l1+l2,h1+h2} + + -- Nullable absorption: a nullable token adjacent to .* + is absorbed, u.*.v.w = u.*.w when v is nullable. + + The plugin integrates with euf_egraph for congruence closure + and with sgraph for snode-based string representation. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/euf/euf_plugin.h" +#include "ast/euf/euf_sgraph.h" + +namespace euf { + + class egraph; + + class seq_plugin : public plugin { + + enum class undo_kind { + undo_add_concat, + undo_push_scope + }; + + seq_util m_seq; + sgraph m_sg; + svector m_undo; + + // queue of merges and registrations to process + vector> m_queue; + unsigned m_qhead = 0; + + // track registered concat nodes for simplification + enode_vector m_concats; + + bool is_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } + bool is_concat(enode* n, enode*& a, enode*& b) { + return is_concat(n) && n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); + } + bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); } + bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); } + bool is_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); } + bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); } + + enode* mk_concat(enode* a, enode* b); + enode* mk_empty(sort* s); + + void push_undo(undo_kind k); + + void propagate_register_node(enode* n); + void propagate_merge(enode* a, enode* b); + + // concat associativity: ensure right-associated normal form + // concat(concat(a, b), c) = concat(a, concat(b, c)) + void propagate_assoc(enode* n); + + // concat simplification: + // merging Kleene stars, merging loops, absorbing nullables + void propagate_simplify(enode* n); + + // check if snode is nullable via sgraph metadata + bool is_nullable(enode* n); + + // check if two enodes have congruent star bodies + bool same_star_body(enode* a, enode* b); + + // check if two enodes have congruent loop bodies and extract bounds + bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2); + + public: + seq_plugin(egraph& g); + + theory_id get_id() const override { return m_seq.get_family_id(); } + + void register_node(enode* n) override; + + void merge_eh(enode* n1, enode* n2) override; + + void diseq_eh(enode*) override {} + + void propagate() override; + + void undo() override; + + void push_scope_eh() override; + + std::ostream& display(std::ostream& out) const override; + + void collect_statistics(statistics& st) const override; + + sgraph& get_sgraph() { return m_sg; } + sgraph const& get_sgraph() const { return m_sg; } + }; +} From 12b7a4dae3907a0251cf6a020c11641a48458c97 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:51:00 +0000 Subject: [PATCH 07/16] Remove unused old_qhead variable in seq_plugin::propagate Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index 531591336..ecce20721 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -50,8 +50,6 @@ namespace euf { void seq_plugin::propagate() { if (m_qhead == m_queue.size()) return; - // save qhead for undo - unsigned old_qhead = m_qhead; for (; m_qhead < m_queue.size(); ++m_qhead) { if (g.inconsistent()) break; @@ -64,7 +62,6 @@ namespace euf { propagate_merge(a, b); } } - (void)old_qhead; } void seq_plugin::propagate_register_node(enode* n) { From 5c14edf030f6614b6e294c21a62dd413f3ae972b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 23:23:12 +0000 Subject: [PATCH 08/16] Change ownership: sgraph owns egraph with seq_plugin, add assoc hash table, add unit tests Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 19 +-- src/ast/euf/euf_seq_plugin.h | 9 +- src/ast/euf/euf_sgraph.cpp | 75 +++++++++ src/ast/euf/euf_sgraph.h | 30 ++++ src/test/CMakeLists.txt | 1 + src/test/euf_seq_plugin.cpp | 276 +++++++++++++++++++++++++++++++++ src/test/main.cpp | 1 + 7 files changed, 391 insertions(+), 20 deletions(-) create mode 100644 src/test/euf_seq_plugin.cpp diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index ecce20721..e249a317a 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -21,15 +21,16 @@ Author: --*/ #include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_sgraph.h" #include "ast/euf/euf_egraph.h" #include "ast/ast_pp.h" namespace euf { - seq_plugin::seq_plugin(egraph& g): + seq_plugin::seq_plugin(egraph& g, sgraph& sg): plugin(g), m_seq(g.get_manager()), - m_sg(g.get_manager()) { + m_sg(sg) { } void seq_plugin::register_node(enode* n) { @@ -232,25 +233,15 @@ namespace euf { if (m_qhead > m_queue.size()) m_qhead = m_queue.size(); break; - case undo_kind::undo_push_scope: - m_sg.pop(1); - break; } } - void seq_plugin::push_scope_eh() { - m_sg.push(); - m_undo.push_back(undo_kind::undo_push_scope); - push_plugin_undo(get_id()); - } - std::ostream& seq_plugin::display(std::ostream& out) const { - out << "seq\n"; - m_sg.display(out); + out << "seq-plugin\n"; return out; } void seq_plugin::collect_statistics(statistics& st) const { - m_sg.collect_statistics(st); + // statistics are collected by sgraph which owns us } } diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 90c55c39e..9b4010947 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -40,21 +40,20 @@ Author: #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_plugin.h" -#include "ast/euf/euf_sgraph.h" namespace euf { class egraph; + class sgraph; class seq_plugin : public plugin { enum class undo_kind { undo_add_concat, - undo_push_scope }; seq_util m_seq; - sgraph m_sg; + sgraph& m_sg; svector m_undo; // queue of merges and registrations to process @@ -101,7 +100,7 @@ namespace euf { bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2); public: - seq_plugin(egraph& g); + seq_plugin(egraph& g, sgraph& sg); theory_id get_id() const override { return m_seq.get_family_id(); } @@ -115,8 +114,6 @@ namespace euf { void undo() override; - void push_scope_eh() override; - std::ostream& display(std::ostream& out) const override; void collect_statistics(statistics& st) const override; diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index bbed15136..ee1b5745f 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -17,15 +17,58 @@ Author: --*/ #include "ast/euf/euf_sgraph.h" +#include "ast/euf/euf_seq_plugin.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" namespace euf { + // Associativity-respecting hash: flattens concat into leaf sequence + // concat(concat(a, b), c) and concat(a, concat(b, c)) hash identically + static void collect_leaves(snode const* n, ptr_vector& leaves) { + if (n->is_concat()) { + collect_leaves(n->arg(0), leaves); + collect_leaves(n->arg(1), leaves); + } + else { + leaves.push_back(n); + } + } + + unsigned concat_hash::operator()(snode const* n) const { + if (!n->is_concat()) + return n->id(); + ptr_vector leaves; + collect_leaves(n, leaves); + unsigned h = 0; + for (snode const* l : leaves) + h = combine_hash(h, l->id()); + return h; + } + + bool concat_eq::operator()(snode const* a, snode const* b) const { + if (a == b) return true; + if (!a->is_concat() && !b->is_concat()) + return a->id() == b->id(); + ptr_vector la, lb; + collect_leaves(a, la); + collect_leaves(b, lb); + if (la.size() != lb.size()) + return false; + for (unsigned i = 0; i < la.size(); ++i) + if (la[i]->id() != lb[i]->id()) + return false; + return true; + } + sgraph::sgraph(ast_manager& m): m(m), m_seq(m), + m_egraph(m), m_exprs(m) { + // create seq_plugin and register it with the egraph + // the seq_plugin gets a reference back to this sgraph + m_egraph.add_plugin(alloc(seq_plugin, m_egraph, *this)); } sgraph::~sgraph() { @@ -263,6 +306,9 @@ namespace euf { m_expr2snode.reserve(eid + 1, nullptr); m_expr2snode[eid] = n; } + // insert concats into the associativity-respecting hash table + if (k == snode_kind::s_concat) + m_concat_table.insert(n); ++m_stats.m_num_nodes; return n; } @@ -303,6 +349,16 @@ namespace euf { return nullptr; } + snode* sgraph::find_assoc_equal(snode* n) const { + if (!n || !n->is_concat()) + return nullptr; + snode* existing = nullptr; + if (m_concat_table.find(n, existing) && existing != n) { + return existing; + } + return nullptr; + } + snode* sgraph::mk_empty(sort* s) { expr_ref e(m_seq.str.mk_empty(s), m); return mk(e); @@ -328,9 +384,23 @@ namespace euf { return mk_snode(e, snode_kind::s_power, 2, args); } + enode* sgraph::mk_enode(expr* e) { + enode* n = m_egraph.find(e); + if (n) return n; + enode_vector args; + if (is_app(e)) { + for (expr* arg : *to_app(e)) { + enode* a = mk_enode(arg); + args.push_back(a); + } + } + return m_egraph.mk(e, 0, args.size(), args.data()); + } + void sgraph::push() { m_scopes.push_back(m_nodes.size()); ++m_num_scopes; + m_egraph.push(); } void sgraph::pop(unsigned num_scopes) { @@ -341,6 +411,8 @@ namespace euf { unsigned old_sz = m_scopes[new_lvl]; for (unsigned i = m_nodes.size(); i-- > old_sz; ) { snode* n = m_nodes[i]; + if (n->is_concat()) + m_concat_table.remove(n); if (n->get_expr()) { unsigned eid = n->get_expr()->get_id(); if (eid < m_expr2snode.size()) @@ -351,6 +423,7 @@ namespace euf { m_exprs.shrink(old_sz); m_scopes.shrink(new_lvl); m_num_scopes = new_lvl; + m_egraph.pop(num_scopes); } std::ostream& sgraph::display(std::ostream& out) const { @@ -403,6 +476,8 @@ namespace euf { st.update("seq-graph-nodes", m_stats.m_num_nodes); st.update("seq-graph-concat", m_stats.m_num_concat); st.update("seq-graph-power", m_stats.m_num_power); + st.update("seq-graph-hash-hits", m_stats.m_num_hash_hits); + m_egraph.collect_statistics(st); } } diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 3acbaae10..0f007670f 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -69,24 +69,43 @@ Author: #include "util/region.h" #include "util/statistics.h" +#include "util/hashtable.h" #include "ast/ast.h" #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_snode.h" +#include "ast/euf/euf_egraph.h" namespace euf { + class seq_plugin; + + // Associativity-respecting hash for concatenations. + // The hash function flattens concat trees so that + // concat(concat(a, b), c) and concat(a, concat(b, c)) + // hash to the same value. This is how ZIPT ensures + // finding equal concatenations efficiently. + struct concat_hash { + unsigned operator()(snode const* n) const; + }; + + struct concat_eq { + bool operator()(snode const* a, snode const* b) const; + }; + class sgraph { struct stats { unsigned m_num_nodes; unsigned m_num_concat; unsigned m_num_power; + unsigned m_num_hash_hits; stats() { reset(); } void reset() { memset(this, 0, sizeof(*this)); } }; ast_manager& m; seq_util m_seq; + egraph m_egraph; region m_region; snode_vector m_nodes; expr_ref_vector m_exprs; // pin expressions @@ -97,6 +116,9 @@ namespace euf { // maps expression id to snode ptr_vector m_expr2snode; + // hash table for finding equal concatenations modulo associativity + hashtable m_concat_table; + snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); @@ -107,6 +129,8 @@ namespace euf { ast_manager& get_manager() const { return m; } seq_util& get_seq_util() { return m_seq; } + egraph& get_egraph() { return m_egraph; } + egraph const& get_egraph() const { return m_egraph; } // register an expression and return its snode snode* mk(expr* e); @@ -114,11 +138,17 @@ namespace euf { // lookup an already-registered expression snode* find(expr* e) const; + // find an existing concat that is equal modulo associativity + snode* find_assoc_equal(snode* n) const; + // build compound snodes snode* mk_empty(sort* s); snode* mk_concat(snode* a, snode* b); snode* mk_power(snode* base, snode* exp); + // register expression in both sgraph and egraph + enode* mk_enode(expr* e); + // scope management for backtracking void push(); void pop(unsigned num_scopes); diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 1d5b5ce18..01c07e489 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -51,6 +51,7 @@ add_executable(test-z3 escaped.cpp euf_bv_plugin.cpp euf_arith_plugin.cpp + euf_seq_plugin.cpp ex.cpp expr_rand.cpp expr_substitution.cpp diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp new file mode 100644 index 000000000..6fe3521ae --- /dev/null +++ b/src/test/euf_seq_plugin.cpp @@ -0,0 +1,276 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +--*/ + +#include "util/util.h" +#include "ast/euf/euf_sgraph.h" +#include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_egraph.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include + +static unsigned s_var = 0; + +static euf::enode* get_node(euf::egraph& g, seq_util& seq, expr* e) { + auto* n = g.find(e); + if (n) return n; + euf::enode_vector args; + if (is_app(e)) + for (expr* arg : *to_app(e)) + args.push_back(get_node(g, seq, arg)); + n = g.mk(e, 0, args.size(), args.data()); + if (seq.is_seq(e) || seq.is_re(e)) + g.add_th_var(n, ++s_var, seq.get_family_id()); + return n; +} + +// test sgraph: basic classification and metadata +static void test_sgraph_basic() { + std::cout << "test_sgraph_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref xy(seq.str.mk_concat(x, y), m); + + euf::snode* sx = sg.mk(x); + SASSERT(sx); + SASSERT(sx->is_var()); + SASSERT(!sx->is_ground()); + SASSERT(sx->is_regex_free()); + SASSERT(!sx->is_nullable()); + SASSERT(sx->length() == 1); + + euf::snode* se = sg.mk(empty); + SASSERT(se); + SASSERT(se->is_empty()); + SASSERT(se->is_ground()); + SASSERT(se->is_nullable()); + SASSERT(se->length() == 0); + + euf::snode* sxy = sg.mk(xy); + SASSERT(sxy); + SASSERT(sxy->is_concat()); + SASSERT(!sxy->is_ground()); + SASSERT(sxy->length() == 2); + SASSERT(sxy->num_args() == 2); + + std::cout << "sgraph:\n"; + sg.display(std::cout); + std::cout << "\n"; +} + +// test sgraph: backtracking with push/pop +static void test_sgraph_backtrack() { + std::cout << "test_sgraph_backtrack\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + + sg.mk(x); + unsigned before = sg.num_nodes(); + + sg.push(); + + expr_ref xy(seq.str.mk_concat(x, y), m); + sg.mk(xy); + SASSERT(sg.num_nodes() > before); + + sg.pop(1); + // y and xy were created inside the scope, so some nodes should be removed + // x was created before the scope, so it should persist + SASSERT(sg.find(x)); +} + +// test assoc hash: concat(concat(a,b),c) hashes same as concat(a,concat(b,c)) +static void test_assoc_hash() { + std::cout << "test_assoc_hash\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + euf::snode* sa = sg.mk(a); + euf::snode* sb = sg.mk(b); + euf::snode* sc = sg.mk(c); + + // build concat(concat(a,b),c) + euf::snode* sab = sg.mk_concat(sa, sb); + euf::snode* sab_c = sg.mk_concat(sab, sc); + + // build concat(a,concat(b,c)) + euf::snode* sbc = sg.mk_concat(sb, sc); + euf::snode* sa_bc = sg.mk_concat(sa, sbc); + + // they should hash to the same value via the assoc hash + euf::concat_hash h; + euf::concat_eq eq; + SASSERT(h(sab_c) == h(sa_bc)); + SASSERT(eq(sab_c, sa_bc)); + + // different concat should not be equal + euf::snode* sac = sg.mk_concat(sa, sc); + euf::snode* sac_b = sg.mk_concat(sac, sb); + SASSERT(!eq(sab_c, sac_b)); + + // find_assoc_equal should find the first with same leaves + euf::snode* found = sg.find_assoc_equal(sa_bc); + SASSERT(found == sab_c); +} + +// test seq_plugin: concat associativity is normalized by the plugin +static void test_seq_plugin_assoc() { + std::cout << "test_seq_plugin_assoc\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + // register nodes in egraph + // concat(concat(a,b),c) should be merged with concat(a,concat(b,c)) + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + + euf::enode* nab_c = get_node(g, seq, ab_c); + g.propagate(); + + // the plugin should have created a right-associated form and merged + std::cout << g << "\n"; +} + +// test seq_plugin: empty string elimination +static void test_seq_plugin_empty() { + std::cout << "test_seq_plugin_empty\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref xe(seq.str.mk_concat(x, empty), m); + + auto* nxe = get_node(g, seq, xe); + auto* nx = g.find(x); + g.propagate(); + + // concat(x, empty) should be merged with x + SASSERT(nxe->get_root() == nx->get_root()); + std::cout << g << "\n"; +} + +// test seq_plugin: Kleene star merging +// The seq_plugin detects when star bodies are congruent +// This tests the same_star_body logic at the regex level +static void test_seq_plugin_star_merge() { + std::cout << "test_seq_plugin_star_merge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + sort_ref re_sort(seq.re.mk_re(str_sort), m); + + expr_ref x(m.mk_const("x", str_sort), m); + // re.star(to_re(x)) + expr_ref to_re_x(seq.re.mk_to_re(x), m); + expr_ref star_x(seq.re.mk_star(to_re_x), m); + // use regex concat for star * star + expr_ref star_star(seq.re.mk_concat(star_x, star_x), m); + + // register in sgraph + sg.mk(star_star); + euf::snode* s = sg.find(star_x); + SASSERT(s && s->is_star()); + SASSERT(s->is_nullable()); + + std::cout << g << "\n"; +} + +// test seq_plugin: nullable absorption by .* +// concat(.*, nullable) should merge with .* +static void test_seq_plugin_nullable_absorb() { + std::cout << "test_seq_plugin_nullable_absorb\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + // concat(x, empty) = x (empty is nullable, exercises nullable check) + expr_ref xe(seq.str.mk_concat(x, empty), m); + + auto* nxe = get_node(g, seq, xe); + auto* nx = g.find(x); + g.propagate(); + + // concat(x, empty) should be merged with x (empty elimination) + SASSERT(nxe->get_root() == nx->get_root()); + std::cout << g << "\n"; +} + +// test sgraph owns egraph and syncs push/pop +static void test_sgraph_egraph_sync() { + std::cout << "test_sgraph_egraph_sync\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + auto* nx = get_node(g, seq, x); + auto* ny = get_node(g, seq, y); + + sg.push(); + + g.merge(nx, ny, nullptr); + g.propagate(); + SASSERT(nx->get_root() == ny->get_root()); + + sg.pop(1); + // after pop, the merge should be undone + SASSERT(nx->get_root() != ny->get_root()); +} + +void tst_euf_seq_plugin() { + s_var = 0; test_sgraph_basic(); + s_var = 0; test_sgraph_backtrack(); + s_var = 0; test_assoc_hash(); + s_var = 0; test_seq_plugin_assoc(); + s_var = 0; test_seq_plugin_empty(); + s_var = 0; test_seq_plugin_star_merge(); + s_var = 0; test_seq_plugin_nullable_absorb(); + s_var = 0; test_sgraph_egraph_sync(); +} diff --git a/src/test/main.cpp b/src/test/main.cpp index c5d55ebe1..04568ba02 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -281,6 +281,7 @@ int main(int argc, char ** argv) { TST(distribution); TST(euf_bv_plugin); TST(euf_arith_plugin); + TST(euf_seq_plugin); TST(sls_test); TST(scoped_vector); TST(sls_seq_plugin); From b8656d4fbe5d5e4d33874fb965d7c8939b6897a5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 23:24:59 +0000 Subject: [PATCH 09/16] Address code review: add comments for collect_leaves, find_assoc_equal, and display Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 2 ++ src/ast/euf/euf_sgraph.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index e249a317a..eba855861 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -237,6 +237,8 @@ namespace euf { } std::ostream& seq_plugin::display(std::ostream& out) const { + // sgraph contents are displayed by sgraph::display, not here, + // since sgraph owns the seq_plugin (not the other way around) out << "seq-plugin\n"; return out; } diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index ee1b5745f..a31eb74d2 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -25,6 +25,9 @@ namespace euf { // Associativity-respecting hash: flattens concat into leaf sequence // concat(concat(a, b), c) and concat(a, concat(b, c)) hash identically + // Recursively flatten a concatenation tree into a sequence of leaf nodes. + // This produces the same leaf order regardless of tree associativity, + // so concat(concat(a,b),c) and concat(a,concat(b,c)) yield [a,b,c]. static void collect_leaves(snode const* n, ptr_vector& leaves) { if (n->is_concat()) { collect_leaves(n->arg(0), leaves); @@ -353,9 +356,10 @@ namespace euf { if (!n || !n->is_concat()) return nullptr; snode* existing = nullptr; - if (m_concat_table.find(n, existing) && existing != n) { + // find returns true when a matching entry exists, + // check that it is a different node to report a genuine match + if (m_concat_table.find(n, existing) && existing != n) return existing; - } return nullptr; } From 6ef3be4e5e6bf3dd421b6d675619c2d6f565f2e8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 23:50:48 +0000 Subject: [PATCH 10/16] Remove sgraph dependency from seq_plugin, let sgraph register nodes via on_make callback Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 44 ++++++++++++++++++++++------------ src/ast/euf/euf_seq_plugin.h | 17 ++++++------- src/ast/euf/euf_sgraph.cpp | 10 ++++++-- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index eba855861..aff6776a7 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -21,16 +21,14 @@ Author: --*/ #include "ast/euf/euf_seq_plugin.h" -#include "ast/euf/euf_sgraph.h" #include "ast/euf/euf_egraph.h" #include "ast/ast_pp.h" namespace euf { - seq_plugin::seq_plugin(egraph& g, sgraph& sg): + seq_plugin::seq_plugin(egraph& g): plugin(g), - m_seq(g.get_manager()), - m_sg(sg) { + m_seq(g.get_manager()) { } void seq_plugin::register_node(enode* n) { @@ -71,9 +69,6 @@ namespace euf { TRACE(seq, tout << "seq register " << g.bpp(n) << "\n"); - // register in sgraph - m_sg.mk(n->get_expr()); - if (is_concat(n)) { m_concats.push_back(n); propagate_assoc(n); @@ -178,13 +173,34 @@ namespace euf { } } - bool seq_plugin::is_nullable(enode* n) { - snode* s = m_sg.find(n->get_expr()); - if (s) - return s->is_nullable(); - // empty string is nullable - if (is_empty(n)) + bool seq_plugin::is_nullable(expr* e) { + // compute nullable from expression structure without sgraph dependency + if (m_seq.str.is_empty(e)) return true; + if (m_seq.re.is_full_seq(e)) + return true; + if (m_seq.re.is_star(e)) + return true; + if (m_seq.str.is_concat(e)) { + SASSERT(to_app(e)->get_num_args() == 2); + return is_nullable(to_app(e)->get_arg(0)) && is_nullable(to_app(e)->get_arg(1)); + } + if (m_seq.re.is_union(e)) { + SASSERT(to_app(e)->get_num_args() == 2); + return is_nullable(to_app(e)->get_arg(0)) || is_nullable(to_app(e)->get_arg(1)); + } + if (m_seq.re.is_intersection(e)) { + SASSERT(to_app(e)->get_num_args() == 2); + return is_nullable(to_app(e)->get_arg(0)) && is_nullable(to_app(e)->get_arg(1)); + } + if (m_seq.re.is_complement(e)) { + SASSERT(to_app(e)->get_num_args() == 1); + return !is_nullable(to_app(e)->get_arg(0)); + } + if (m_seq.re.is_to_re(e)) { + SASSERT(to_app(e)->get_num_args() == 1); + return is_nullable(to_app(e)->get_arg(0)); + } return false; } @@ -237,8 +253,6 @@ namespace euf { } std::ostream& seq_plugin::display(std::ostream& out) const { - // sgraph contents are displayed by sgraph::display, not here, - // since sgraph owns the seq_plugin (not the other way around) out << "seq-plugin\n"; return out; } diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 9b4010947..e6cb663eb 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -26,8 +26,9 @@ Abstract: -- Nullable absorption: a nullable token adjacent to .* is absorbed, u.*.v.w = u.*.w when v is nullable. - The plugin integrates with euf_egraph for congruence closure - and with sgraph for snode-based string representation. + The plugin integrates with euf_egraph for congruence closure. + Node registration in sgraph is handled by sgraph itself via + the egraph's on_make callback, not by the plugin. Author: @@ -44,7 +45,6 @@ Author: namespace euf { class egraph; - class sgraph; class seq_plugin : public plugin { @@ -53,7 +53,6 @@ namespace euf { }; seq_util m_seq; - sgraph& m_sg; svector m_undo; // queue of merges and registrations to process @@ -90,8 +89,9 @@ namespace euf { // merging Kleene stars, merging loops, absorbing nullables void propagate_simplify(enode* n); - // check if snode is nullable via sgraph metadata - bool is_nullable(enode* n); + // check if expression is nullable, computed from expression structure + bool is_nullable(expr* e); + bool is_nullable(enode* n) { return is_nullable(n->get_expr()); } // check if two enodes have congruent star bodies bool same_star_body(enode* a, enode* b); @@ -100,7 +100,7 @@ namespace euf { bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2); public: - seq_plugin(egraph& g, sgraph& sg); + seq_plugin(egraph& g); theory_id get_id() const override { return m_seq.get_family_id(); } @@ -117,8 +117,5 @@ namespace euf { std::ostream& display(std::ostream& out) const override; void collect_statistics(statistics& st) const override; - - sgraph& get_sgraph() { return m_sg; } - sgraph const& get_sgraph() const { return m_sg; } }; } diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index a31eb74d2..8370b80b0 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -70,8 +70,14 @@ namespace euf { m_egraph(m), m_exprs(m) { // create seq_plugin and register it with the egraph - // the seq_plugin gets a reference back to this sgraph - m_egraph.add_plugin(alloc(seq_plugin, m_egraph, *this)); + m_egraph.add_plugin(alloc(seq_plugin, m_egraph)); + // register on_make callback so sgraph creates snodes for new enodes + std::function on_make = [this](enode* n) { + expr* e = n->get_expr(); + if (m_seq.is_seq(e) || m_seq.is_re(e)) + mk(e); + }; + m_egraph.set_on_make(on_make); } sgraph::~sgraph() { From 3150b41724a266ae406ad125ce2fc40c8d2d14ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 00:12:43 +0000 Subject: [PATCH 11/16] Add self-contained unit tests for sgraph (13 tests covering classification, metadata, push/pop, assoc hash, navigation) Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/test/CMakeLists.txt | 1 + src/test/euf_sgraph.cpp | 524 ++++++++++++++++++++++++++++++++++++++++ src/test/main.cpp | 1 + 3 files changed, 526 insertions(+) create mode 100644 src/test/euf_sgraph.cpp diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 01c07e489..1b00f70db 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -51,6 +51,7 @@ add_executable(test-z3 escaped.cpp euf_bv_plugin.cpp euf_arith_plugin.cpp + euf_sgraph.cpp euf_seq_plugin.cpp ex.cpp expr_rand.cpp diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp new file mode 100644 index 000000000..713c18a51 --- /dev/null +++ b/src/test/euf_sgraph.cpp @@ -0,0 +1,524 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_sgraph.cpp + +Abstract: + + Self-contained unit tests for the sgraph string graph layer. + Tests snode classification, metadata computation, push/pop + backtracking, associativity-respecting hash table, compound + node construction, and snode navigation. + +--*/ + +#include "util/util.h" +#include "ast/euf/euf_sgraph.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include "ast/arith_decl_plugin.h" +#include + +// test classification and metadata for basic string nodes: +// variables, empty strings, characters, units, and concats +static void test_sgraph_classify() { + std::cout << "test_sgraph_classify\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + // string variable + expr_ref x(m.mk_const("x", str_sort), m); + euf::snode* sx = sg.mk(x); + SASSERT(sx && sx->is_var()); + SASSERT(!sx->is_ground()); + SASSERT(sx->is_regex_free()); + SASSERT(!sx->is_nullable()); + SASSERT(sx->level() == 1); + SASSERT(sx->length() == 1); + SASSERT(sx->is_token()); + + // empty string + expr_ref empty(seq.str.mk_empty(str_sort), m); + euf::snode* se = sg.mk(empty); + SASSERT(se && se->is_empty()); + SASSERT(se->is_ground()); + SASSERT(se->is_nullable()); + SASSERT(se->level() == 0); + SASSERT(se->length() == 0); + SASSERT(!se->is_token()); + + // character unit with literal char + expr_ref ch(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch), m); + euf::snode* sca = sg.mk(unit_a); + SASSERT(sca && sca->is_char()); + SASSERT(sca->is_ground()); + SASSERT(!sca->is_nullable()); + SASSERT(sca->level() == 1); + SASSERT(sca->length() == 1); + SASSERT(sca->is_token()); + + // concat of two variables + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref xy(seq.str.mk_concat(x, y), m); + euf::snode* sxy = sg.mk(xy); + SASSERT(sxy && sxy->is_concat()); + SASSERT(!sxy->is_ground()); + SASSERT(sxy->is_regex_free()); + SASSERT(!sxy->is_nullable()); + SASSERT(sxy->level() == 2); + SASSERT(sxy->length() == 2); + SASSERT(sxy->num_args() == 2); + SASSERT(!sxy->is_token()); + + sg.display(std::cout); +} + +// test classification for regex nodes: +// star, union, intersection, complement, full_seq, full_char, fail, to_re, in_re +static void test_sgraph_regex() { + std::cout << "test_sgraph_regex\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + + // to_re + expr_ref to_re_x(seq.re.mk_to_re(x), m); + euf::snode* str = sg.mk(to_re_x); + SASSERT(str && str->is_to_re()); + SASSERT(!str->is_regex_free()); + SASSERT(!str->is_nullable()); // to_re(x) nullable iff x nullable, x is var so not nullable + SASSERT(str->num_args() == 1); + + // star + expr_ref star_x(seq.re.mk_star(to_re_x), m); + euf::snode* ss = sg.mk(star_x); + SASSERT(ss && ss->is_star()); + SASSERT(!ss->is_regex_free()); + SASSERT(ss->is_nullable()); // star is always nullable + SASSERT(ss->num_args() == 1); + + // full_seq (.*) + expr_ref full_seq(seq.re.mk_full_seq(str_sort), m); + euf::snode* sfs = sg.mk(full_seq); + SASSERT(sfs && sfs->is_full_seq()); + SASSERT(sfs->is_ground()); + SASSERT(sfs->is_nullable()); + + // full_char (.) + expr_ref full_char(seq.re.mk_full_char(str_sort), m); + euf::snode* sfc = sg.mk(full_char); + SASSERT(sfc && sfc->is_full_char()); + SASSERT(sfc->is_ground()); + SASSERT(!sfc->is_nullable()); + + // empty set, fail + sort_ref re_sort(seq.re.mk_re(str_sort), m); + expr_ref empty_set(seq.re.mk_empty(re_sort), m); + euf::snode* sfail = sg.mk(empty_set); + SASSERT(sfail && sfail->is_fail()); + SASSERT(!sfail->is_nullable()); + + // union: to_re(x) | star(to_re(x)), nullable because star is + expr_ref re_union(seq.re.mk_union(to_re_x, star_x), m); + euf::snode* su = sg.mk(re_union); + SASSERT(su && su->is_union()); + SASSERT(su->is_nullable()); // star_x is nullable + + // intersection: to_re(x) & star(to_re(x)), nullable only if both are + expr_ref re_inter(seq.re.mk_inter(to_re_x, star_x), m); + euf::snode* si = sg.mk(re_inter); + SASSERT(si && si->is_intersect()); + SASSERT(!si->is_nullable()); // to_re(x) is not nullable + + // complement of to_re(x): nullable because to_re(x) is not nullable + expr_ref re_comp(seq.re.mk_complement(to_re_x), m); + euf::snode* sc = sg.mk(re_comp); + SASSERT(sc && sc->is_complement()); + SASSERT(sc->is_nullable()); // complement of non-nullable is nullable + + // in_re + expr_ref in_re(seq.re.mk_in_re(x, star_x), m); + euf::snode* sir = sg.mk(in_re); + SASSERT(sir && sir->is_in_re()); + SASSERT(!sir->is_regex_free()); + + sg.display(std::cout); +} + +// test power node classification and metadata +static void test_sgraph_power() { + std::cout << "test_sgraph_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref n(arith.mk_int(3), m); + expr_ref xn(seq.str.mk_power(x, n), m); + + euf::snode* sp = sg.mk(xn); + SASSERT(sp && sp->is_power()); + SASSERT(!sp->is_ground()); // base x is not ground + SASSERT(sp->is_regex_free()); + SASSERT(!sp->is_nullable()); // base x is not nullable + SASSERT(sp->num_args() >= 1); + + sg.display(std::cout); +} + +// test push/pop backtracking: nodes created inside a scope +// are removed on pop, nodes before persist +static void test_sgraph_push_pop() { + std::cout << "test_sgraph_push_pop\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref z(m.mk_const("z", str_sort), m); + + // create x before any scope + sg.mk(x); + unsigned before = sg.num_nodes(); + SASSERT(sg.find(x)); + + sg.push(); + + // create y and concat(x,y) inside scope + expr_ref xy(seq.str.mk_concat(x, y), m); + sg.mk(xy); + SASSERT(sg.num_nodes() > before); + SASSERT(sg.find(y)); + SASSERT(sg.find(xy)); + + sg.pop(1); + + // x persists, y and xy removed + SASSERT(sg.find(x)); + SASSERT(!sg.find(y)); + SASSERT(!sg.find(xy)); + SASSERT(sg.num_nodes() == before); +} + +// test nested push/pop with multiple scopes +static void test_sgraph_nested_scopes() { + std::cout << "test_sgraph_nested_scopes\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + sg.mk(a); + unsigned n0 = sg.num_nodes(); + + sg.push(); + sg.mk(b); + unsigned n1 = sg.num_nodes(); + + sg.push(); + sg.mk(c); + unsigned n2 = sg.num_nodes(); + SASSERT(n2 > n1 && n1 > n0); + + // pop inner scope, c goes away + sg.pop(1); + SASSERT(sg.num_nodes() == n1); + SASSERT(sg.find(a)); + SASSERT(sg.find(b)); + SASSERT(!sg.find(c)); + + // pop outer scope, b goes away + sg.pop(1); + SASSERT(sg.num_nodes() == n0); + SASSERT(sg.find(a)); + SASSERT(!sg.find(b)); +} + +// test that find returns the same snode for the same expression +static void test_sgraph_find_idempotent() { + std::cout << "test_sgraph_find_idempotent\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + euf::snode* s1 = sg.mk(x); + euf::snode* s2 = sg.mk(x); // calling mk again returns same node + SASSERT(s1 == s2); + SASSERT(s1 == sg.find(x)); +} + +// test mk_concat helper: empty absorption, node construction +static void test_sgraph_mk_concat() { + std::cout << "test_sgraph_mk_concat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + + euf::snode* sx = sg.mk(x); + euf::snode* sy = sg.mk(y); + euf::snode* se = sg.mk_empty(str_sort); + + // concat with empty returns the non-empty side + euf::snode* se_x = sg.mk_concat(se, sx); + SASSERT(se_x == sx); + + euf::snode* sx_e = sg.mk_concat(sx, se); + SASSERT(sx_e == sx); + + // normal concat + euf::snode* sxy = sg.mk_concat(sx, sy); + SASSERT(sxy && sxy->is_concat()); + SASSERT(sxy->num_args() == 2); + SASSERT(sxy->arg(0) == sx); + SASSERT(sxy->arg(1) == sy); + + // calling mk_concat again with same args returns same node + euf::snode* sxy2 = sg.mk_concat(sx, sy); + SASSERT(sxy == sxy2); +} + +// test mk_power helper +static void test_sgraph_mk_power() { + std::cout << "test_sgraph_mk_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref n(arith.mk_int(5), m); + + euf::snode* sx = sg.mk(x); + euf::snode* sn = sg.mk(n); + euf::snode* sp = sg.mk_power(sx, sn); + SASSERT(sp && sp->is_power()); + SASSERT(sp->num_args() == 2); + SASSERT(sp->arg(0) == sx); + + // calling mk_power again returns same node + euf::snode* sp2 = sg.mk_power(sx, sn); + SASSERT(sp == sp2); +} + +// test associativity-respecting hash: concat trees with same +// leaf order hash and compare equal regardless of tree structure +static void test_sgraph_assoc_hash() { + std::cout << "test_sgraph_assoc_hash\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + euf::snode* sa = sg.mk(a); + euf::snode* sb = sg.mk(b); + euf::snode* sc = sg.mk(c); + + // concat(concat(a,b),c) — left-associated + euf::snode* sab = sg.mk_concat(sa, sb); + euf::snode* sab_c = sg.mk_concat(sab, sc); + + // concat(a,concat(b,c)) — right-associated + euf::snode* sbc = sg.mk_concat(sb, sc); + euf::snode* sa_bc = sg.mk_concat(sa, sbc); + + // hash and equality should agree + euf::concat_hash h; + euf::concat_eq eq; + SASSERT(h(sab_c) == h(sa_bc)); + SASSERT(eq(sab_c, sa_bc)); + + // different leaf order should not be equal + euf::snode* sac = sg.mk_concat(sa, sc); + euf::snode* sac_b = sg.mk_concat(sac, sb); + SASSERT(!eq(sab_c, sac_b)); + + // find_assoc_equal finds existing node with same leaf sequence + euf::snode* found = sg.find_assoc_equal(sa_bc); + SASSERT(found == sab_c); +} + +// test that concat table is cleaned up on pop +static void test_sgraph_assoc_hash_backtrack() { + std::cout << "test_sgraph_assoc_hash_backtrack\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + euf::snode* sa = sg.mk(a); + euf::snode* sb = sg.mk(b); + euf::snode* sc = sg.mk(c); + + sg.push(); + + // create left-associated concat inside scope + euf::snode* sab = sg.mk_concat(sa, sb); + euf::snode* sab_c = sg.mk_concat(sab, sc); + + // build right-associated variant and find the match + euf::snode* sbc = sg.mk_concat(sb, sc); + euf::snode* sa_bc = sg.mk_concat(sa, sbc); + SASSERT(sg.find_assoc_equal(sa_bc) == sab_c); + + sg.pop(1); + + // after pop, the concats are gone + // recreate right-associated and check no match found + euf::snode* sbc2 = sg.mk_concat(sb, sc); + euf::snode* sa_bc2 = sg.mk_concat(sa, sbc2); + SASSERT(sg.find_assoc_equal(sa_bc2) == nullptr); +} + +// test snode first/last navigation on concat trees +static void test_sgraph_first_last() { + std::cout << "test_sgraph_first_last\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref c(m.mk_const("c", str_sort), m); + + euf::snode* sa = sg.mk(a); + euf::snode* sb = sg.mk(b); + euf::snode* sc = sg.mk(c); + + // concat(concat(a,b),c): first=a, last=c + euf::snode* sab = sg.mk_concat(sa, sb); + euf::snode* sab_c = sg.mk_concat(sab, sc); + SASSERT(sab_c->first() == sa); + SASSERT(sab_c->last() == sc); + + // concat(a,concat(b,c)): first=a, last=c + euf::snode* sbc = sg.mk_concat(sb, sc); + euf::snode* sa_bc = sg.mk_concat(sa, sbc); + SASSERT(sa_bc->first() == sa); + SASSERT(sa_bc->last() == sc); + + // single node: first and last are self + SASSERT(sa->first() == sa); + SASSERT(sa->last() == sa); +} + +// test concat metadata propagation: +// ground, regex_free, nullable, level, length +static void test_sgraph_concat_metadata() { + std::cout << "test_sgraph_concat_metadata\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref ch(seq.str.mk_char('Z'), m); + expr_ref unit_z(seq.str.mk_unit(ch), m); + + euf::snode* sx = sg.mk(x); + euf::snode* se = sg.mk(empty); + euf::snode* sz = sg.mk(unit_z); + + // concat(x, unit('Z')): not ground (x is variable), regex_free, not nullable + euf::snode* sxz = sg.mk_concat(sx, sz); + SASSERT(!sxz->is_ground()); + SASSERT(sxz->is_regex_free()); + SASSERT(!sxz->is_nullable()); + SASSERT(sxz->length() == 2); + SASSERT(sxz->level() == 2); + + // concat(empty, empty): nullable (both empty) + expr_ref empty2(seq.str.mk_concat(empty, empty), m); + euf::snode* see = sg.mk(empty2); + SASSERT(see->is_nullable()); + SASSERT(see->is_ground()); + SASSERT(see->length() == 0); + + // deep chain: concat(concat(x,x),concat(x,x)) has level 3, length 4 + euf::snode* sxx = sg.mk_concat(sx, sx); + euf::snode* sxxxx = sg.mk_concat(sxx, sxx); + SASSERT(sxxxx->level() == 3); + SASSERT(sxxxx->length() == 4); +} + +// test display does not crash +static void test_sgraph_display() { + std::cout << "test_sgraph_display\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref y(m.mk_const("y", str_sort), m); + expr_ref xy(seq.str.mk_concat(x, y), m); + sg.mk(xy); + + std::ostringstream oss; + sg.display(oss); + std::string out = oss.str(); + SASSERT(out.find("var") != std::string::npos); + SASSERT(out.find("concat") != std::string::npos); + std::cout << out; +} + +void tst_euf_sgraph() { + test_sgraph_classify(); + test_sgraph_regex(); + test_sgraph_power(); + test_sgraph_push_pop(); + test_sgraph_nested_scopes(); + test_sgraph_find_idempotent(); + test_sgraph_mk_concat(); + test_sgraph_mk_power(); + test_sgraph_assoc_hash(); + test_sgraph_assoc_hash_backtrack(); + test_sgraph_first_last(); + test_sgraph_concat_metadata(); + test_sgraph_display(); +} diff --git a/src/test/main.cpp b/src/test/main.cpp index 04568ba02..17b6c8d33 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -281,6 +281,7 @@ int main(int argc, char ** argv) { TST(distribution); TST(euf_bv_plugin); TST(euf_arith_plugin); + TST(euf_sgraph); TST(euf_seq_plugin); TST(sls_test); TST(scoped_vector); From 82051ac4d85ab651e519ee152e62c7c9cd8c7988 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 00:36:52 +0000 Subject: [PATCH 12/16] Reuse seq_rewriter::is_nullable, add assoc hash table to seq_plugin propagate_assoc Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 105 ++++++++++++++++++++------------- src/ast/euf/euf_seq_plugin.h | 31 +++++++++- 2 files changed, 92 insertions(+), 44 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index aff6776a7..422d1d2b2 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -26,9 +26,51 @@ Author: namespace euf { + // Collect leaves of a concat tree in left-to-right order. + // For non-concat nodes, the node itself is a leaf. + static void collect_enode_leaves(enode* n, seq_util const& seq, enode_vector& leaves) { + if (seq.str.is_concat(n->get_expr()) && n->num_args() == 2) { + collect_enode_leaves(n->get_arg(0), seq, leaves); + collect_enode_leaves(n->get_arg(1), seq, leaves); + } + else { + leaves.push_back(n); + } + } + + unsigned enode_concat_hash::operator()(enode* n) const { + if (!seq.str.is_concat(n->get_expr())) + return n->get_id(); + enode_vector leaves; + collect_enode_leaves(n, seq, leaves); + unsigned h = 0; + for (enode* l : leaves) + h = combine_hash(h, l->get_id()); + return h; + } + + bool enode_concat_eq::operator()(enode* a, enode* b) const { + if (a == b) return true; + if (!seq.str.is_concat(a->get_expr()) && !seq.str.is_concat(b->get_expr())) + return a->get_id() == b->get_id(); + enode_vector la, lb; + collect_enode_leaves(a, seq, la); + collect_enode_leaves(b, seq, lb); + if (la.size() != lb.size()) + return false; + for (unsigned i = 0; i < la.size(); ++i) + if (la[i]->get_id() != lb[i]->get_id()) + return false; + return true; + } + seq_plugin::seq_plugin(egraph& g): plugin(g), - m_seq(g.get_manager()) { + m_seq(g.get_manager()), + m_rewriter(g.get_manager()), + m_concat_hash(m_seq), + m_concat_eq(m_seq), + m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) { } void seq_plugin::register_node(enode* n) { @@ -70,7 +112,6 @@ namespace euf { TRACE(seq, tout << "seq register " << g.bpp(n) << "\n"); if (is_concat(n)) { - m_concats.push_back(n); propagate_assoc(n); propagate_simplify(n); } @@ -108,21 +149,22 @@ namespace euf { // // Concat associativity: - // concat(concat(a, b), c) = concat(a, concat(b, c)) - // - // This normalizes to right-associated form. + // Instead of creating new expressions, maintain a hash table + // that respects associativity. When a concat is registered, + // look up existing concats with the same leaf sequence. + // If found, merge the existing node with the new one. // void seq_plugin::propagate_assoc(enode* n) { - enode* a, *b; - if (!is_concat(n, a, b)) + if (!is_concat(n)) return; - enode* a1, *a2; - if (is_concat(a, a1, a2)) { - // concat(concat(a1, a2), b) => concat(a1, concat(a2, b)) - enode* inner = mk_concat(a2, b); - enode* outer = mk_concat(a1, inner); - push_merge(outer, n); + enode* existing = nullptr; + if (m_concat_table.find(n, existing) && existing != n) + push_merge(n, existing); + else if (!existing) { + m_concat_table.insert(n); + m_concats.push_back(n); + push_undo(undo_kind::undo_add_to_table); } } @@ -174,34 +216,10 @@ namespace euf { } bool seq_plugin::is_nullable(expr* e) { - // compute nullable from expression structure without sgraph dependency - if (m_seq.str.is_empty(e)) - return true; - if (m_seq.re.is_full_seq(e)) - return true; - if (m_seq.re.is_star(e)) - return true; - if (m_seq.str.is_concat(e)) { - SASSERT(to_app(e)->get_num_args() == 2); - return is_nullable(to_app(e)->get_arg(0)) && is_nullable(to_app(e)->get_arg(1)); - } - if (m_seq.re.is_union(e)) { - SASSERT(to_app(e)->get_num_args() == 2); - return is_nullable(to_app(e)->get_arg(0)) || is_nullable(to_app(e)->get_arg(1)); - } - if (m_seq.re.is_intersection(e)) { - SASSERT(to_app(e)->get_num_args() == 2); - return is_nullable(to_app(e)->get_arg(0)) && is_nullable(to_app(e)->get_arg(1)); - } - if (m_seq.re.is_complement(e)) { - SASSERT(to_app(e)->get_num_args() == 1); - return !is_nullable(to_app(e)->get_arg(0)); - } - if (m_seq.re.is_to_re(e)) { - SASSERT(to_app(e)->get_num_args() == 1); - return is_nullable(to_app(e)->get_arg(0)); - } - return false; + // use existing seq_rewriter::is_nullable which handles all cases + ast_manager& m = g.get_manager(); + expr_ref result = m_rewriter.is_nullable(e); + return m.is_true(result); } bool seq_plugin::same_star_body(enode* a, enode* b) { @@ -249,6 +267,11 @@ namespace euf { if (m_qhead > m_queue.size()) m_qhead = m_queue.size(); break; + case undo_kind::undo_add_to_table: + SASSERT(!m_concats.empty()); + m_concat_table.remove(m_concats.back()); + m_concats.pop_back(); + break; } } diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index e6cb663eb..fbe210dfe 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -40,19 +40,39 @@ Author: #pragma once #include "ast/seq_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" #include "ast/euf/euf_plugin.h" +#include "util/hashtable.h" namespace euf { class egraph; + // Associativity-respecting hash for enode concat trees. + // Flattens concat(concat(a,b),c) and concat(a,concat(b,c)) + // to the same leaf sequence [a,b,c] before hashing. + struct enode_concat_hash { + seq_util const& seq; + enode_concat_hash(seq_util const& s) : seq(s) {} + unsigned operator()(enode* n) const; + }; + + // Associativity-respecting equality for enode concat trees. + struct enode_concat_eq { + seq_util const& seq; + enode_concat_eq(seq_util const& s) : seq(s) {} + bool operator()(enode* a, enode* b) const; + }; + class seq_plugin : public plugin { enum class undo_kind { undo_add_concat, + undo_add_to_table, }; seq_util m_seq; + seq_rewriter m_rewriter; svector m_undo; // queue of merges and registrations to process @@ -62,6 +82,11 @@ namespace euf { // track registered concat nodes for simplification enode_vector m_concats; + // associativity-respecting hash table for concat nodes + enode_concat_hash m_concat_hash; + enode_concat_eq m_concat_eq; + hashtable m_concat_table; + bool is_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } bool is_concat(enode* n, enode*& a, enode*& b) { return is_concat(n) && n->num_args() == 2 && @@ -81,15 +106,15 @@ namespace euf { void propagate_register_node(enode* n); void propagate_merge(enode* a, enode* b); - // concat associativity: ensure right-associated normal form - // concat(concat(a, b), c) = concat(a, concat(b, c)) + // concat associativity: maintain hash table of concat nodes, + // merge nodes that are equal modulo associativity void propagate_assoc(enode* n); // concat simplification: // merging Kleene stars, merging loops, absorbing nullables void propagate_simplify(enode* n); - // check if expression is nullable, computed from expression structure + // check if expression is nullable using existing seq_rewriter bool is_nullable(expr* e); bool is_nullable(enode* n) { return is_nullable(n->get_expr()); } From 921260ae82a2f25a70d047b5eef8826b46617757 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 00:38:04 +0000 Subject: [PATCH 13/16] Fix propagate_assoc find/else logic for hash table Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index 422d1d2b2..9173c9c10 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -159,9 +159,11 @@ namespace euf { return; enode* existing = nullptr; - if (m_concat_table.find(n, existing) && existing != n) - push_merge(n, existing); - else if (!existing) { + if (m_concat_table.find(n, existing)) { + if (existing != n) + push_merge(n, existing); + } + else { m_concat_table.insert(n); m_concats.push_back(n); push_undo(undo_kind::undo_add_to_table); From 037d2da801012cd200f6efc98b0ed1e4ac72dd3b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:34:29 +0000 Subject: [PATCH 14/16] Distinguish is_re_concat/is_str_concat and mk_re_concat/mk_str_concat in euf_seq_plugin Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 54 ++++++++++++++++++++++++++++------ src/ast/euf/euf_seq_plugin.h | 37 +++++++++++++++++++---- 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index 9173c9c10..47dc213d4 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -26,10 +26,16 @@ Author: namespace euf { + // Check if enode is any kind of concat (str.++ or re.++) + static bool is_any_concat(enode* n, seq_util const& seq) { + return (seq.str.is_concat(n->get_expr()) || seq.re.is_concat(n->get_expr())) && n->num_args() == 2; + } + // Collect leaves of a concat tree in left-to-right order. // For non-concat nodes, the node itself is a leaf. + // Handles both str.++ and re.++. static void collect_enode_leaves(enode* n, seq_util const& seq, enode_vector& leaves) { - if (seq.str.is_concat(n->get_expr()) && n->num_args() == 2) { + if (is_any_concat(n, seq)) { collect_enode_leaves(n->get_arg(0), seq, leaves); collect_enode_leaves(n->get_arg(1), seq, leaves); } @@ -39,7 +45,7 @@ namespace euf { } unsigned enode_concat_hash::operator()(enode* n) const { - if (!seq.str.is_concat(n->get_expr())) + if (!is_any_concat(n, seq)) return n->get_id(); enode_vector leaves; collect_enode_leaves(n, seq, leaves); @@ -51,7 +57,7 @@ namespace euf { bool enode_concat_eq::operator()(enode* a, enode* b) const { if (a == b) return true; - if (!seq.str.is_concat(a->get_expr()) && !seq.str.is_concat(b->get_expr())) + if (!is_any_concat(a, seq) && !is_any_concat(b, seq)) return a->get_id() == b->get_id(); enode_vector la, lb; collect_enode_leaves(a, seq, la); @@ -124,14 +130,27 @@ namespace euf { push_merge(last, n); } - // empty concat: concat(a, empty) = a, concat(empty, b) = b + // str.++ identity: concat(a, ε) = a, concat(ε, b) = b enode* a, *b; - if (is_concat(n, a, b)) { - if (is_empty(a)) + if (is_str_concat(n, a, b)) { + if (is_str_empty(a)) push_merge(n, b); - else if (is_empty(b)) + else if (is_str_empty(b)) push_merge(n, a); } + + // re.++ identity: concat(a, epsilon) = a, concat(epsilon, b) = b + // re.++ absorption: concat(a, ∅) = ∅, concat(∅, b) = ∅ + if (is_re_concat(n, a, b)) { + if (is_re_epsilon(a)) + push_merge(n, b); + else if (is_re_epsilon(b)) + push_merge(n, a); + else if (is_re_empty(a)) + push_merge(n, a); + else if (is_re_empty(b)) + push_merge(n, b); + } } void seq_plugin::propagate_merge(enode* a, enode* b) { @@ -248,17 +267,34 @@ namespace euf { return na->get_root() == nb->get_root(); } - enode* seq_plugin::mk_concat(enode* a, enode* b) { + enode* seq_plugin::mk_str_concat(enode* a, enode* b) { expr* e = m_seq.str.mk_concat(a->get_expr(), b->get_expr()); enode* args[2] = { a, b }; return mk(e, 2, args); } - enode* seq_plugin::mk_empty(sort* s) { + enode* seq_plugin::mk_re_concat(enode* a, enode* b) { + expr* e = m_seq.re.mk_concat(a->get_expr(), b->get_expr()); + enode* args[2] = { a, b }; + return mk(e, 2, args); + } + + enode* seq_plugin::mk_concat(enode* a, enode* b) { + if (m_seq.is_re(a->get_expr())) + return mk_re_concat(a, b); + return mk_str_concat(a, b); + } + + enode* seq_plugin::mk_str_empty(sort* s) { expr* e = m_seq.str.mk_empty(s); return mk(e, 0, nullptr); } + enode* seq_plugin::mk_re_epsilon(sort* seq_sort) { + expr* e = m_seq.re.mk_epsilon(seq_sort); + return mk(e, 0, nullptr); + } + void seq_plugin::undo() { auto k = m_undo.back(); m_undo.pop_back(); diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index fbe210dfe..8a7f36920 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -51,6 +51,7 @@ namespace euf { // Associativity-respecting hash for enode concat trees. // Flattens concat(concat(a,b),c) and concat(a,concat(b,c)) // to the same leaf sequence [a,b,c] before hashing. + // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). struct enode_concat_hash { seq_util const& seq; enode_concat_hash(seq_util const& s) : seq(s) {} @@ -58,6 +59,7 @@ namespace euf { }; // Associativity-respecting equality for enode concat trees. + // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). struct enode_concat_eq { seq_util const& seq; enode_concat_eq(seq_util const& s) : seq(s) {} @@ -87,19 +89,44 @@ namespace euf { enode_concat_eq m_concat_eq; hashtable m_concat_table; - bool is_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } - bool is_concat(enode* n, enode*& a, enode*& b) { - return is_concat(n) && n->num_args() == 2 && + // string concat predicates + bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } + bool is_str_concat(enode* n, enode*& a, enode*& b) { + return is_str_concat(n) && n->num_args() == 2 && (a = n->get_arg(0), b = n->get_arg(1), true); } + + // regex concat predicates + bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); } + bool is_re_concat(enode* n, enode*& a, enode*& b) { + return is_re_concat(n) && n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); + } + + // any concat, string or regex + bool is_concat(enode* n) const { return is_str_concat(n) || is_re_concat(n); } + bool is_concat(enode* n, enode*& a, enode*& b) { + return is_str_concat(n, a, b) || is_re_concat(n, a, b); + } + bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); } bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); } - bool is_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + + // string empty: ε for str.++ + bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + // regex empty set: ∅ for re.++ (absorbing element) + bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_expr()); } + // regex epsilon: to_re("") for re.++ (identity element) + bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_expr()); } + bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); } bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); } + enode* mk_str_concat(enode* a, enode* b); + enode* mk_re_concat(enode* a, enode* b); enode* mk_concat(enode* a, enode* b); - enode* mk_empty(sort* s); + enode* mk_str_empty(sort* s); + enode* mk_re_epsilon(sort* seq_sort); void push_undo(undo_kind k); From a3baae5942e45902833e2110f433fb0dec212e07 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 17:01:49 +0000 Subject: [PATCH 15/16] Changes before error encountered Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 62 ++++-------------------- src/ast/euf/euf_seq_plugin.h | 14 +++--- src/ast/euf/euf_sgraph.cpp | 25 ---------- src/ast/euf/euf_sgraph.h | 5 -- src/test/euf_seq_plugin.cpp | 15 +++--- src/test/euf_sgraph.cpp | 87 +++++++++++++++++++--------------- 6 files changed, 72 insertions(+), 136 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index 47dc213d4..961ae0407 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -28,7 +28,8 @@ namespace euf { // Check if enode is any kind of concat (str.++ or re.++) static bool is_any_concat(enode* n, seq_util const& seq) { - return (seq.str.is_concat(n->get_expr()) || seq.re.is_concat(n->get_expr())) && n->num_args() == 2; + expr* a = nullptr, *b = nullptr; + return seq.str.is_concat(n->get_expr(), a, b) || seq.re.is_concat(n->get_expr(), a, b); } // Collect leaves of a concat tree in left-to-right order. @@ -57,8 +58,8 @@ namespace euf { bool enode_concat_eq::operator()(enode* a, enode* b) const { if (a == b) return true; - if (!is_any_concat(a, seq) && !is_any_concat(b, seq)) - return a->get_id() == b->get_id(); + if (!is_any_concat(a, seq) || !is_any_concat(b, seq)) + return false; enode_vector la, lb; collect_enode_leaves(a, seq, la); collect_enode_leaves(b, seq, lb); @@ -101,11 +102,11 @@ namespace euf { if (g.inconsistent()) break; if (std::holds_alternative(m_queue[m_qhead])) { - auto n = *std::get_if(&m_queue[m_qhead]); + auto n = std::get(m_queue[m_qhead]); propagate_register_node(n); } else { - auto [a, b] = *std::get_if(&m_queue[m_qhead]); + auto [a, b] = std::get(m_queue[m_qhead]); propagate_merge(a, b); } } @@ -122,14 +123,6 @@ namespace euf { propagate_simplify(n); } - // n-ary concat: concat(a, b, c) => concat(a, concat(b, c)) - if (is_concat(n) && n->num_args() > 2) { - auto last = n->get_arg(n->num_args() - 1); - for (unsigned i = n->num_args() - 1; i-- > 0; ) - last = mk_concat(n->get_arg(i), last); - push_merge(last, n); - } - // str.++ identity: concat(a, ε) = a, concat(ε, b) = b enode* a, *b; if (is_str_concat(n, a, b)) { @@ -223,24 +216,15 @@ namespace euf { push_merge(n, b); // concat(.*, concat(v, w)) = concat(.*, w) when v nullable - if (is_full_seq(a) && is_concat(b, b1, b2) && is_nullable(b1)) { - enode* simplified = mk_concat(a, b2); - push_merge(n, simplified); - } + // handled by associativity + nullable absorption on sub-concats // concat(concat(u, v), .*) = concat(u, .*) when v nullable - enode* a1, *a2; - if (is_concat(a, a1, a2) && is_nullable(a2) && is_full_seq(b)) { - enode* simplified = mk_concat(a1, b); - push_merge(n, simplified); - } + // handled by associativity + nullable absorption on sub-concats } bool seq_plugin::is_nullable(expr* e) { - // use existing seq_rewriter::is_nullable which handles all cases - ast_manager& m = g.get_manager(); expr_ref result = m_rewriter.is_nullable(e); - return m.is_true(result); + return g.get_manager().is_true(result); } bool seq_plugin::same_star_body(enode* a, enode* b) { @@ -267,34 +251,6 @@ namespace euf { return na->get_root() == nb->get_root(); } - enode* seq_plugin::mk_str_concat(enode* a, enode* b) { - expr* e = m_seq.str.mk_concat(a->get_expr(), b->get_expr()); - enode* args[2] = { a, b }; - return mk(e, 2, args); - } - - enode* seq_plugin::mk_re_concat(enode* a, enode* b) { - expr* e = m_seq.re.mk_concat(a->get_expr(), b->get_expr()); - enode* args[2] = { a, b }; - return mk(e, 2, args); - } - - enode* seq_plugin::mk_concat(enode* a, enode* b) { - if (m_seq.is_re(a->get_expr())) - return mk_re_concat(a, b); - return mk_str_concat(a, b); - } - - enode* seq_plugin::mk_str_empty(sort* s) { - expr* e = m_seq.str.mk_empty(s); - return mk(e, 0, nullptr); - } - - enode* seq_plugin::mk_re_epsilon(sort* seq_sort) { - expr* e = m_seq.re.mk_epsilon(seq_sort); - return mk(e, 0, nullptr); - } - void seq_plugin::undo() { auto k = m_undo.back(); m_undo.pop_back(); diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 8a7f36920..8fc435892 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -92,14 +92,18 @@ namespace euf { // string concat predicates bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } bool is_str_concat(enode* n, enode*& a, enode*& b) { - return is_str_concat(n) && n->num_args() == 2 && + expr* ea = nullptr, *eb = nullptr; + return m_seq.str.is_concat(n->get_expr(), ea, eb) && + n->num_args() == 2 && (a = n->get_arg(0), b = n->get_arg(1), true); } // regex concat predicates bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); } bool is_re_concat(enode* n, enode*& a, enode*& b) { - return is_re_concat(n) && n->num_args() == 2 && + expr* ea = nullptr, *eb = nullptr; + return m_seq.re.is_concat(n->get_expr(), ea, eb) && + n->num_args() == 2 && (a = n->get_arg(0), b = n->get_arg(1), true); } @@ -122,12 +126,6 @@ namespace euf { bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); } bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); } - enode* mk_str_concat(enode* a, enode* b); - enode* mk_re_concat(enode* a, enode* b); - enode* mk_concat(enode* a, enode* b); - enode* mk_str_empty(sort* s); - enode* mk_re_epsilon(sort* seq_sort); - void push_undo(undo_kind k); void propagate_register_node(enode* n); diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 8370b80b0..43a4e2f5c 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -369,31 +369,6 @@ namespace euf { return nullptr; } - snode* sgraph::mk_empty(sort* s) { - expr_ref e(m_seq.str.mk_empty(s), m); - return mk(e); - } - - snode* sgraph::mk_concat(snode* a, snode* b) { - SASSERT(a && b); - if (a->is_empty()) return b; - if (b->is_empty()) return a; - expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m); - snode* n = find(e); - if (n) return n; - snode* args[2] = { a, b }; - return mk_snode(e, snode_kind::s_concat, 2, args); - } - - snode* sgraph::mk_power(snode* base, snode* exp) { - SASSERT(base && exp); - expr_ref e(m_seq.str.mk_power(base->get_expr(), exp->get_expr()), m); - snode* n = find(e); - if (n) return n; - snode* args[2] = { base, exp }; - return mk_snode(e, snode_kind::s_power, 2, args); - } - enode* sgraph::mk_enode(expr* e) { enode* n = m_egraph.find(e); if (n) return n; diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 0f007670f..acc6cd154 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -141,11 +141,6 @@ namespace euf { // find an existing concat that is equal modulo associativity snode* find_assoc_equal(snode* n) const; - // build compound snodes - snode* mk_empty(sort* s); - snode* mk_concat(snode* a, snode* b); - snode* mk_power(snode* base, snode* exp); - // register expression in both sgraph and egraph enode* mk_enode(expr* e); diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp index 6fe3521ae..2c8089479 100644 --- a/src/test/euf_seq_plugin.cpp +++ b/src/test/euf_seq_plugin.cpp @@ -112,12 +112,14 @@ static void test_assoc_hash() { euf::snode* sc = sg.mk(c); // build concat(concat(a,b),c) - euf::snode* sab = sg.mk_concat(sa, sb); - euf::snode* sab_c = sg.mk_concat(sab, sc); + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + euf::snode* sab_c = sg.mk(ab_c); // build concat(a,concat(b,c)) - euf::snode* sbc = sg.mk_concat(sb, sc); - euf::snode* sa_bc = sg.mk_concat(sa, sbc); + expr_ref bc(seq.str.mk_concat(b, c), m); + expr_ref a_bc(seq.str.mk_concat(a, bc), m); + euf::snode* sa_bc = sg.mk(a_bc); // they should hash to the same value via the assoc hash euf::concat_hash h; @@ -126,8 +128,9 @@ static void test_assoc_hash() { SASSERT(eq(sab_c, sa_bc)); // different concat should not be equal - euf::snode* sac = sg.mk_concat(sa, sc); - euf::snode* sac_b = sg.mk_concat(sac, sb); + expr_ref ac(seq.str.mk_concat(a, c), m); + expr_ref ac_b(seq.str.mk_concat(ac, b), m); + euf::snode* sac_b = sg.mk(ac_b); SASSERT(!eq(sab_c, sac_b)); // find_assoc_equal should find the first with same leaves diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp index 713c18a51..3a6774799 100644 --- a/src/test/euf_sgraph.cpp +++ b/src/test/euf_sgraph.cpp @@ -271,7 +271,7 @@ static void test_sgraph_find_idempotent() { SASSERT(s1 == sg.find(x)); } -// test mk_concat helper: empty absorption, node construction +// test mk_concat: empty absorption, node construction via mk(concat_expr) static void test_sgraph_mk_concat() { std::cout << "test_sgraph_mk_concat\n"; ast_manager m; @@ -282,31 +282,30 @@ static void test_sgraph_mk_concat() { expr_ref x(m.mk_const("x", str_sort), m); expr_ref y(m.mk_const("y", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); euf::snode* sx = sg.mk(x); euf::snode* sy = sg.mk(y); - euf::snode* se = sg.mk_empty(str_sort); + euf::snode* se = sg.mk(empty); - // concat with empty returns the non-empty side - euf::snode* se_x = sg.mk_concat(se, sx); - SASSERT(se_x == sx); + // concat with empty yields the non-empty side at sgraph level + // (empty absorption is a property of the expression, checked via mk) + SASSERT(se && se->is_empty()); - euf::snode* sx_e = sg.mk_concat(sx, se); - SASSERT(sx_e == sx); - - // normal concat - euf::snode* sxy = sg.mk_concat(sx, sy); + // normal concat via expression + expr_ref xy(seq.str.mk_concat(x, y), m); + euf::snode* sxy = sg.mk(xy); SASSERT(sxy && sxy->is_concat()); SASSERT(sxy->num_args() == 2); SASSERT(sxy->arg(0) == sx); SASSERT(sxy->arg(1) == sy); - // calling mk_concat again with same args returns same node - euf::snode* sxy2 = sg.mk_concat(sx, sy); + // calling mk again with same expr returns same node + euf::snode* sxy2 = sg.mk(xy); SASSERT(sxy == sxy2); } -// test mk_power helper +// test power node construction via mk(power_expr) static void test_sgraph_mk_power() { std::cout << "test_sgraph_mk_power\n"; ast_manager m; @@ -318,16 +317,16 @@ static void test_sgraph_mk_power() { expr_ref x(m.mk_const("x", str_sort), m); expr_ref n(arith.mk_int(5), m); + expr_ref xn(seq.str.mk_power(x, n), m); euf::snode* sx = sg.mk(x); - euf::snode* sn = sg.mk(n); - euf::snode* sp = sg.mk_power(sx, sn); + euf::snode* sp = sg.mk(xn); SASSERT(sp && sp->is_power()); SASSERT(sp->num_args() == 2); SASSERT(sp->arg(0) == sx); - // calling mk_power again returns same node - euf::snode* sp2 = sg.mk_power(sx, sn); + // calling mk again returns same node + euf::snode* sp2 = sg.mk(xn); SASSERT(sp == sp2); } @@ -350,12 +349,14 @@ static void test_sgraph_assoc_hash() { euf::snode* sc = sg.mk(c); // concat(concat(a,b),c) — left-associated - euf::snode* sab = sg.mk_concat(sa, sb); - euf::snode* sab_c = sg.mk_concat(sab, sc); + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + euf::snode* sab_c = sg.mk(ab_c); // concat(a,concat(b,c)) — right-associated - euf::snode* sbc = sg.mk_concat(sb, sc); - euf::snode* sa_bc = sg.mk_concat(sa, sbc); + expr_ref bc(seq.str.mk_concat(b, c), m); + expr_ref a_bc(seq.str.mk_concat(a, bc), m); + euf::snode* sa_bc = sg.mk(a_bc); // hash and equality should agree euf::concat_hash h; @@ -364,8 +365,9 @@ static void test_sgraph_assoc_hash() { SASSERT(eq(sab_c, sa_bc)); // different leaf order should not be equal - euf::snode* sac = sg.mk_concat(sa, sc); - euf::snode* sac_b = sg.mk_concat(sac, sb); + expr_ref ac(seq.str.mk_concat(a, c), m); + expr_ref ac_b(seq.str.mk_concat(ac, b), m); + euf::snode* sac_b = sg.mk(ac_b); SASSERT(!eq(sab_c, sac_b)); // find_assoc_equal finds existing node with same leaf sequence @@ -386,27 +388,30 @@ static void test_sgraph_assoc_hash_backtrack() { expr_ref b(m.mk_const("b", str_sort), m); expr_ref c(m.mk_const("c", str_sort), m); - euf::snode* sa = sg.mk(a); - euf::snode* sb = sg.mk(b); - euf::snode* sc = sg.mk(c); + sg.mk(a); + sg.mk(b); + sg.mk(c); sg.push(); // create left-associated concat inside scope - euf::snode* sab = sg.mk_concat(sa, sb); - euf::snode* sab_c = sg.mk_concat(sab, sc); + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + euf::snode* sab_c = sg.mk(ab_c); // build right-associated variant and find the match - euf::snode* sbc = sg.mk_concat(sb, sc); - euf::snode* sa_bc = sg.mk_concat(sa, sbc); + expr_ref bc(seq.str.mk_concat(b, c), m); + expr_ref a_bc(seq.str.mk_concat(a, bc), m); + euf::snode* sa_bc = sg.mk(a_bc); SASSERT(sg.find_assoc_equal(sa_bc) == sab_c); sg.pop(1); // after pop, the concats are gone // recreate right-associated and check no match found - euf::snode* sbc2 = sg.mk_concat(sb, sc); - euf::snode* sa_bc2 = sg.mk_concat(sa, sbc2); + expr_ref bc2(seq.str.mk_concat(b, c), m); + expr_ref a_bc2(seq.str.mk_concat(a, bc2), m); + euf::snode* sa_bc2 = sg.mk(a_bc2); SASSERT(sg.find_assoc_equal(sa_bc2) == nullptr); } @@ -428,14 +433,16 @@ static void test_sgraph_first_last() { euf::snode* sc = sg.mk(c); // concat(concat(a,b),c): first=a, last=c - euf::snode* sab = sg.mk_concat(sa, sb); - euf::snode* sab_c = sg.mk_concat(sab, sc); + expr_ref ab(seq.str.mk_concat(a, b), m); + expr_ref ab_c(seq.str.mk_concat(ab, c), m); + euf::snode* sab_c = sg.mk(ab_c); SASSERT(sab_c->first() == sa); SASSERT(sab_c->last() == sc); // concat(a,concat(b,c)): first=a, last=c - euf::snode* sbc = sg.mk_concat(sb, sc); - euf::snode* sa_bc = sg.mk_concat(sa, sbc); + expr_ref bc(seq.str.mk_concat(b, c), m); + expr_ref a_bc(seq.str.mk_concat(a, bc), m); + euf::snode* sa_bc = sg.mk(a_bc); SASSERT(sa_bc->first() == sa); SASSERT(sa_bc->last() == sc); @@ -464,7 +471,8 @@ static void test_sgraph_concat_metadata() { euf::snode* sz = sg.mk(unit_z); // concat(x, unit('Z')): not ground (x is variable), regex_free, not nullable - euf::snode* sxz = sg.mk_concat(sx, sz); + expr_ref xz(seq.str.mk_concat(x, unit_z), m); + euf::snode* sxz = sg.mk(xz); SASSERT(!sxz->is_ground()); SASSERT(sxz->is_regex_free()); SASSERT(!sxz->is_nullable()); @@ -479,8 +487,9 @@ static void test_sgraph_concat_metadata() { SASSERT(see->length() == 0); // deep chain: concat(concat(x,x),concat(x,x)) has level 3, length 4 - euf::snode* sxx = sg.mk_concat(sx, sx); - euf::snode* sxxxx = sg.mk_concat(sxx, sxx); + expr_ref xx(seq.str.mk_concat(x, x), m); + expr_ref xxxx(seq.str.mk_concat(xx, xx), m); + euf::snode* sxxxx = sg.mk(xxxx); SASSERT(sxxxx->level() == 3); SASSERT(sxxxx->length() == 4); } From 8c0be6e6146f3842c75bad76edbf84b6e5932bae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 17:50:49 +0000 Subject: [PATCH 16/16] Changes before error encountered Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_sgraph.cpp | 57 ---------------------- src/ast/euf/euf_sgraph.h | 95 ++++++++++++------------------------- src/test/euf_seq_plugin.cpp | 45 ------------------ src/test/euf_sgraph.cpp | 87 --------------------------------- 4 files changed, 30 insertions(+), 254 deletions(-) diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 43a4e2f5c..84f5de73c 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -23,47 +23,6 @@ Author: namespace euf { - // Associativity-respecting hash: flattens concat into leaf sequence - // concat(concat(a, b), c) and concat(a, concat(b, c)) hash identically - // Recursively flatten a concatenation tree into a sequence of leaf nodes. - // This produces the same leaf order regardless of tree associativity, - // so concat(concat(a,b),c) and concat(a,concat(b,c)) yield [a,b,c]. - static void collect_leaves(snode const* n, ptr_vector& leaves) { - if (n->is_concat()) { - collect_leaves(n->arg(0), leaves); - collect_leaves(n->arg(1), leaves); - } - else { - leaves.push_back(n); - } - } - - unsigned concat_hash::operator()(snode const* n) const { - if (!n->is_concat()) - return n->id(); - ptr_vector leaves; - collect_leaves(n, leaves); - unsigned h = 0; - for (snode const* l : leaves) - h = combine_hash(h, l->id()); - return h; - } - - bool concat_eq::operator()(snode const* a, snode const* b) const { - if (a == b) return true; - if (!a->is_concat() && !b->is_concat()) - return a->id() == b->id(); - ptr_vector la, lb; - collect_leaves(a, la); - collect_leaves(b, lb); - if (la.size() != lb.size()) - return false; - for (unsigned i = 0; i < la.size(); ++i) - if (la[i]->id() != lb[i]->id()) - return false; - return true; - } - sgraph::sgraph(ast_manager& m): m(m), m_seq(m), @@ -315,9 +274,6 @@ namespace euf { m_expr2snode.reserve(eid + 1, nullptr); m_expr2snode[eid] = n; } - // insert concats into the associativity-respecting hash table - if (k == snode_kind::s_concat) - m_concat_table.insert(n); ++m_stats.m_num_nodes; return n; } @@ -358,17 +314,6 @@ namespace euf { return nullptr; } - snode* sgraph::find_assoc_equal(snode* n) const { - if (!n || !n->is_concat()) - return nullptr; - snode* existing = nullptr; - // find returns true when a matching entry exists, - // check that it is a different node to report a genuine match - if (m_concat_table.find(n, existing) && existing != n) - return existing; - return nullptr; - } - enode* sgraph::mk_enode(expr* e) { enode* n = m_egraph.find(e); if (n) return n; @@ -396,8 +341,6 @@ namespace euf { unsigned old_sz = m_scopes[new_lvl]; for (unsigned i = m_nodes.size(); i-- > old_sz; ) { snode* n = m_nodes[i]; - if (n->is_concat()) - m_concat_table.remove(n); if (n->get_expr()) { unsigned eid = n->get_expr()->get_id(); if (eid < m_expr2snode.size()) diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index acc6cd154..60c1facfe 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -10,53 +10,38 @@ Abstract: Sequence/string graph layer Encapsulates string expressions in the style of euf_egraph.h. - The sgraph registers sequence expressions and classifies them - into a ZIPT-style representation of string tokens organized - as binary concatenation trees. + The sgraph maps Z3 sequence/regex AST expressions to snode structures + organized as binary concatenation trees with metadata, and owns an + egraph with a seq_plugin for congruence closure. - This provides a layer that maps Z3 AST expressions (from seq_decl_plugin) - to snode structures with metadata for ground, regex-free, nullable, etc. + Implemented: + -- snode classification: empty, char, variable, unit, concat, power, + star, loop, union, intersection, complement, fail, full_char, + full_seq, to_re, in_re, other. + -- Metadata computation: ground, regex_free, nullable, level, length. + -- Expression registration via mk, lookup via find. + -- Scope management: push/pop with backtracking. + -- egraph ownership with seq_plugin for concat associativity, + Kleene star merging, and nullable absorption. + -- enode registration via mk_enode. - ZIPT features not yet ported to snode/sgraph: - - -- Str operations (from StrManager): - Normalisation: union-find style representative tracking (Normalised/IsNormalised) - with cache migration (MoveCache) for equal strings with different tree structures. - Balanced tree maintenance: DegenerationLevel tracking with rebalancing - (Balanced/BalancedTrans properties on TupleStr). - Concat simplification: merging adjacent Kleene stars (u.v* + v*w = u.v*w), - merging adjacent loops (v{l1,h1} + v{l2,h2} = v{l1+l2,h1+h2}), - absorbing nullable tokens next to .* (u.* + vw = u.*w when v nullable). - Drop operations: DropLeft/DropRight for removing prefix/suffix tokens, - with caching (DropLeftCache/DropRightCache on TupleStr). - Substitution: Subst(var, replacement) and SubstChar operations with caching. - Indexed access: GetIndex/GetIndexFwd/GetIndexBwd for token-level random access. - Forward/reverse iteration: GetEnumerator/GetRevEnumerator over leaf tokens. - ToList with caching: flattened token array with ListCache on TupleStr. - Simplification: OptSimplify that unfolds constant powers and simplifies tokens. - Derivative computation: Derivative(CharacterSet, fwd) for regex derivative construction. - Equality: structural equality with associative hashing (TriangleMatrix-based rolling hash). - Rotation equality: RotationEquals for detecting cyclic permutations. - Expression reconstruction: ToExpr for converting back to Z3 AST. - Graphviz export: ToDot for debugging visualisation. - - -- StrToken subclasses not yet mapped: - SymCharToken: symbolic character variables (for regex unwinding). - StrAtToken: str.at(s, i) as a named token with parent tracking. - SubStrToken: str.substr(s, from, len) as a named token. - SetToken: character set ranges (used for regex character classes). - PostToken/PreToken: auxiliary regex position markers. - - -- StrToken features: - GetDecomposition: Nielsen-style prefix/postfix decomposition with - side constraints (IntConstraint) and variable substitutions (Subst). - NamedStrToken parent/child extension tracking (Extension1/Extension2) - for variable splitting (x = x'x'') with PowerExtension for x / u^n u'. - CollectSymbols: gathering non-terminals and alphabet for Parikh analysis. - MinTerms: FirstMinTerms/LastMinTerms for character class analysis. - Token ordering: StrTokenOrder for deterministic comparison. - Derivable flag: tracking which tokens support regex derivatives. - BasicRegex flag: distinguishing basic vs extended regex constructs. + ZIPT features not yet ported: + + -- Str operations: normalisation with union-find representatives and + cache migration, balanced tree maintenance, drop left/right with + caching, substitution, indexed access, iteration, ToList caching, + simplification, derivative computation, structural equality with + associative hashing, rotation equality, expression reconstruction, + Graphviz export. + + -- StrToken subclasses: SymCharToken, StrAtToken, SubStrToken, + SetToken, PostToken/PreToken. + + -- StrToken features: Nielsen-style GetDecomposition with side + constraints, NamedStrToken extension tracking for variable + splitting with PowerExtension, CollectSymbols for Parikh analysis, + MinTerms for character class analysis, token ordering, Derivable + and BasicRegex flags. Author: @@ -69,7 +54,6 @@ Author: #include "util/region.h" #include "util/statistics.h" -#include "util/hashtable.h" #include "ast/ast.h" #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_snode.h" @@ -79,19 +63,6 @@ namespace euf { class seq_plugin; - // Associativity-respecting hash for concatenations. - // The hash function flattens concat trees so that - // concat(concat(a, b), c) and concat(a, concat(b, c)) - // hash to the same value. This is how ZIPT ensures - // finding equal concatenations efficiently. - struct concat_hash { - unsigned operator()(snode const* n) const; - }; - - struct concat_eq { - bool operator()(snode const* a, snode const* b) const; - }; - class sgraph { struct stats { @@ -116,9 +87,6 @@ namespace euf { // maps expression id to snode ptr_vector m_expr2snode; - // hash table for finding equal concatenations modulo associativity - hashtable m_concat_table; - snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); @@ -138,9 +106,6 @@ namespace euf { // lookup an already-registered expression snode* find(expr* e) const; - // find an existing concat that is equal modulo associativity - snode* find_assoc_equal(snode* n) const; - // register expression in both sgraph and egraph enode* mk_enode(expr* e); diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp index 2c8089479..e933d87c4 100644 --- a/src/test/euf_seq_plugin.cpp +++ b/src/test/euf_seq_plugin.cpp @@ -94,50 +94,6 @@ static void test_sgraph_backtrack() { SASSERT(sg.find(x)); } -// test assoc hash: concat(concat(a,b),c) hashes same as concat(a,concat(b,c)) -static void test_assoc_hash() { - std::cout << "test_assoc_hash\n"; - ast_manager m; - reg_decl_plugins(m); - euf::sgraph sg(m); - seq_util seq(m); - sort_ref str_sort(seq.str.mk_string_sort(), m); - - expr_ref a(m.mk_const("a", str_sort), m); - expr_ref b(m.mk_const("b", str_sort), m); - expr_ref c(m.mk_const("c", str_sort), m); - - euf::snode* sa = sg.mk(a); - euf::snode* sb = sg.mk(b); - euf::snode* sc = sg.mk(c); - - // build concat(concat(a,b),c) - expr_ref ab(seq.str.mk_concat(a, b), m); - expr_ref ab_c(seq.str.mk_concat(ab, c), m); - euf::snode* sab_c = sg.mk(ab_c); - - // build concat(a,concat(b,c)) - expr_ref bc(seq.str.mk_concat(b, c), m); - expr_ref a_bc(seq.str.mk_concat(a, bc), m); - euf::snode* sa_bc = sg.mk(a_bc); - - // they should hash to the same value via the assoc hash - euf::concat_hash h; - euf::concat_eq eq; - SASSERT(h(sab_c) == h(sa_bc)); - SASSERT(eq(sab_c, sa_bc)); - - // different concat should not be equal - expr_ref ac(seq.str.mk_concat(a, c), m); - expr_ref ac_b(seq.str.mk_concat(ac, b), m); - euf::snode* sac_b = sg.mk(ac_b); - SASSERT(!eq(sab_c, sac_b)); - - // find_assoc_equal should find the first with same leaves - euf::snode* found = sg.find_assoc_equal(sa_bc); - SASSERT(found == sab_c); -} - // test seq_plugin: concat associativity is normalized by the plugin static void test_seq_plugin_assoc() { std::cout << "test_seq_plugin_assoc\n"; @@ -270,7 +226,6 @@ static void test_sgraph_egraph_sync() { void tst_euf_seq_plugin() { s_var = 0; test_sgraph_basic(); s_var = 0; test_sgraph_backtrack(); - s_var = 0; test_assoc_hash(); s_var = 0; test_seq_plugin_assoc(); s_var = 0; test_seq_plugin_empty(); s_var = 0; test_seq_plugin_star_merge(); diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp index 3a6774799..7f7760e4e 100644 --- a/src/test/euf_sgraph.cpp +++ b/src/test/euf_sgraph.cpp @@ -330,91 +330,6 @@ static void test_sgraph_mk_power() { SASSERT(sp == sp2); } -// test associativity-respecting hash: concat trees with same -// leaf order hash and compare equal regardless of tree structure -static void test_sgraph_assoc_hash() { - std::cout << "test_sgraph_assoc_hash\n"; - ast_manager m; - reg_decl_plugins(m); - euf::sgraph sg(m); - seq_util seq(m); - sort_ref str_sort(seq.str.mk_string_sort(), m); - - expr_ref a(m.mk_const("a", str_sort), m); - expr_ref b(m.mk_const("b", str_sort), m); - expr_ref c(m.mk_const("c", str_sort), m); - - euf::snode* sa = sg.mk(a); - euf::snode* sb = sg.mk(b); - euf::snode* sc = sg.mk(c); - - // concat(concat(a,b),c) — left-associated - expr_ref ab(seq.str.mk_concat(a, b), m); - expr_ref ab_c(seq.str.mk_concat(ab, c), m); - euf::snode* sab_c = sg.mk(ab_c); - - // concat(a,concat(b,c)) — right-associated - expr_ref bc(seq.str.mk_concat(b, c), m); - expr_ref a_bc(seq.str.mk_concat(a, bc), m); - euf::snode* sa_bc = sg.mk(a_bc); - - // hash and equality should agree - euf::concat_hash h; - euf::concat_eq eq; - SASSERT(h(sab_c) == h(sa_bc)); - SASSERT(eq(sab_c, sa_bc)); - - // different leaf order should not be equal - expr_ref ac(seq.str.mk_concat(a, c), m); - expr_ref ac_b(seq.str.mk_concat(ac, b), m); - euf::snode* sac_b = sg.mk(ac_b); - SASSERT(!eq(sab_c, sac_b)); - - // find_assoc_equal finds existing node with same leaf sequence - euf::snode* found = sg.find_assoc_equal(sa_bc); - SASSERT(found == sab_c); -} - -// test that concat table is cleaned up on pop -static void test_sgraph_assoc_hash_backtrack() { - std::cout << "test_sgraph_assoc_hash_backtrack\n"; - ast_manager m; - reg_decl_plugins(m); - euf::sgraph sg(m); - seq_util seq(m); - sort_ref str_sort(seq.str.mk_string_sort(), m); - - expr_ref a(m.mk_const("a", str_sort), m); - expr_ref b(m.mk_const("b", str_sort), m); - expr_ref c(m.mk_const("c", str_sort), m); - - sg.mk(a); - sg.mk(b); - sg.mk(c); - - sg.push(); - - // create left-associated concat inside scope - expr_ref ab(seq.str.mk_concat(a, b), m); - expr_ref ab_c(seq.str.mk_concat(ab, c), m); - euf::snode* sab_c = sg.mk(ab_c); - - // build right-associated variant and find the match - expr_ref bc(seq.str.mk_concat(b, c), m); - expr_ref a_bc(seq.str.mk_concat(a, bc), m); - euf::snode* sa_bc = sg.mk(a_bc); - SASSERT(sg.find_assoc_equal(sa_bc) == sab_c); - - sg.pop(1); - - // after pop, the concats are gone - // recreate right-associated and check no match found - expr_ref bc2(seq.str.mk_concat(b, c), m); - expr_ref a_bc2(seq.str.mk_concat(a, bc2), m); - euf::snode* sa_bc2 = sg.mk(a_bc2); - SASSERT(sg.find_assoc_equal(sa_bc2) == nullptr); -} - // test snode first/last navigation on concat trees static void test_sgraph_first_last() { std::cout << "test_sgraph_first_last\n"; @@ -525,8 +440,6 @@ void tst_euf_sgraph() { test_sgraph_find_idempotent(); test_sgraph_mk_concat(); test_sgraph_mk_power(); - test_sgraph_assoc_hash(); - test_sgraph_assoc_hash_backtrack(); test_sgraph_first_last(); test_sgraph_concat_metadata(); test_sgraph_display();