From 5bbeedaff0fb41852ce1bcf983830c9bd7604b01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:32:56 +0000 Subject: [PATCH 1/4] Initial plan From 40b99311e32b3777a613a51afe834033b418b4c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:58:21 +0000 Subject: [PATCH 2/4] Fix enode_concat_eq leaf comparison, simplify is_str/re_concat, update sgraph summary Address remaining review feedback from PR #8820: - Use pointer comparison instead of ID comparison in enode_concat_eq - Remove redundant num_args() == 2 check from is_str_concat/is_re_concat - Update euf_sgraph.h abstract to detail seq_plugin features Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 2 +- src/ast/euf/euf_seq_plugin.h | 16 ++++++++++------ src/ast/euf/euf_sgraph.h | 15 +++++++++------ 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index 961ae0407..da366eb1f 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -66,7 +66,7 @@ namespace euf { if (la.size() != lb.size()) return false; for (unsigned i = 0; i < la.size(); ++i) - if (la[i]->get_id() != lb[i]->get_id()) + if (la[i] != lb[i]) return false; return true; } diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 8fc435892..24624c6e0 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -93,18 +93,22 @@ namespace euf { bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } bool is_str_concat(enode* n, enode*& a, enode*& b) { expr* ea = nullptr, *eb = nullptr; - return m_seq.str.is_concat(n->get_expr(), ea, eb) && - n->num_args() == 2 && - (a = n->get_arg(0), b = n->get_arg(1), true); + if (!m_seq.str.is_concat(n->get_expr(), ea, eb)) + return false; + a = n->get_arg(0); + b = n->get_arg(1); + return true; } // regex concat predicates bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); } bool is_re_concat(enode* n, enode*& a, enode*& b) { expr* ea = nullptr, *eb = nullptr; - return m_seq.re.is_concat(n->get_expr(), ea, eb) && - n->num_args() == 2 && - (a = n->get_arg(0), b = n->get_arg(1), true); + if (!m_seq.re.is_concat(n->get_expr(), ea, eb)) + return false; + a = n->get_arg(0); + b = n->get_arg(1); + return true; } // any concat, string or regex diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 60c1facfe..d6da7e184 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -9,21 +9,24 @@ Abstract: Sequence/string graph layer - Encapsulates string expressions in the style of euf_egraph.h. + Encapsulates string and regex expressions for the string solver. The sgraph maps Z3 sequence/regex AST expressions to snode structures organized as binary concatenation trees with metadata, and owns an egraph with a seq_plugin for congruence closure. - Implemented: -- snode classification: empty, char, variable, unit, concat, power, star, loop, union, intersection, complement, fail, full_char, full_seq, to_re, in_re, other. -- Metadata computation: ground, regex_free, nullable, level, length. - -- Expression registration via mk, lookup via find. + -- Expression registration via mk(expr*), lookup via find(expr*). -- Scope management: push/pop with backtracking. - -- egraph ownership with seq_plugin for concat associativity, - Kleene star merging, and nullable absorption. - -- enode registration via mk_enode. + -- egraph ownership with seq_plugin for: + * concat associativity via associativity-respecting hash table, + * Kleene star merging (u.v*.v*.w = u.v*.w), + * nullable absorption next to .* (u.*.v.w = u.*.w when v nullable), + * str.++ identity elimination (concat(a, ε) = a), + * re.++ identity/absorption (concat(a, epsilon) = a, concat(a, ∅) = ∅). + -- enode registration via mk_enode(expr*). ZIPT features not yet ported: From 150f1fe2ead370fad958bb2d5c404c44dc4da41f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:28:32 +0000 Subject: [PATCH 3/4] Revert is_concat changes, add ZIPT URL, implement snode/sgraph operations and tests - Revert is_str_concat/is_re_concat to original form (PR #8820 review) - Add ZIPT URL (https://github.com/CEisenhofer/ZIPT) to euf_sgraph.h - Add snode::at() for token indexing and collect_tokens() for enumeration - Add sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat - Add sgraph drop operations: drop_first, drop_last, drop_left, drop_right - Add sgraph substitution: subst(snode*, snode*, snode*) - Add Brzozowski derivative via seq_rewriter::mk_derivative - Add minterm computation from regex predicates - Add 7 new unit tests covering all new operations with complex concats Co-authored-by: NikolajBjorner <56730610+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.h | 16 +- src/ast/euf/euf_sgraph.cpp | 141 +++++++++++++++- src/ast/euf/euf_sgraph.h | 26 +++ src/ast/euf/euf_snode.h | 25 +++ src/test/euf_sgraph.cpp | 304 +++++++++++++++++++++++++++++++++++ 5 files changed, 501 insertions(+), 11 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 24624c6e0..8fc435892 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -93,22 +93,18 @@ namespace euf { bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } bool is_str_concat(enode* n, enode*& a, enode*& b) { expr* ea = nullptr, *eb = nullptr; - if (!m_seq.str.is_concat(n->get_expr(), ea, eb)) - return false; - a = n->get_arg(0); - b = n->get_arg(1); - return true; + return m_seq.str.is_concat(n->get_expr(), ea, eb) && + n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); } // regex concat predicates bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); } bool is_re_concat(enode* n, enode*& a, enode*& b) { expr* ea = nullptr, *eb = nullptr; - if (!m_seq.re.is_concat(n->get_expr(), ea, eb)) - return false; - a = n->get_arg(0); - b = n->get_arg(1); - return true; + return m_seq.re.is_concat(n->get_expr(), ea, eb) && + n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); } // any concat, string or regex diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 84f5de73c..f8c518131 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -26,8 +26,10 @@ namespace euf { sgraph::sgraph(ast_manager& m): m(m), m_seq(m), + m_rewriter(m), m_egraph(m), - m_exprs(m) { + m_exprs(m), + m_str_sort(m_seq.str.mk_string_sort(), m) { // create seq_plugin and register it with the egraph m_egraph.add_plugin(alloc(seq_plugin, m_egraph)); // register on_make callback so sgraph creates snodes for new enodes @@ -354,6 +356,143 @@ namespace euf { m_egraph.pop(num_scopes); } + snode* sgraph::mk_var(symbol const& name) { + expr_ref e(m.mk_const(name, m_str_sort), m); + return mk(e); + } + + snode* sgraph::mk_char(unsigned ch) { + expr_ref c(m_seq.str.mk_char(ch), m); + expr_ref u(m_seq.str.mk_unit(c), m); + return mk(u); + } + + snode* sgraph::mk_empty() { + expr_ref e(m_seq.str.mk_empty(m_str_sort), m); + return mk(e); + } + + snode* sgraph::mk_concat(snode* a, snode* b) { + if (a->is_empty()) return b; + if (b->is_empty()) return a; + expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m); + return mk(e); + } + + snode* sgraph::drop_first(snode* n) { + if (n->is_empty() || n->is_token()) + return mk_empty(); + SASSERT(n->is_concat()); + snode* l = n->arg(0); + snode* r = n->arg(1); + if (l->is_token() || l->is_empty()) + return r; + return mk_concat(drop_first(l), r); + } + + snode* sgraph::drop_last(snode* n) { + if (n->is_empty() || n->is_token()) + return mk_empty(); + SASSERT(n->is_concat()); + snode* l = n->arg(0); + snode* r = n->arg(1); + if (r->is_token() || r->is_empty()) + return l; + return mk_concat(l, drop_last(r)); + } + + snode* sgraph::drop_left(snode* n, unsigned count) { + for (unsigned i = 0; i < count && !n->is_empty(); ++i) + n = drop_first(n); + return n; + } + + snode* sgraph::drop_right(snode* n, unsigned count) { + for (unsigned i = 0; i < count && !n->is_empty(); ++i) + n = drop_last(n); + return n; + } + + snode* sgraph::subst(snode* n, snode* var, snode* replacement) { + if (n == var) + return replacement; + if (n->is_empty() || n->is_char()) + return n; + if (n->is_concat()) + return mk_concat(subst(n->arg(0), var, replacement), + subst(n->arg(1), var, replacement)); + // for non-concat compound nodes (power, star, etc.), no substitution into children + return n; + } + + snode* sgraph::brzozowski_deriv(snode* re, snode* elem) { + expr* re_expr = re->get_expr(); + expr* elem_expr = elem->get_expr(); + if (!re_expr || !elem_expr) + return nullptr; + // unwrap str.unit to get the character expression + expr* ch = nullptr; + if (m_seq.str.is_unit(elem_expr, ch)) + elem_expr = ch; + expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr); + if (!result) + return nullptr; + return mk(result); + } + + void sgraph::collect_re_predicates(snode* re, expr_ref_vector& preds) { + if (!re || !re->get_expr()) + return; + expr* e = re->get_expr(); + expr* ch = nullptr, *lo = nullptr, *hi = nullptr; + // leaf regex predicates: character ranges and single characters + if (m_seq.re.is_range(e, lo, hi)) { + preds.push_back(e); + return; + } + if (m_seq.re.is_to_re(e)) + return; + if (m_seq.re.is_full_char(e)) + return; + if (m_seq.re.is_full_seq(e)) + return; + if (m_seq.re.is_empty(e)) + return; + // recurse into compound regex operators + for (unsigned i = 0; i < re->num_args(); ++i) + collect_re_predicates(re->arg(i), preds); + } + + void sgraph::compute_minterms(snode* re, snode_vector& minterms) { + // extract character predicates from the regex + expr_ref_vector preds(m); + collect_re_predicates(re, preds); + if (preds.empty()) { + // no predicates means the whole alphabet is one minterm + // represented by full_char + expr_ref fc(m_seq.re.mk_full_char(m_str_sort), m); + minterms.push_back(mk(fc)); + return; + } + // generate minterms as conjunctions/negations of predicates + // for n predicates, there are up to 2^n minterms + unsigned n = preds.size(); + for (unsigned mask = 0; mask < (1u << n); ++mask) { + expr_ref_vector conj(m); + for (unsigned i = 0; i < n; ++i) { + if (mask & (1u << i)) + conj.push_back(preds.get(i)); + else + conj.push_back(m_seq.re.mk_complement(preds.get(i))); + } + // intersect all terms + expr_ref mt(conj.get(0), m); + for (unsigned i = 1; i < conj.size(); ++i) + mt = m_seq.re.mk_inter(mt, conj.get(i)); + minterms.push_back(mk(mt)); + } + } + std::ostream& sgraph::display(std::ostream& out) const { auto kind_str = [](snode_kind k) -> char const* { switch (k) { diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index d6da7e184..2eedb3c75 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -10,6 +10,7 @@ Abstract: Sequence/string graph layer Encapsulates string and regex expressions for the string solver. + Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT). The sgraph maps Z3 sequence/regex AST expressions to snode structures organized as binary concatenation trees with metadata, and owns an egraph with a seq_plugin for congruence closure. @@ -59,6 +60,7 @@ Author: #include "util/statistics.h" #include "ast/ast.h" #include "ast/seq_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" #include "ast/euf/euf_snode.h" #include "ast/euf/euf_egraph.h" @@ -79,10 +81,12 @@ namespace euf { ast_manager& m; seq_util m_seq; + seq_rewriter m_rewriter; egraph m_egraph; region m_region; snode_vector m_nodes; expr_ref_vector m_exprs; // pin expressions + sort_ref m_str_sort; // cached string sort unsigned_vector m_scopes; unsigned m_num_scopes = 0; stats m_stats; @@ -93,6 +97,7 @@ namespace euf { snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); + void collect_re_predicates(snode* re, expr_ref_vector& preds); public: sgraph(ast_manager& m); @@ -112,6 +117,27 @@ namespace euf { // register expression in both sgraph and egraph enode* mk_enode(expr* e); + // factory methods for creating snodes with corresponding expressions + snode* mk_var(symbol const& name); + snode* mk_char(unsigned ch); + snode* mk_empty(); + snode* mk_concat(snode* a, snode* b); + + // drop operations: remove tokens from the front/back of a concat tree + snode* drop_first(snode* n); + snode* drop_last(snode* n); + snode* drop_left(snode* n, unsigned count); + snode* drop_right(snode* n, unsigned count); + + // substitution: replace all occurrences of var in n by replacement + snode* subst(snode* n, snode* var, snode* replacement); + + // Brzozowski derivative of regex re with respect to element elem + snode* brzozowski_deriv(snode* re, snode* elem); + + // compute minterms (character class partition) from a regex + void compute_minterms(snode* re, snode_vector& minterms); + // scope management for backtracking void push(); void pop(unsigned num_scopes); diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 40ce8f4f8..26de2d3ec 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -157,6 +157,31 @@ namespace euf { s = s->arg(1); return s; } + + // collect all leaf tokens in left-to-right order + void collect_tokens(snode_vector& tokens) const { + if (is_concat()) { + arg(0)->collect_tokens(tokens); + arg(1)->collect_tokens(tokens); + } + else if (!is_empty()) { + tokens.push_back(const_cast(this)); + } + } + + // access the i-th token (0-based, left-to-right order) + // returns nullptr if i >= length() + snode* at(unsigned i) const { + if (is_concat()) { + unsigned left_len = arg(0)->length(); + if (i < left_len) + return arg(0)->at(i); + return arg(1)->at(i - left_len); + } + if (is_empty()) + return nullptr; + return i == 0 ? const_cast(this) : nullptr; + } }; } diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp index 7f7760e4e..028573bec 100644 --- a/src/test/euf_sgraph.cpp +++ b/src/test/euf_sgraph.cpp @@ -431,6 +431,303 @@ static void test_sgraph_display() { std::cout << out; } +// test sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat +static void test_sgraph_factory() { + std::cout << "test_sgraph_factory\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + + // mk_var + euf::snode* x = sg.mk_var(symbol("x")); + SASSERT(x && x->is_var()); + SASSERT(!x->is_ground()); + SASSERT(x->length() == 1); + + // mk_char + euf::snode* a = sg.mk_char('A'); + SASSERT(a && a->is_char()); + SASSERT(a->is_ground()); + SASSERT(a->length() == 1); + + // mk_empty + euf::snode* e = sg.mk_empty(); + SASSERT(e && e->is_empty()); + SASSERT(e->is_nullable()); + SASSERT(e->length() == 0); + + // mk_concat with empty absorption + euf::snode* xe = sg.mk_concat(x, e); + SASSERT(xe == x); + euf::snode* ex = sg.mk_concat(e, x); + SASSERT(ex == x); + + // mk_concat of two variables + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* xy = sg.mk_concat(x, y); + SASSERT(xy && xy->is_concat()); + SASSERT(xy->length() == 2); + SASSERT(xy->arg(0) == x); + SASSERT(xy->arg(1) == y); + + // mk_concat of multiple characters + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + euf::snode* abc = sg.mk_concat(sg.mk_concat(a, b), c); + SASSERT(abc->length() == 3); + SASSERT(abc->is_ground()); + SASSERT(abc->first() == a); + SASSERT(abc->last() == c); +} + +// test snode::at() and snode::collect_tokens() +static void test_sgraph_indexing() { + std::cout << "test_sgraph_indexing\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + euf::snode* x = sg.mk_var(symbol("x")); + + // build concat(concat(a, b), concat(c, x)) => [A, B, C, x] + euf::snode* ab = sg.mk_concat(a, b); + euf::snode* cx = sg.mk_concat(c, x); + euf::snode* abcx = sg.mk_concat(ab, cx); + + SASSERT(abcx->length() == 4); + + // test at() + SASSERT(abcx->at(0) == a); + SASSERT(abcx->at(1) == b); + SASSERT(abcx->at(2) == c); + SASSERT(abcx->at(3) == x); + SASSERT(abcx->at(4) == nullptr); // out of bounds + + // test collect_tokens() + euf::snode_vector tokens; + abcx->collect_tokens(tokens); + SASSERT(tokens.size() == 4); + SASSERT(tokens[0] == a); + SASSERT(tokens[1] == b); + SASSERT(tokens[2] == c); + SASSERT(tokens[3] == x); + + // single token: at(0) is self + SASSERT(a->at(0) == a); + SASSERT(a->at(1) == nullptr); + + // empty: at(0) is nullptr + euf::snode* e = sg.mk_empty(); + SASSERT(e->at(0) == nullptr); + euf::snode_vector empty_tokens; + e->collect_tokens(empty_tokens); + SASSERT(empty_tokens.empty()); +} + +// test sgraph drop operations +static void test_sgraph_drop() { + std::cout << "test_sgraph_drop\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + euf::snode* d = sg.mk_char('D'); + + // build concat(concat(a, b), concat(c, d)) => [A, B, C, D] + euf::snode* ab = sg.mk_concat(a, b); + euf::snode* cd = sg.mk_concat(c, d); + euf::snode* abcd = sg.mk_concat(ab, cd); + + SASSERT(abcd->length() == 4); + + // drop_first: [A, B, C, D] => [B, C, D] + euf::snode* bcd = sg.drop_first(abcd); + SASSERT(bcd->length() == 3); + SASSERT(bcd->first() == b); + SASSERT(bcd->last() == d); + + // drop_last: [A, B, C, D] => [A, B, C] + euf::snode* abc = sg.drop_last(abcd); + SASSERT(abc->length() == 3); + SASSERT(abc->first() == a); + SASSERT(abc->last() == c); + + // drop_left(2): [A, B, C, D] => [C, D] + euf::snode* cd2 = sg.drop_left(abcd, 2); + SASSERT(cd2->length() == 2); + SASSERT(cd2->first() == c); + + // drop_right(2): [A, B, C, D] => [A, B] + euf::snode* ab2 = sg.drop_right(abcd, 2); + SASSERT(ab2->length() == 2); + SASSERT(ab2->last() == b); + + // drop all: [A, B, C, D] => empty + euf::snode* empty = sg.drop_left(abcd, 4); + SASSERT(empty->is_empty()); + + // drop from single token: [A] => empty + euf::snode* e = sg.drop_first(a); + SASSERT(e->is_empty()); + + // drop from empty: no change + euf::snode* ee = sg.drop_first(sg.mk_empty()); + SASSERT(ee->is_empty()); +} + +// test sgraph substitution +static void test_sgraph_subst() { + std::cout << "test_sgraph_subst\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // concat(x, concat(a, x)) with x -> b gives concat(b, concat(a, b)) + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* xax = sg.mk_concat(x, ax); + SASSERT(xax->length() == 3); + + euf::snode* result = sg.subst(xax, x, b); + SASSERT(result->length() == 3); + SASSERT(result->first() == b); + SASSERT(result->last() == b); + SASSERT(result->at(1) == a); // middle is still 'A' + + // substitution of non-occurring variable is identity + euf::snode* same = sg.subst(xax, y, b); + SASSERT(same == xax); + + // substitution of variable with empty + euf::snode* e = sg.mk_empty(); + euf::snode* collapsed = sg.subst(xax, x, e); + SASSERT(collapsed->length() == 1); // just 'a' remains + SASSERT(collapsed == a); +} + +// test complex concatenation creation, merging and simplification +static void test_sgraph_complex_concat() { + std::cout << "test_sgraph_complex_concat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + + // build a string "HELLO" = concat(H, concat(E, concat(L, concat(L, O)))) + euf::snode* h = sg.mk_char('H'); + euf::snode* e = sg.mk_char('E'); + euf::snode* l = sg.mk_char('L'); + euf::snode* o = sg.mk_char('O'); + + euf::snode* lo = sg.mk_concat(l, o); + euf::snode* llo = sg.mk_concat(l, lo); + euf::snode* ello = sg.mk_concat(e, llo); + euf::snode* hello = sg.mk_concat(h, ello); + + SASSERT(hello->length() == 5); + SASSERT(hello->is_ground()); + SASSERT(hello->first() == h); + SASSERT(hello->last() == o); + + // index into "HELLO" + SASSERT(hello->at(0) == h); + SASSERT(hello->at(1) == e); + SASSERT(hello->at(2) == l); + SASSERT(hello->at(3) == l); + SASSERT(hello->at(4) == o); + + // drop first 2 from "HELLO" => "LLO" + euf::snode* llo2 = sg.drop_left(hello, 2); + SASSERT(llo2->length() == 3); + SASSERT(llo2->first() == l); + + // drop last 3 from "HELLO" => "HE" + euf::snode* he = sg.drop_right(hello, 3); + SASSERT(he->length() == 2); + SASSERT(he->first() == h); + SASSERT(he->last() == e); + + // mixed variables and characters: concat(x, "AB", y) + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* ab = sg.mk_concat(a, b); + euf::snode* xab = sg.mk_concat(x, ab); + euf::snode* xaby = sg.mk_concat(xab, y); + + SASSERT(xaby->length() == 4); + SASSERT(!xaby->is_ground()); + SASSERT(xaby->at(0) == x); + SASSERT(xaby->at(1) == a); + SASSERT(xaby->at(2) == b); + SASSERT(xaby->at(3) == y); +} + +// test Brzozowski derivative computation +static void test_sgraph_brzozowski() { + std::cout << "test_sgraph_brzozowski\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + // derivative of re.star(to_re("a")) w.r.t. 'a' + // d/da (a*) = a* + expr_ref ch_a(seq.str.mk_char('a'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref star_a(seq.re.mk_star(to_re_a), m); + + euf::snode* s_star_a = sg.mk(star_a); + euf::snode* s_unit_a = sg.mk(unit_a); + + euf::snode* deriv = sg.brzozowski_deriv(s_star_a, s_unit_a); + SASSERT(deriv != nullptr); + std::cout << " d/da(a*) kind: " << (int)deriv->kind() << "\n"; + + // derivative of re.empty w.r.t. 'a' should be re.empty + sort_ref re_sort(seq.re.mk_re(str_sort), m); + expr_ref re_empty(seq.re.mk_empty(re_sort), m); + euf::snode* s_empty = sg.mk(re_empty); + euf::snode* deriv_empty = sg.brzozowski_deriv(s_empty, s_unit_a); + SASSERT(deriv_empty != nullptr); + SASSERT(deriv_empty->is_fail()); // derivative of empty set is empty set + std::cout << " d/da(empty) kind: " << (int)deriv_empty->kind() << "\n"; + + sg.display(std::cout); +} + +// test minterm computation +static void test_sgraph_minterms() { + std::cout << "test_sgraph_minterms\n"; + ast_manager m; + reg_decl_plugins(m); + euf::sgraph sg(m); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + // simple regex with no character predicates: re.all (.*) + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* s_re_all = sg.mk(re_all); + + euf::snode_vector minterms; + sg.compute_minterms(s_re_all, minterms); + // no predicates => single minterm (full_char) + SASSERT(minterms.size() == 1); + std::cout << " re.all minterms: " << minterms.size() << "\n"; +} + void tst_euf_sgraph() { test_sgraph_classify(); test_sgraph_regex(); @@ -443,4 +740,11 @@ void tst_euf_sgraph() { test_sgraph_first_last(); test_sgraph_concat_metadata(); test_sgraph_display(); + test_sgraph_factory(); + test_sgraph_indexing(); + test_sgraph_drop(); + test_sgraph_subst(); + test_sgraph_complex_concat(); + test_sgraph_brzozowski(); + test_sgraph_minterms(); } From 1274a1e3c030214f795018906d41732c2c7a6c0f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:29:41 +0000 Subject: [PATCH 4/4] Add safety cap on minterm computation and assertion for invariant Co-authored-by: NikolajBjorner <56730610+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_sgraph.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index f8c518131..3168c2263 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -477,6 +477,9 @@ namespace euf { // generate minterms as conjunctions/negations of predicates // for n predicates, there are up to 2^n minterms unsigned n = preds.size(); + // cap at reasonable size to prevent exponential blowup + if (n > 20) + n = 20; for (unsigned mask = 0; mask < (1u << n); ++mask) { expr_ref_vector conj(m); for (unsigned i = 0; i < n; ++i) { @@ -485,6 +488,7 @@ namespace euf { else conj.push_back(m_seq.re.mk_complement(preds.get(i))); } + SASSERT(!conj.empty()); // intersect all terms expr_ref mt(conj.get(0), m); for (unsigned i = 1; i < conj.size(); ++i)