From 5bbeedaff0fb41852ce1bcf983830c9bd7604b01 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 2 Mar 2026 18:32:56 +0000
Subject: [PATCH 1/4] Initial plan


From 40b99311e32b3777a613a51afe834033b418b4c0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 2 Mar 2026 18:58:21 +0000
Subject: [PATCH 2/4] Fix enode_concat_eq leaf comparison, simplify
 is_str/re_concat, update sgraph summary

Address remaining review feedback from PR #8820:
- Use pointer comparison instead of ID comparison in enode_concat_eq
- Remove redundant num_args() == 2 check from is_str_concat/is_re_concat
- Update euf_sgraph.h abstract to detail seq_plugin features

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
 src/ast/euf/euf_seq_plugin.cpp |  2 +-
 src/ast/euf/euf_seq_plugin.h   | 16 ++++++++++------
 src/ast/euf/euf_sgraph.h       | 15 +++++++++------
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp
index 961ae0407..da366eb1f 100644
--- a/src/ast/euf/euf_seq_plugin.cpp
+++ b/src/ast/euf/euf_seq_plugin.cpp
@@ -66,7 +66,7 @@ namespace euf {
         if (la.size() != lb.size())
             return false;
         for (unsigned i = 0; i < la.size(); ++i)
-            if (la[i]->get_id() != lb[i]->get_id())
+            if (la[i] != lb[i])
                 return false;
         return true;
     }
diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h
index 8fc435892..24624c6e0 100644
--- a/src/ast/euf/euf_seq_plugin.h
+++ b/src/ast/euf/euf_seq_plugin.h
@@ -93,18 +93,22 @@ namespace euf {
         bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); }
         bool is_str_concat(enode* n, enode*& a, enode*& b) {
             expr* ea = nullptr, *eb = nullptr;
-            return m_seq.str.is_concat(n->get_expr(), ea, eb) &&
-                   n->num_args() == 2 &&
-                   (a = n->get_arg(0), b = n->get_arg(1), true);
+            if (!m_seq.str.is_concat(n->get_expr(), ea, eb))
+                return false;
+            a = n->get_arg(0);
+            b = n->get_arg(1);
+            return true;
         }
 
         // regex concat predicates
         bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); }
         bool is_re_concat(enode* n, enode*& a, enode*& b) {
             expr* ea = nullptr, *eb = nullptr;
-            return m_seq.re.is_concat(n->get_expr(), ea, eb) &&
-                   n->num_args() == 2 &&
-                   (a = n->get_arg(0), b = n->get_arg(1), true);
+            if (!m_seq.re.is_concat(n->get_expr(), ea, eb))
+                return false;
+            a = n->get_arg(0);
+            b = n->get_arg(1);
+            return true;
         }
 
         // any concat, string or regex
diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h
index 60c1facfe..d6da7e184 100644
--- a/src/ast/euf/euf_sgraph.h
+++ b/src/ast/euf/euf_sgraph.h
@@ -9,21 +9,24 @@ Abstract:
 
     Sequence/string graph layer
 
-    Encapsulates string expressions in the style of euf_egraph.h.
+    Encapsulates string and regex expressions for the string solver.
     The sgraph maps Z3 sequence/regex AST expressions to snode structures
     organized as binary concatenation trees with metadata, and owns an
     egraph with a seq_plugin for congruence closure.
 
-    Implemented:
     -- snode classification: empty, char, variable, unit, concat, power,
        star, loop, union, intersection, complement, fail, full_char,
        full_seq, to_re, in_re, other.
     -- Metadata computation: ground, regex_free, nullable, level, length.
-    -- Expression registration via mk, lookup via find.
+    -- Expression registration via mk(expr*), lookup via find(expr*).
     -- Scope management: push/pop with backtracking.
-    -- egraph ownership with seq_plugin for concat associativity,
-       Kleene star merging, and nullable absorption.
-    -- enode registration via mk_enode.
+    -- egraph ownership with seq_plugin for:
+       * concat associativity via associativity-respecting hash table,
+       * Kleene star merging (u.v*.v*.w = u.v*.w),
+       * nullable absorption next to .* (u.*.v.w = u.*.w when v nullable),
+       * str.++ identity elimination (concat(a, ε) = a),
+       * re.++ identity/absorption (concat(a, epsilon) = a, concat(a, ∅) = ∅).
+    -- enode registration via mk_enode(expr*).
 
     ZIPT features not yet ported:
 

From 150f1fe2ead370fad958bb2d5c404c44dc4da41f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 2 Mar 2026 19:28:32 +0000
Subject: [PATCH 3/4] Revert is_concat changes, add ZIPT URL, implement
 snode/sgraph operations and tests

- Revert is_str_concat/is_re_concat to original form (PR #8820 review)
- Add ZIPT URL (https://github.com/CEisenhofer/ZIPT) to euf_sgraph.h
- Add snode::at() for token indexing and collect_tokens() for enumeration
- Add sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat
- Add sgraph drop operations: drop_first, drop_last, drop_left, drop_right
- Add sgraph substitution: subst(snode*, snode*, snode*)
- Add Brzozowski derivative via seq_rewriter::mk_derivative
- Add minterm computation from regex predicates
- Add 7 new unit tests covering all new operations with complex concats

Co-authored-by: NikolajBjorner <56730610+NikolajBjorner@users.noreply.github.com>
---
 src/ast/euf/euf_seq_plugin.h |  16 +-
 src/ast/euf/euf_sgraph.cpp   | 141 +++++++++++++++-
 src/ast/euf/euf_sgraph.h     |  26 +++
 src/ast/euf/euf_snode.h      |  25 +++
 src/test/euf_sgraph.cpp      | 304 +++++++++++++++++++++++++++++++++++
 5 files changed, 501 insertions(+), 11 deletions(-)

diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h
index 24624c6e0..8fc435892 100644
--- a/src/ast/euf/euf_seq_plugin.h
+++ b/src/ast/euf/euf_seq_plugin.h
@@ -93,22 +93,18 @@ namespace euf {
         bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); }
         bool is_str_concat(enode* n, enode*& a, enode*& b) {
             expr* ea = nullptr, *eb = nullptr;
-            if (!m_seq.str.is_concat(n->get_expr(), ea, eb))
-                return false;
-            a = n->get_arg(0);
-            b = n->get_arg(1);
-            return true;
+            return m_seq.str.is_concat(n->get_expr(), ea, eb) &&
+                   n->num_args() == 2 &&
+                   (a = n->get_arg(0), b = n->get_arg(1), true);
         }
 
         // regex concat predicates
         bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); }
         bool is_re_concat(enode* n, enode*& a, enode*& b) {
             expr* ea = nullptr, *eb = nullptr;
-            if (!m_seq.re.is_concat(n->get_expr(), ea, eb))
-                return false;
-            a = n->get_arg(0);
-            b = n->get_arg(1);
-            return true;
+            return m_seq.re.is_concat(n->get_expr(), ea, eb) &&
+                   n->num_args() == 2 &&
+                   (a = n->get_arg(0), b = n->get_arg(1), true);
         }
 
         // any concat, string or regex
diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp
index 84f5de73c..f8c518131 100644
--- a/src/ast/euf/euf_sgraph.cpp
+++ b/src/ast/euf/euf_sgraph.cpp
@@ -26,8 +26,10 @@ namespace euf {
     sgraph::sgraph(ast_manager& m):
         m(m),
         m_seq(m),
+        m_rewriter(m),
         m_egraph(m),
-        m_exprs(m) {
+        m_exprs(m),
+        m_str_sort(m_seq.str.mk_string_sort(), m) {
         // create seq_plugin and register it with the egraph
         m_egraph.add_plugin(alloc(seq_plugin, m_egraph));
         // register on_make callback so sgraph creates snodes for new enodes
@@ -354,6 +356,143 @@ namespace euf {
         m_egraph.pop(num_scopes);
     }
 
+    snode* sgraph::mk_var(symbol const& name) {
+        expr_ref e(m.mk_const(name, m_str_sort), m);
+        return mk(e);
+    }
+
+    snode* sgraph::mk_char(unsigned ch) {
+        expr_ref c(m_seq.str.mk_char(ch), m);
+        expr_ref u(m_seq.str.mk_unit(c), m);
+        return mk(u);
+    }
+
+    snode* sgraph::mk_empty() {
+        expr_ref e(m_seq.str.mk_empty(m_str_sort), m);
+        return mk(e);
+    }
+
+    snode* sgraph::mk_concat(snode* a, snode* b) {
+        if (a->is_empty()) return b;
+        if (b->is_empty()) return a;
+        expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m);
+        return mk(e);
+    }
+
+    snode* sgraph::drop_first(snode* n) {
+        if (n->is_empty() || n->is_token())
+            return mk_empty();
+        SASSERT(n->is_concat());
+        snode* l = n->arg(0);
+        snode* r = n->arg(1);
+        if (l->is_token() || l->is_empty())
+            return r;
+        return mk_concat(drop_first(l), r);
+    }
+
+    snode* sgraph::drop_last(snode* n) {
+        if (n->is_empty() || n->is_token())
+            return mk_empty();
+        SASSERT(n->is_concat());
+        snode* l = n->arg(0);
+        snode* r = n->arg(1);
+        if (r->is_token() || r->is_empty())
+            return l;
+        return mk_concat(l, drop_last(r));
+    }
+
+    snode* sgraph::drop_left(snode* n, unsigned count) {
+        for (unsigned i = 0; i < count && !n->is_empty(); ++i)
+            n = drop_first(n);
+        return n;
+    }
+
+    snode* sgraph::drop_right(snode* n, unsigned count) {
+        for (unsigned i = 0; i < count && !n->is_empty(); ++i)
+            n = drop_last(n);
+        return n;
+    }
+
+    snode* sgraph::subst(snode* n, snode* var, snode* replacement) {
+        if (n == var)
+            return replacement;
+        if (n->is_empty() || n->is_char())
+            return n;
+        if (n->is_concat())
+            return mk_concat(subst(n->arg(0), var, replacement),
+                             subst(n->arg(1), var, replacement));
+        // for non-concat compound nodes (power, star, etc.), no substitution into children
+        return n;
+    }
+
+    snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
+        expr* re_expr = re->get_expr();
+        expr* elem_expr = elem->get_expr();
+        if (!re_expr || !elem_expr)
+            return nullptr;
+        // unwrap str.unit to get the character expression
+        expr* ch = nullptr;
+        if (m_seq.str.is_unit(elem_expr, ch))
+            elem_expr = ch;
+        expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
+        if (!result)
+            return nullptr;
+        return mk(result);
+    }
+
+    void sgraph::collect_re_predicates(snode* re, expr_ref_vector& preds) {
+        if (!re || !re->get_expr())
+            return;
+        expr* e = re->get_expr();
+        expr* ch = nullptr, *lo = nullptr, *hi = nullptr;
+        // leaf regex predicates: character ranges and single characters
+        if (m_seq.re.is_range(e, lo, hi)) {
+            preds.push_back(e);
+            return;
+        }
+        if (m_seq.re.is_to_re(e))
+            return;
+        if (m_seq.re.is_full_char(e))
+            return;
+        if (m_seq.re.is_full_seq(e))
+            return;
+        if (m_seq.re.is_empty(e))
+            return;
+        // recurse into compound regex operators
+        for (unsigned i = 0; i < re->num_args(); ++i)
+            collect_re_predicates(re->arg(i), preds);
+    }
+
+    void sgraph::compute_minterms(snode* re, snode_vector& minterms) {
+        // extract character predicates from the regex
+        expr_ref_vector preds(m);
+        collect_re_predicates(re, preds);
+        if (preds.empty()) {
+            // no predicates means the whole alphabet is one minterm
+            // represented by full_char
+            expr_ref fc(m_seq.re.mk_full_char(m_str_sort), m);
+            minterms.push_back(mk(fc));
+            return;
+        }
+        // generate minterms as conjunctions/negations of predicates
+        // for n predicates, there are up to 2^n minterms
+        unsigned n = preds.size();
+        for (unsigned mask = 0; mask < (1u << n); ++mask) {
+            expr_ref_vector conj(m);
+            for (unsigned i = 0; i < n; ++i) {
+                if (mask & (1u << i))
+                    conj.push_back(preds.get(i));
+                else
+                    conj.push_back(m_seq.re.mk_complement(preds.get(i)));
+            }
+            // intersect all terms
+            expr_ref mt(conj.get(0), m);
+            for (unsigned i = 1; i < conj.size(); ++i)
+                mt = m_seq.re.mk_inter(mt, conj.get(i));
+            minterms.push_back(mk(mt));
+        }
+    }
+
     std::ostream& sgraph::display(std::ostream& out) const {
         auto kind_str = [](snode_kind k) -> char const* {
             switch (k) {
diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h
index d6da7e184..2eedb3c75 100644
--- a/src/ast/euf/euf_sgraph.h
+++ b/src/ast/euf/euf_sgraph.h
@@ -10,6 +10,7 @@ Abstract:
     Sequence/string graph layer
 
     Encapsulates string and regex expressions for the string solver.
+    Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT).
     The sgraph maps Z3 sequence/regex AST expressions to snode structures
     organized as binary concatenation trees with metadata, and owns an
     egraph with a seq_plugin for congruence closure.
@@ -59,6 +60,7 @@ Author:
 #include "util/statistics.h"
 #include "ast/ast.h"
 #include "ast/seq_decl_plugin.h"
+#include "ast/rewriter/seq_rewriter.h"
 #include "ast/euf/euf_snode.h"
 #include "ast/euf/euf_egraph.h"
 
@@ -79,10 +81,12 @@ namespace euf {
 
         ast_manager&     m;
         seq_util         m_seq;
+        seq_rewriter     m_rewriter;
         egraph           m_egraph;
         region           m_region;
         snode_vector     m_nodes;
         expr_ref_vector  m_exprs;       // pin expressions
+        sort_ref         m_str_sort;    // cached string sort
         unsigned_vector  m_scopes;
         unsigned         m_num_scopes = 0;
         stats            m_stats;
@@ -93,6 +97,7 @@ namespace euf {
         snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
         snode_kind classify(expr* e) const;
         void compute_metadata(snode* n);
+        void collect_re_predicates(snode* re, expr_ref_vector& preds);
 
     public:
         sgraph(ast_manager& m);
@@ -112,6 +117,27 @@ namespace euf {
         // register expression in both sgraph and egraph
         enode* mk_enode(expr* e);
 
+        // factory methods for creating snodes with corresponding expressions
+        snode* mk_var(symbol const& name);
+        snode* mk_char(unsigned ch);
+        snode* mk_empty();
+        snode* mk_concat(snode* a, snode* b);
+
+        // drop operations: remove tokens from the front/back of a concat tree
+        snode* drop_first(snode* n);
+        snode* drop_last(snode* n);
+        snode* drop_left(snode* n, unsigned count);
+        snode* drop_right(snode* n, unsigned count);
+
+        // substitution: replace all occurrences of var in n by replacement
+        snode* subst(snode* n, snode* var, snode* replacement);
+
+        // Brzozowski derivative of regex re with respect to element elem
+        snode* brzozowski_deriv(snode* re, snode* elem);
+
+        // compute minterms (character class partition) from a regex
+        void compute_minterms(snode* re, snode_vector& minterms);
+
         // scope management for backtracking
         void push();
         void pop(unsigned num_scopes);
diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h
index 40ce8f4f8..26de2d3ec 100644
--- a/src/ast/euf/euf_snode.h
+++ b/src/ast/euf/euf_snode.h
@@ -157,6 +157,31 @@ namespace euf {
                 s = s->arg(1);
             return s;
         }
+
+        // collect all leaf tokens in left-to-right order
+        void collect_tokens(snode_vector& tokens) const {
+            if (is_concat()) {
+                arg(0)->collect_tokens(tokens);
+                arg(1)->collect_tokens(tokens);
+            }
+            else if (!is_empty()) {
+                tokens.push_back(const_cast<snode*>(this));
+            }
+        }
+
+        // access the i-th token (0-based, left-to-right order)
+        // returns nullptr if i >= length()
+        snode* at(unsigned i) const {
+            if (is_concat()) {
+                unsigned left_len = arg(0)->length();
+                if (i < left_len)
+                    return arg(0)->at(i);
+                return arg(1)->at(i - left_len);
+            }
+            if (is_empty())
+                return nullptr;
+            return i == 0 ? const_cast<snode*>(this) : nullptr;
+        }
     };
 
 }
diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp
index 7f7760e4e..028573bec 100644
--- a/src/test/euf_sgraph.cpp
+++ b/src/test/euf_sgraph.cpp
@@ -431,6 +431,303 @@ static void test_sgraph_display() {
     std::cout << out;
 }
 
+// test sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat
+static void test_sgraph_factory() {
+    std::cout << "test_sgraph_factory\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::sgraph sg(m);
+
+    // mk_var
+    euf::snode* x = sg.mk_var(symbol("x"));
+    SASSERT(x && x->is_var());
+    SASSERT(!x->is_ground());
+    SASSERT(x->length() == 1);
+
+    // mk_char
+    euf::snode* a = sg.mk_char('A');
+    SASSERT(a && a->is_char());
+    SASSERT(a->is_ground());
+    SASSERT(a->length() == 1);
+
+    // mk_empty
+    euf::snode* e = sg.mk_empty();
+    SASSERT(e && e->is_empty());
+    SASSERT(e->is_nullable());
+    SASSERT(e->length() == 0);
+
+    // mk_concat with empty absorption
+    euf::snode* xe = sg.mk_concat(x, e);
+    SASSERT(xe == x);
+    euf::snode* ex = sg.mk_concat(e, x);
+    SASSERT(ex == x);
+
+    // mk_concat of two variables
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* xy = sg.mk_concat(x, y);
+    SASSERT(xy && xy->is_concat());
+    SASSERT(xy->length() == 2);
+    SASSERT(xy->arg(0) == x);
+    SASSERT(xy->arg(1) == y);
+
+    // mk_concat of multiple characters
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* c = sg.mk_char('C');
+    euf::snode* abc = sg.mk_concat(sg.mk_concat(a, b), c);
+    SASSERT(abc->length() == 3);
+    SASSERT(abc->is_ground());
+    SASSERT(abc->first() == a);
+    SASSERT(abc->last() == c);
+}
+
+// test snode::at() and snode::collect_tokens()
+static void test_sgraph_indexing() {
+    std::cout << "test_sgraph_indexing\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::sgraph sg(m);
+
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* c = sg.mk_char('C');
+    euf::snode* x = sg.mk_var(symbol("x"));
+
+    // build concat(concat(a, b), concat(c, x)) => [A, B, C, x]
+    euf::snode* ab = sg.mk_concat(a, b);
+    euf::snode* cx = sg.mk_concat(c, x);
+    euf::snode* abcx = sg.mk_concat(ab, cx);
+
+    SASSERT(abcx->length() == 4);
+
+    // test at()
+    SASSERT(abcx->at(0) == a);
+    SASSERT(abcx->at(1) == b);
+    SASSERT(abcx->at(2) == c);
+    SASSERT(abcx->at(3) == x);
+    SASSERT(abcx->at(4) == nullptr); // out of bounds
+
+    // test collect_tokens()
+    euf::snode_vector tokens;
+    abcx->collect_tokens(tokens);
+    SASSERT(tokens.size() == 4);
+    SASSERT(tokens[0] == a);
+    SASSERT(tokens[1] == b);
+    SASSERT(tokens[2] == c);
+    SASSERT(tokens[3] == x);
+
+    // single token: at(0) is self
+    SASSERT(a->at(0) == a);
+    SASSERT(a->at(1) == nullptr);
+
+    // empty: at(0) is nullptr
+    euf::snode* e = sg.mk_empty();
+    SASSERT(e->at(0) == nullptr);
+    euf::snode_vector empty_tokens;
+    e->collect_tokens(empty_tokens);
+    SASSERT(empty_tokens.empty());
+}
+
+// test sgraph drop operations
+static void test_sgraph_drop() {
+    std::cout << "test_sgraph_drop\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::sgraph sg(m);
+
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* c = sg.mk_char('C');
+    euf::snode* d = sg.mk_char('D');
+
+    // build concat(concat(a, b), concat(c, d)) => [A, B, C, D]
+    euf::snode* ab = sg.mk_concat(a, b);
+    euf::snode* cd = sg.mk_concat(c, d);
+    euf::snode* abcd = sg.mk_concat(ab, cd);
+
+    SASSERT(abcd->length() == 4);
+
+    // drop_first: [A, B, C, D] => [B, C, D]
+    euf::snode* bcd = sg.drop_first(abcd);
+    SASSERT(bcd->length() == 3);
+    SASSERT(bcd->first() == b);
+    SASSERT(bcd->last() == d);
+
+    // drop_last: [A, B, C, D] => [A, B, C]
+    euf::snode* abc = sg.drop_last(abcd);
+    SASSERT(abc->length() == 3);
+    SASSERT(abc->first() == a);
+    SASSERT(abc->last() == c);
+
+    // drop_left(2): [A, B, C, D] => [C, D]
+    euf::snode* cd2 = sg.drop_left(abcd, 2);
+    SASSERT(cd2->length() == 2);
+    SASSERT(cd2->first() == c);
+
+    // drop_right(2): [A, B, C, D] => [A, B]
+    euf::snode* ab2 = sg.drop_right(abcd, 2);
+    SASSERT(ab2->length() == 2);
+    SASSERT(ab2->last() == b);
+
+    // drop all: [A, B, C, D] => empty
+    euf::snode* empty = sg.drop_left(abcd, 4);
+    SASSERT(empty->is_empty());
+
+    // drop from single token: [A] => empty
+    euf::snode* e = sg.drop_first(a);
+    SASSERT(e->is_empty());
+
+    // drop from empty: no change
+    euf::snode* ee = sg.drop_first(sg.mk_empty());
+    SASSERT(ee->is_empty());
+}
+
+// test sgraph substitution
+static void test_sgraph_subst() {
+    std::cout << "test_sgraph_subst\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::sgraph sg(m);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+
+    // concat(x, concat(a, x)) with x -> b gives concat(b, concat(a, b))
+    euf::snode* ax = sg.mk_concat(a, x);
+    euf::snode* xax = sg.mk_concat(x, ax);
+    SASSERT(xax->length() == 3);
+
+    euf::snode* result = sg.subst(xax, x, b);
+    SASSERT(result->length() == 3);
+    SASSERT(result->first() == b);
+    SASSERT(result->last() == b);
+    SASSERT(result->at(1) == a); // middle is still 'A'
+
+    // substitution of non-occurring variable is identity
+    euf::snode* same = sg.subst(xax, y, b);
+    SASSERT(same == xax);
+
+    // substitution of variable with empty
+    euf::snode* e = sg.mk_empty();
+    euf::snode* collapsed = sg.subst(xax, x, e);
+    SASSERT(collapsed->length() == 1); // just 'a' remains
+    SASSERT(collapsed == a);
+}
+
+// test complex concatenation creation, merging and simplification
+static void test_sgraph_complex_concat() {
+    std::cout << "test_sgraph_complex_concat\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::sgraph sg(m);
+
+    // build a string "HELLO" = concat(H, concat(E, concat(L, concat(L, O))))
+    euf::snode* h = sg.mk_char('H');
+    euf::snode* e = sg.mk_char('E');
+    euf::snode* l = sg.mk_char('L');
+    euf::snode* o = sg.mk_char('O');
+
+    euf::snode* lo = sg.mk_concat(l, o);
+    euf::snode* llo = sg.mk_concat(l, lo);
+    euf::snode* ello = sg.mk_concat(e, llo);
+    euf::snode* hello = sg.mk_concat(h, ello);
+
+    SASSERT(hello->length() == 5);
+    SASSERT(hello->is_ground());
+    SASSERT(hello->first() == h);
+    SASSERT(hello->last() == o);
+
+    // index into "HELLO"
+    SASSERT(hello->at(0) == h);
+    SASSERT(hello->at(1) == e);
+    SASSERT(hello->at(2) == l);
+    SASSERT(hello->at(3) == l);
+    SASSERT(hello->at(4) == o);
+
+    // drop first 2 from "HELLO" => "LLO"
+    euf::snode* llo2 = sg.drop_left(hello, 2);
+    SASSERT(llo2->length() == 3);
+    SASSERT(llo2->first() == l);
+
+    // drop last 3 from "HELLO" => "HE"
+    euf::snode* he = sg.drop_right(hello, 3);
+    SASSERT(he->length() == 2);
+    SASSERT(he->first() == h);
+    SASSERT(he->last() == e);
+
+    // mixed variables and characters: concat(x, "AB", y)
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* ab = sg.mk_concat(a, b);
+    euf::snode* xab = sg.mk_concat(x, ab);
+    euf::snode* xaby = sg.mk_concat(xab, y);
+
+    SASSERT(xaby->length() == 4);
+    SASSERT(!xaby->is_ground());
+    SASSERT(xaby->at(0) == x);
+    SASSERT(xaby->at(1) == a);
+    SASSERT(xaby->at(2) == b);
+    SASSERT(xaby->at(3) == y);
+}
+
+// test Brzozowski derivative computation
+static void test_sgraph_brzozowski() {
+    std::cout << "test_sgraph_brzozowski\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::sgraph sg(m);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    // derivative of re.star(to_re("a")) w.r.t. 'a'
+    // d/da (a*) = a*
+    expr_ref ch_a(seq.str.mk_char('a'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    expr_ref to_re_a(seq.re.mk_to_re(unit_a), m);
+    expr_ref star_a(seq.re.mk_star(to_re_a), m);
+
+    euf::snode* s_star_a = sg.mk(star_a);
+    euf::snode* s_unit_a = sg.mk(unit_a);
+
+    euf::snode* deriv = sg.brzozowski_deriv(s_star_a, s_unit_a);
+    SASSERT(deriv != nullptr);
+    std::cout << "  d/da(a*) kind: " << (int)deriv->kind() << "\n";
+
+    // derivative of re.empty w.r.t. 'a' should be re.empty
+    sort_ref re_sort(seq.re.mk_re(str_sort), m);
+    expr_ref re_empty(seq.re.mk_empty(re_sort), m);
+    euf::snode* s_empty = sg.mk(re_empty);
+    euf::snode* deriv_empty = sg.brzozowski_deriv(s_empty, s_unit_a);
+    SASSERT(deriv_empty != nullptr);
+    SASSERT(deriv_empty->is_fail()); // derivative of empty set is empty set
+    std::cout << "  d/da(empty) kind: " << (int)deriv_empty->kind() << "\n";
+
+    sg.display(std::cout);
+}
+
+// test minterm computation
+static void test_sgraph_minterms() {
+    std::cout << "test_sgraph_minterms\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::sgraph sg(m);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    // simple regex with no character predicates: re.all (.*)
+    expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* s_re_all = sg.mk(re_all);
+
+    euf::snode_vector minterms;
+    sg.compute_minterms(s_re_all, minterms);
+    // no predicates => single minterm (full_char)
+    SASSERT(minterms.size() == 1);
+    std::cout << "  re.all minterms: " << minterms.size() << "\n";
+}
+
 void tst_euf_sgraph() {
     test_sgraph_classify();
     test_sgraph_regex();
@@ -443,4 +740,11 @@ void tst_euf_sgraph() {
     test_sgraph_first_last();
     test_sgraph_concat_metadata();
     test_sgraph_display();
+    test_sgraph_factory();
+    test_sgraph_indexing();
+    test_sgraph_drop();
+    test_sgraph_subst();
+    test_sgraph_complex_concat();
+    test_sgraph_brzozowski();
+    test_sgraph_minterms();
 }

From 1274a1e3c030214f795018906d41732c2c7a6c0f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 2 Mar 2026 19:29:41 +0000
Subject: [PATCH 4/4] Add safety cap on minterm computation and assertion for
 invariant

Co-authored-by: NikolajBjorner <56730610+NikolajBjorner@users.noreply.github.com>
---
 src/ast/euf/euf_sgraph.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp
index f8c518131..3168c2263 100644
--- a/src/ast/euf/euf_sgraph.cpp
+++ b/src/ast/euf/euf_sgraph.cpp
@@ -477,6 +477,9 @@ namespace euf {
         // generate minterms as conjunctions/negations of predicates
         // for n predicates, there are up to 2^n minterms
         unsigned n = preds.size();
+        // cap at reasonable size to prevent exponential blowup
+        if (n > 20)
+            n = 20;
         for (unsigned mask = 0; mask < (1u << n); ++mask) {
             expr_ref_vector conj(m);
             for (unsigned i = 0; i < n; ++i) {
@@ -485,6 +488,7 @@ namespace euf {
                 else
                     conj.push_back(m_seq.re.mk_complement(preds.get(i)));
             }
+            SASSERT(!conj.empty());
             // intersect all terms
             expr_ref mt(conj.get(0), m);
             for (unsigned i = 1; i < conj.size(); ++i)