Implement ZIPT code review improvements: add euf_sgraph/seq_plugin files and apply 3 fixes

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
2026-07-06 15:26:15 +00:00 · 2026-03-03 03:41:13 +00:00 · 2026-03-03 03:41:13 +00:00 · 008ef90d4d
commit 008ef90d4d
parent 50ace4471f
18 changed files with 3624 additions and 2 deletions
--- a/scripts/mk_project.py
+++ b/scripts/mk_project.py
@ -68,6 +68,7 @@ def init_project_def():
    add_lib('subpaving_tactic', ['core_tactics', 'subpaving'], 'math/subpaving/tactic')

    add_lib('proto_model', ['model', 'rewriter', 'params'], 'smt/proto_model')
+    add_lib('smt_seq', ['rewriter', 'euf'], 'smt/seq')
    add_lib('smt', ['bit_blaster', 'macros', 'normal_forms', 'cmd_context', 'proto_model', 'solver_assertions',
                    'substitution', 'grobner', 'simplex', 'proofs', 'pattern', 'parser_util', 'fpa', 'lp'])
    add_lib('sat_smt', ['sat', 'ast_sls', 'euf', 'smt', 'tactic', 'solver', 'params', 'bit_blaster', 'fpa', 'mbp', 'normal_forms', 'lp', 'pattern', 'qe_lite'], 'sat/smt')
@ -100,7 +101,7 @@ def init_project_def():
    add_lib('api', ['portfolio',  'realclosure', 'opt', 'extra_cmds'],
            includes2install=['z3.h', 'z3_v1.h', 'z3_macros.h'] + API_files)
    add_exe('shell', ['api', 'sat', 'extra_cmds', 'opt'], exe_name='z3')
-    add_exe('test', ['api', 'fuzzing', 'simplex', 'sat_smt'], exe_name='test-z3', install=False)
+    add_exe('test', ['api', 'fuzzing', 'simplex', 'sat_smt', 'smt_seq'], exe_name='test-z3', install=False)
    _libz3Component = add_dll('api_dll', ['api', 'sat', 'extra_cmds'], 'api/dll',
                              reexports=['api'],
                              dll_name='libz3',
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -53,6 +53,7 @@ add_subdirectory(ackermannization)
 add_subdirectory(ast/proofs)
 add_subdirectory(ast/fpa)
 add_subdirectory(smt/proto_model)
+add_subdirectory(smt/seq)
 add_subdirectory(smt)
 add_subdirectory(tactic/bv)
 add_subdirectory(smt/tactic)
--- a/src/ast/euf/CMakeLists.txt
+++ b/src/ast/euf/CMakeLists.txt
@ -9,6 +9,8 @@ z3_add_component(euf
    euf_justification.cpp
    euf_mam.cpp
    euf_plugin.cpp
+    euf_seq_plugin.cpp
+    euf_sgraph.cpp
    euf_specrel_plugin.cpp
    ho_matcher.cpp
  COMPONENT_DEPENDENCIES
--- a/src/ast/euf/euf_seq_plugin.cpp
+++ b/src/ast/euf/euf_seq_plugin.cpp
@ -0,0 +1,296 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    euf_seq_plugin.cpp
+
+Abstract:
+
+    Plugin structure for sequences/strings.
+
+    Merges equivalence classes taking into account associativity
+    of concatenation and algebraic properties of strings and
+    regular expressions.
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+    Clemens Eisenhofer 2026-03-01
+
+--*/
+
+#include "ast/euf/euf_seq_plugin.h"
+#include "ast/euf/euf_egraph.h"
+#include "ast/euf/euf_sgraph.h"
+#include "ast/ast_pp.h"
+
+namespace euf {
+
+    // Check if enode is any kind of concat (str.++ or re.++)
+    static bool is_any_concat(enode* n, seq_util const& seq) {
+        expr* a = nullptr, *b = nullptr;
+        return seq.str.is_concat(n->get_expr(), a, b) || seq.re.is_concat(n->get_expr(), a, b);
+    }
+
+    // Collect leaves of a concat tree in left-to-right order.
+    // For non-concat nodes, the node itself is a leaf.
+    // Handles both str.++ and re.++.
+    static void collect_enode_leaves(enode* n, seq_util const& seq, enode_vector& leaves) {
+        if (is_any_concat(n, seq)) {
+            collect_enode_leaves(n->get_arg(0), seq, leaves);
+            collect_enode_leaves(n->get_arg(1), seq, leaves);
+        }
+        else {
+            leaves.push_back(n);
+        }
+    }
+
+    unsigned enode_concat_hash::operator()(enode* n) const {
+        snode* sn = sg.find(n->get_expr());
+        if (sn && sn->has_cached_hash())
+            return sn->assoc_hash();
+        if (!is_any_concat(n, seq))
+            return n->get_id();
+        enode_vector leaves;
+        collect_enode_leaves(n, seq, leaves);
+        unsigned h = 0;
+        for (enode* l : leaves)
+            h = combine_hash(h, l->get_id());
+        return h;
+    }
+
+    bool enode_concat_eq::operator()(enode* a, enode* b) const {
+        if (a == b) return true;
+        if (!is_any_concat(a, seq) || !is_any_concat(b, seq))
+            return false;
+        enode_vector la, lb;
+        collect_enode_leaves(a, seq, la);
+        collect_enode_leaves(b, seq, lb);
+        if (la.size() != lb.size())
+            return false;
+        for (unsigned i = 0; i < la.size(); ++i)
+            if (la[i] != lb[i])
+                return false;
+        return true;
+    }
+
+    seq_plugin::seq_plugin(egraph& g, sgraph* sg):
+        plugin(g),
+        m_seq(g.get_manager()),
+        m_rewriter(g.get_manager()),
+        m_sg(sg ? *sg : *alloc(sgraph, g.get_manager(), g, false)),
+        m_sg_owned(sg == nullptr),
+        m_concat_hash(m_seq, m_sg),
+        m_concat_eq(m_seq),
+        m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) {
+    }
+
+    seq_plugin::~seq_plugin() {
+        if (m_sg_owned)
+            dealloc(&m_sg);
+    }
+
+    void seq_plugin::register_node(enode* n) {
+        m_queue.push_back(n);
+        push_undo(undo_kind::undo_add_concat);
+    }
+
+    void seq_plugin::merge_eh(enode* n1, enode* n2) {
+        m_queue.push_back(enode_pair(n1, n2));
+        push_undo(undo_kind::undo_add_concat);
+    }
+
+    void seq_plugin::push_undo(undo_kind k) {
+        m_undo.push_back(k);
+        push_plugin_undo(get_id());
+    }
+
+    void seq_plugin::propagate() {
+        if (m_qhead == m_queue.size())
+            return;
+        for (; m_qhead < m_queue.size(); ++m_qhead) {
+            if (g.inconsistent())
+                break;
+            if (std::holds_alternative<enode*>(m_queue[m_qhead])) {
+                auto n = std::get<enode*>(m_queue[m_qhead]);
+                propagate_register_node(n);
+            }
+            else {
+                auto [a, b] = std::get<enode_pair>(m_queue[m_qhead]);
+                propagate_merge(a, b);
+            }
+        }
+    }
+
+    void seq_plugin::propagate_register_node(enode* n) {
+        if (!m_seq.is_seq(n->get_expr()) && !m_seq.is_re(n->get_expr()))
+            return;
+
+        TRACE(seq, tout << "seq register " << g.bpp(n) << "\n");
+
+        if (is_concat(n)) {
+            propagate_assoc(n);
+            propagate_simplify(n);
+        }
+
+        // str.++ identity: concat(a, ε) = a, concat(ε, b) = b
+        enode* a, *b;
+        if (is_str_concat(n, a, b)) {
+            if (is_str_empty(a))
+                push_merge(n, b);
+            else if (is_str_empty(b))
+                push_merge(n, a);
+        }
+
+        // re.++ identity: concat(a, epsilon) = a, concat(epsilon, b) = b
+        // re.++ absorption: concat(a, ∅) = ∅, concat(∅, b) = ∅
+        if (is_re_concat(n, a, b)) {
+            if (is_re_epsilon(a))
+                push_merge(n, b);
+            else if (is_re_epsilon(b))
+                push_merge(n, a);
+            else if (is_re_empty(a))
+                push_merge(n, a);
+            else if (is_re_empty(b))
+                push_merge(n, b);
+        }
+    }
+
+    void seq_plugin::propagate_merge(enode* a, enode* b) {
+        if (!m_seq.is_seq(a->get_expr()) && !m_seq.is_re(a->get_expr()))
+            return;
+
+        TRACE(seq, tout << "seq merge " << g.bpp(a) << " == " << g.bpp(b) << "\n");
+
+        // when equivalence classes merge, re-check concat simplifications
+        for (enode* n : enode_class(a)) {
+            if (is_concat(n))
+                propagate_simplify(n);
+        }
+    }
+
+    //
+    // Concat associativity:
+    // Instead of creating new expressions, maintain a hash table
+    // that respects associativity. When a concat is registered,
+    // look up existing concats with the same leaf sequence.
+    // If found, merge the existing node with the new one.
+    //
+    void seq_plugin::propagate_assoc(enode* n) {
+        if (!is_concat(n))
+            return;
+
+        enode* existing = nullptr;
+        if (m_concat_table.find(n, existing)) {
+            if (existing != n)
+                push_merge(n, existing);
+        }
+        else {
+            m_concat_table.insert(n);
+            m_concats.push_back(n);
+            push_undo(undo_kind::undo_add_to_table);
+        }
+    }
+
+    //
+    // Concat simplification rules from ZIPT:
+    //
+    // 1. Kleene star merging: concat(u, v*, v*, w) = concat(u, v*, w)
+    //    when adjacent children in a concat chain have congruent star bodies.
+    //
+    // 2. Nullable absorption: concat(u, .*, v, w) = concat(u, .*, w)
+    //    when v is nullable and adjacent to full_seq (.*).
+    //
+    void seq_plugin::propagate_simplify(enode* n) {
+        enode* a, *b;
+        if (!is_concat(n, a, b))
+            return;
+
+        // Rule 1: Kleene star merging
+        // concat(v*, v*) = v*
+        if (same_star_body(a, b))
+            push_merge(n, a);
+
+        // Rule 1 extended (right): concat(v*, concat(v*, c)) = concat(v*, c)
+        enode* b1, *b2;
+        if (is_concat(b, b1, b2) && same_star_body(a, b1))
+            push_merge(n, b);
+
+        // Rule 1 extended (left): concat(concat(c, v*), v*) = concat(c, v*)
+        enode* a1, *a2;
+        if (is_concat(a, a1, a2) && same_star_body(a2, b))
+            push_merge(n, a);
+
+        // Rule 2: Nullable absorption by .*
+        // concat(.*, v) = .* when v is nullable
+        if (is_full_seq(a) && is_nullable(b))
+            push_merge(n, a);
+
+        // concat(v, .*) = .* when v is nullable
+        if (is_nullable(a) && is_full_seq(b))
+            push_merge(n, b);
+
+        // concat(.*, concat(v, w)) = concat(.*, w) when v nullable
+        // handled by associativity + nullable absorption on sub-concats
+
+        // concat(concat(u, v), .*) = concat(u, .*) when v nullable
+        // handled by associativity + nullable absorption on sub-concats
+    }
+
+    bool seq_plugin::is_nullable(expr* e) {
+        expr_ref result = m_rewriter.is_nullable(e);
+        return g.get_manager().is_true(result);
+    }
+
+    bool seq_plugin::same_star_body(enode* a, enode* b) {
+        if (!is_star(a) || !is_star(b))
+            return false;
+        // re.star(x) and re.star(y) have congruent bodies if x ~ y
+        return a->get_arg(0)->get_root() == b->get_arg(0)->get_root();
+    }
+
+    bool seq_plugin::same_loop_body(enode* a, enode* b,
+                                    unsigned& lo1, unsigned& hi1,
+                                    unsigned& lo2, unsigned& hi2) {
+        if (!is_loop(a) || !is_loop(b))
+            return false;
+        expr* body_a, *body_b;
+        if (!m_seq.re.is_loop(a->get_expr(), body_a, lo1, hi1))
+            return false;
+        if (!m_seq.re.is_loop(b->get_expr(), body_b, lo2, hi2))
+            return false;
+        enode* na = g.find(body_a);
+        enode* nb = g.find(body_b);
+        if (!na || !nb)
+            return false;
+        return na->get_root() == nb->get_root();
+    }
+
+    void seq_plugin::undo() {
+        auto k = m_undo.back();
+        m_undo.pop_back();
+        switch (k) {
+        case undo_kind::undo_add_concat:
+            SASSERT(!m_queue.empty());
+            m_queue.pop_back();
+            if (m_qhead > m_queue.size())
+                m_qhead = m_queue.size();
+            break;
+        case undo_kind::undo_add_to_table:
+            SASSERT(!m_concats.empty());
+            m_concat_table.remove(m_concats.back());
+            m_concats.pop_back();
+            break;
+        }
+    }
+
+    std::ostream& seq_plugin::display(std::ostream& out) const {
+        out << "seq-plugin\n";
+        return out;
+    }
+
+    void seq_plugin::collect_statistics(statistics& st) const {
+        // statistics are collected by sgraph which owns us
+    }
+}
--- a/src/ast/euf/euf_seq_plugin.h
+++ b/src/ast/euf/euf_seq_plugin.h
@ -0,0 +1,175 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    euf_seq_plugin.h
+
+Abstract:
+
+    Plugin structure for sequences/strings.
+
+    Merges equivalence classes taking into account associativity
+    of concatenation and algebraic properties of strings and
+    regular expressions. Implements features from ZIPT:
+
+    -- Concat associativity: str.++ is associative, so
+       concat(a, concat(b, c)) = concat(concat(a, b), c).
+       Handled via an AC-style plugin for the concat operator.
+
+    -- Kleene star merging: adjacent identical Kleene stars
+       in a concatenation are collapsed, u.v*.v*.w = u.v*.w
+
+    -- Loop merging: adjacent loops over the same body are
+       merged, v{l1,h1}.v{l2,h2} = v{l1+l2,h1+h2}
+
+    -- Nullable absorption: a nullable token adjacent to .*
+       is absorbed, u.*.v.w = u.*.w when v is nullable.
+
+    The plugin integrates with euf_egraph for congruence closure.
+    Node registration in sgraph is handled by sgraph itself via
+    the egraph's on_make callback, not by the plugin.
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+    Clemens Eisenhofer 2026-03-01
+
+--*/
+
+#pragma once
+
+#include "ast/seq_decl_plugin.h"
+#include "ast/rewriter/seq_rewriter.h"
+#include "ast/euf/euf_plugin.h"
+#include "util/hashtable.h"
+
+namespace euf {
+
+    class egraph;
+    class sgraph;
+
+    // Associativity-respecting hash for enode concat trees.
+    // Uses cached snode hash matrices from the sgraph for O(1) hashing.
+    // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT).
+    struct enode_concat_hash {
+        seq_util const& seq;
+        sgraph& sg;
+        enode_concat_hash(seq_util const& s, sgraph& sg) : seq(s), sg(sg) {}
+        unsigned operator()(enode* n) const;
+    };
+
+    // Associativity-respecting equality for enode concat trees.
+    // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT).
+    struct enode_concat_eq {
+        seq_util const& seq;
+        enode_concat_eq(seq_util const& s) : seq(s) {}
+        bool operator()(enode* a, enode* b) const;
+    };
+
+    class seq_plugin : public plugin {
+
+        enum class undo_kind {
+            undo_add_concat,
+            undo_add_to_table,
+        };
+
+        seq_util         m_seq;
+        seq_rewriter     m_rewriter;
+        sgraph&          m_sg;
+        bool             m_sg_owned = false; // whether we own the sgraph
+        svector<undo_kind> m_undo;
+
+        // queue of merges and registrations to process
+        vector<std::variant<enode*, enode_pair>> m_queue;
+        unsigned         m_qhead = 0;
+
+        // track registered concat nodes for simplification
+        enode_vector     m_concats;
+
+        // associativity-respecting hash table for concat nodes
+        enode_concat_hash m_concat_hash;
+        enode_concat_eq   m_concat_eq;
+        hashtable<enode*, enode_concat_hash, enode_concat_eq> m_concat_table;
+
+        // string concat predicates
+        bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); }
+        bool is_str_concat(enode* n, enode*& a, enode*& b) {
+            expr* ea = nullptr, *eb = nullptr;
+            return m_seq.str.is_concat(n->get_expr(), ea, eb) &&
+                   n->num_args() == 2 &&
+                   (a = n->get_arg(0), b = n->get_arg(1), true);
+        }
+
+        // regex concat predicates
+        bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); }
+        bool is_re_concat(enode* n, enode*& a, enode*& b) {
+            expr* ea = nullptr, *eb = nullptr;
+            return m_seq.re.is_concat(n->get_expr(), ea, eb) &&
+                   n->num_args() == 2 &&
+                   (a = n->get_arg(0), b = n->get_arg(1), true);
+        }
+
+        // any concat, string or regex
+        bool is_concat(enode* n) const { return is_str_concat(n) || is_re_concat(n); }
+        bool is_concat(enode* n, enode*& a, enode*& b) {
+            return is_str_concat(n, a, b) || is_re_concat(n, a, b);
+        }
+
+        bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); }
+        bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); }
+
+        // string empty: ε for str.++
+        bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); }
+        // regex empty set: ∅ for re.++ (absorbing element)
+        bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_expr()); }
+        // regex epsilon: to_re("") for re.++ (identity element)
+        bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_expr()); }
+
+        bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); }
+        bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); }
+
+        void push_undo(undo_kind k);
+
+        void propagate_register_node(enode* n);
+        void propagate_merge(enode* a, enode* b);
+
+        // concat associativity: maintain hash table of concat nodes,
+        // merge nodes that are equal modulo associativity
+        void propagate_assoc(enode* n);
+
+        // concat simplification:
+        // merging Kleene stars, merging loops, absorbing nullables
+        void propagate_simplify(enode* n);
+
+        // check if expression is nullable using existing seq_rewriter
+        bool is_nullable(expr* e);
+        bool is_nullable(enode* n) { return is_nullable(n->get_expr()); }
+
+        // check if two enodes have congruent star bodies
+        bool same_star_body(enode* a, enode* b);
+
+        // check if two enodes have congruent loop bodies and extract bounds
+        bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2);
+
+    public:
+        seq_plugin(egraph& g, sgraph* sg = nullptr);
+        ~seq_plugin() override;
+
+        theory_id get_id() const override { return m_seq.get_family_id(); }
+
+        void register_node(enode* n) override;
+
+        void merge_eh(enode* n1, enode* n2) override;
+
+        void diseq_eh(enode*) override {}
+
+        void propagate() override;
+
+        void undo() override;
+
+        std::ostream& display(std::ostream& out) const override;
+
+        void collect_statistics(statistics& st) const override;
+    };
+}
--- a/src/ast/euf/euf_sgraph.cpp
+++ b/src/ast/euf/euf_sgraph.cpp
@ -0,0 +1,637 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    euf_sgraph.cpp
+
+Abstract:
+
+    Sequence/string graph implementation
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+    Clemens Eisenhofer 2026-03-01
+
+--*/
+
+#include "ast/euf/euf_sgraph.h"
+#include "ast/euf/euf_seq_plugin.h"
+#include "ast/arith_decl_plugin.h"
+#include "ast/ast_pp.h"
+
+namespace euf {
+
+    // substitution cache stored on snode for ZIPT-style optimization
+    struct snode_subst_cache {
+        struct entry {
+            unsigned var_id;
+            unsigned repl_id;
+            snode* result;
+        };
+        svector<entry> m_entries;
+        snode* find(unsigned var_id, unsigned repl_id) const {
+            for (auto const& e : m_entries)
+                if (e.var_id == var_id && e.repl_id == repl_id)
+                    return e.result;
+            return nullptr;
+        }
+        void insert(unsigned var_id, unsigned repl_id, snode* result) {
+            m_entries.push_back({var_id, repl_id, result});
+        }
+    };
+
+    sgraph::sgraph(ast_manager& m, egraph& eg, bool add_plugin):
+        m(m),
+        m_seq(m),
+        m_rewriter(m),
+        m_egraph(eg),
+        m_str_sort(m_seq.str.mk_string_sort(), m),
+        m_add_plugin(add_plugin) {
+        // create seq_plugin and register it with the egraph
+        if (add_plugin)
+            m_egraph.add_plugin(alloc(seq_plugin, m_egraph, this));
+        // register on_make callback so sgraph creates snodes for new enodes
+        std::function<void(enode*)> on_make = [this](enode* n) {
+            expr* e = n->get_expr();
+            if (m_seq.is_seq(e) || m_seq.is_re(e))
+                mk(e);
+        };
+        m_egraph.set_on_make(on_make);
+    }
+
+    sgraph::~sgraph() {
+        for (auto* c : m_subst_caches)
+            dealloc(c);
+    }
+
+    snode_kind sgraph::classify(expr* e) const {
+        if (!is_app(e))
+            return snode_kind::s_other;
+
+        if (m_seq.str.is_empty(e))
+            return snode_kind::s_empty;
+
+        if (m_seq.str.is_string(e)) {
+            zstring s;
+            if (m_seq.str.is_string(e, s) && s.empty())
+                return snode_kind::s_empty;
+            return snode_kind::s_other;
+        }
+
+        if (m_seq.str.is_concat(e))
+            return snode_kind::s_concat;
+
+        if (m_seq.str.is_unit(e)) {
+            expr* ch = to_app(e)->get_arg(0);
+            if (m_seq.is_const_char(ch))
+                return snode_kind::s_char;
+            return snode_kind::s_unit;
+        }
+
+        if (m_seq.str.is_power(e))
+            return snode_kind::s_power;
+
+        if (m_seq.re.is_star(e))
+            return snode_kind::s_star;
+
+        if (m_seq.re.is_loop(e))
+            return snode_kind::s_loop;
+
+        if (m_seq.re.is_union(e))
+            return snode_kind::s_union;
+
+        if (m_seq.re.is_intersection(e))
+            return snode_kind::s_intersect;
+
+        if (m_seq.re.is_complement(e))
+            return snode_kind::s_complement;
+
+        if (m_seq.re.is_empty(e))
+            return snode_kind::s_fail;
+
+        if (m_seq.re.is_full_char(e))
+            return snode_kind::s_full_char;
+
+        if (m_seq.re.is_full_seq(e))
+            return snode_kind::s_full_seq;
+
+        if (m_seq.re.is_to_re(e))
+            return snode_kind::s_to_re;
+
+        if (m_seq.str.is_in_re(e))
+            return snode_kind::s_in_re;
+
+        // uninterpreted constants of string sort are variables
+        if (is_uninterp_const(e) && m_seq.is_seq(e->get_sort()))
+            return snode_kind::s_var;
+
+        return snode_kind::s_other;
+    }
+
+    void sgraph::compute_metadata(snode* n) {
+        switch (n->m_kind) {
+        case snode_kind::s_empty:
+            n->m_ground = true;
+            n->m_regex_free = true;
+            n->m_nullable = true;
+            n->m_level = 0;
+            n->m_length = 0;
+            break;
+
+        case snode_kind::s_char:
+            n->m_ground = true;
+            n->m_regex_free = true;
+            n->m_nullable = false;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_var:
+            n->m_ground = false;
+            n->m_regex_free = true;
+            n->m_nullable = false;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_unit:
+            n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true;
+            n->m_regex_free = true;
+            n->m_nullable = false;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_concat: {
+            SASSERT(n->num_args() == 2);
+            snode* l = n->arg(0);
+            snode* r = n->arg(1);
+            n->m_ground = l->is_ground() && r->is_ground();
+            n->m_regex_free = l->is_regex_free() && r->is_regex_free();
+            n->m_nullable = l->is_nullable() && r->is_nullable();
+            n->m_level = std::max(l->level(), r->level()) + 1;
+            n->m_length = l->length() + r->length();
+            ++m_stats.m_num_concat;
+            break;
+        }
+
+        case snode_kind::s_power: {
+            // s^n: nullable follows base, consistent with ZIPT's PowerToken
+            // the exponent n is assumed to be a symbolic integer, may or may not be zero
+            SASSERT(n->num_args() >= 1);
+            snode* base = n->arg(0);
+            n->m_ground = base->is_ground();
+            n->m_regex_free = base->is_regex_free();
+            n->m_nullable = base->is_nullable();
+            n->m_level = 1;
+            n->m_length = 1;
+            ++m_stats.m_num_power;
+            break;
+        }
+
+        case snode_kind::s_star:
+            SASSERT(n->num_args() == 1);
+            n->m_ground = n->arg(0)->is_ground();
+            n->m_regex_free = false;
+            n->m_nullable = true;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_loop: {
+            bool base_nullable = n->num_args() > 0 && n->arg(0)->is_nullable();
+            unsigned lo = 0, hi = 0;
+            expr* body = nullptr;
+            bool lo_zero = n->get_expr() && m_seq.re.is_loop(n->get_expr(), body, lo, hi) && lo == 0;
+            n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true;
+            n->m_regex_free = false;
+            n->m_nullable = lo_zero || base_nullable;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+        }
+
+        case snode_kind::s_union:
+            SASSERT(n->num_args() == 2);
+            n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
+            n->m_regex_free = false;
+            n->m_nullable = n->arg(0)->is_nullable() || n->arg(1)->is_nullable();
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_intersect:
+            SASSERT(n->num_args() == 2);
+            n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
+            n->m_regex_free = false;
+            n->m_nullable = n->arg(0)->is_nullable() && n->arg(1)->is_nullable();
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_complement:
+            SASSERT(n->num_args() == 1);
+            n->m_ground = n->arg(0)->is_ground();
+            n->m_regex_free = false;
+            n->m_nullable = !n->arg(0)->is_nullable();
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_fail:
+            n->m_ground = true;
+            n->m_regex_free = false;
+            n->m_nullable = false;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_full_char:
+            n->m_ground = true;
+            n->m_regex_free = false;
+            n->m_nullable = false;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_full_seq:
+            n->m_ground = true;
+            n->m_regex_free = false;
+            n->m_nullable = true;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_to_re:
+            SASSERT(n->num_args() == 1);
+            n->m_ground = n->arg(0)->is_ground();
+            n->m_regex_free = false;
+            n->m_nullable = n->arg(0)->is_nullable();
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        case snode_kind::s_in_re:
+            SASSERT(n->num_args() == 2);
+            n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
+            n->m_regex_free = false;
+            n->m_nullable = false;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+
+        default:
+            n->m_ground = true;
+            n->m_regex_free = true;
+            n->m_nullable = false;
+            n->m_level = 1;
+            n->m_length = 1;
+            break;
+        }
+    }
+
+    static const unsigned HASH_BASE = 31;
+
+    // Compute a 2x2 polynomial hash matrix for associativity-respecting hashing.
+    // Unsigned overflow is intentional and well-defined (mod 2^32).
+    // M[0][0] tracks HASH_BASE^(num_leaves), which is always nonzero since
+    // HASH_BASE is odd. M[0][1] is the actual hash value.
+    void sgraph::compute_hash_matrix(snode* n) {
+        if (n->is_empty()) {
+            // identity matrix: concat with empty is identity
+            n->m_hash_matrix[0][0] = 1;
+            n->m_hash_matrix[0][1] = 0;
+            n->m_hash_matrix[1][0] = 0;
+            n->m_hash_matrix[1][1] = 1;
+        }
+        else if (n->is_concat()) {
+            snode* l = n->arg(0);
+            snode* r = n->arg(1);
+            if (l->has_cached_hash() && r->has_cached_hash()) {
+                // 2x2 matrix multiplication: M(L) * M(R)
+                n->m_hash_matrix[0][0] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][0];
+                n->m_hash_matrix[0][1] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][1];
+                n->m_hash_matrix[1][0] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][0];
+                n->m_hash_matrix[1][1] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][1];
+            }
+        }
+        else {
+            // leaf/token: [[HASH_BASE, value], [0, 1]]
+            // +1 avoids zero hash values; wraps safely on unsigned overflow
+            unsigned v = n->get_expr() ? n->get_expr()->get_id() + 1 : n->id() + 1;
+            n->m_hash_matrix[0][0] = HASH_BASE;
+            n->m_hash_matrix[0][1] = v;
+            n->m_hash_matrix[1][0] = 0;
+            n->m_hash_matrix[1][1] = 1;
+        }
+    }
+
+    snode* sgraph::mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args) {
+        unsigned id = m_nodes.size();
+        snode* n = snode::mk(m_region, e, k, id, num_args, args);
+        compute_metadata(n);
+        compute_hash_matrix(n);
+        m_nodes.push_back(n);
+        if (e) {
+            unsigned eid = e->get_id();
+            m_expr2snode.reserve(eid + 1, nullptr);
+            m_expr2snode[eid] = n;
+            // pin expression via egraph (the egraph has an expr trail)
+            mk_enode(e);
+        }
+        ++m_stats.m_num_nodes;
+        return n;
+    }
+
+    snode* sgraph::mk(expr* e) {
+        SASSERT(e);
+        snode* n = find(e);
+        if (n)
+            return n;
+
+        snode_kind k = classify(e);
+
+        if (!is_app(e))
+            return mk_snode(e, k, 0, nullptr);
+
+        app* a = to_app(e);
+        unsigned arity = a->get_num_args();
+
+        // recursively register children
+        // for seq/re children, create classified snodes
+        // for other children (e.g. integer exponents), create s_other snodes
+        snode_vector child_nodes;
+        for (unsigned i = 0; i < arity; ++i) {
+            expr* ch = a->get_arg(i);
+            snode* cn = mk(ch);
+            child_nodes.push_back(cn);
+        }
+
+        return mk_snode(e, k, child_nodes.size(), child_nodes.data());
+    }
+
+    snode* sgraph::find(expr* e) const {
+        if (!e)
+            return nullptr;
+        unsigned eid = e->get_id();
+        if (eid < m_expr2snode.size())
+            return m_expr2snode[eid];
+        return nullptr;
+    }
+
+    enode* sgraph::mk_enode(expr* e) {
+        enode* n = m_egraph.find(e);
+        if (n) return n;
+        enode_vector args;
+        if (is_app(e)) {
+            for (expr* arg : *to_app(e)) {
+                enode* a = mk_enode(arg);
+                args.push_back(a);
+            }
+        }
+        return m_egraph.mk(e, 0, args.size(), args.data());
+    }
+
+    void sgraph::push() {
+        m_scopes.push_back(m_nodes.size());
+        ++m_num_scopes;
+        m_egraph.push();
+    }
+
+    void sgraph::pop(unsigned num_scopes) {
+        if (num_scopes == 0)
+            return;
+        SASSERT(num_scopes <= m_num_scopes);
+        unsigned new_lvl = m_num_scopes - num_scopes;
+        unsigned old_sz = m_scopes[new_lvl];
+        for (unsigned i = m_nodes.size(); i-- > old_sz; ) {
+            snode* n = m_nodes[i];
+            if (n->get_expr()) {
+                unsigned eid = n->get_expr()->get_id();
+                if (eid < m_expr2snode.size())
+                    m_expr2snode[eid] = nullptr;
+            }
+        }
+        m_nodes.shrink(old_sz);
+        m_scopes.shrink(new_lvl);
+        m_num_scopes = new_lvl;
+        m_egraph.pop(num_scopes);
+    }
+
+    snode* sgraph::mk_var(symbol const& name) {
+        expr_ref e(m.mk_const(name, m_str_sort), m);
+        return mk(e);
+    }
+
+    snode* sgraph::mk_char(unsigned ch) {
+        expr_ref c(m_seq.str.mk_char(ch), m);
+        expr_ref u(m_seq.str.mk_unit(c), m);
+        return mk(u);
+    }
+
+    snode* sgraph::mk_empty() {
+        expr_ref e(m_seq.str.mk_empty(m_str_sort), m);
+        return mk(e);
+    }
+
+    snode* sgraph::mk_concat(snode* a, snode* b) {
+        if (a->is_empty()) return b;
+        if (b->is_empty()) return a;
+        expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m);
+        return mk(e);
+    }
+
+    snode* sgraph::drop_first(snode* n) {
+        if (n->is_empty() || n->is_token())
+            return mk_empty();
+        SASSERT(n->is_concat());
+        snode* l = n->arg(0);
+        snode* r = n->arg(1);
+        if (l->is_token() || l->is_empty())
+            return r;
+        return mk_concat(drop_first(l), r);
+    }
+
+    snode* sgraph::drop_last(snode* n) {
+        if (n->is_empty() || n->is_token())
+            return mk_empty();
+        SASSERT(n->is_concat());
+        snode* l = n->arg(0);
+        snode* r = n->arg(1);
+        if (r->is_token() || r->is_empty())
+            return l;
+        return mk_concat(l, drop_last(r));
+    }
+
+    snode* sgraph::drop_left(snode* n, unsigned count) {
+        if (count == 0 || n->is_empty()) return n;
+        if (count >= n->length()) return mk_empty();
+        for (unsigned i = 0; i < count; ++i)
+            n = drop_first(n);
+        return n;
+    }
+
+    snode* sgraph::drop_right(snode* n, unsigned count) {
+        if (count == 0 || n->is_empty()) return n;
+        if (count >= n->length()) return mk_empty();
+        for (unsigned i = 0; i < count; ++i)
+            n = drop_last(n);
+        return n;
+    }
+
+    snode* sgraph::subst(snode* n, snode* var, snode* replacement) {
+        if (n == var)
+            return replacement;
+        if (n->is_empty() || n->is_char())
+            return n;
+        if (n->is_concat()) {
+            // check substitution cache (ZIPT-style optimization)
+            if (n->m_subst_cache) {
+                snode* cached = n->m_subst_cache->find(var->id(), replacement->id());
+                if (cached)
+                    return cached;
+            }
+            snode* result = mk_concat(subst(n->arg(0), var, replacement),
+                                      subst(n->arg(1), var, replacement));
+            // cache the result
+            if (!n->m_subst_cache) {
+                n->m_subst_cache = alloc(snode_subst_cache);
+                m_subst_caches.push_back(n->m_subst_cache);
+            }
+            n->m_subst_cache->insert(var->id(), replacement->id(), result);
+            return result;
+        }
+        // for non-concat compound nodes (power, star, etc.), no substitution into children
+        return n;
+    }
+
+    snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
+        expr* re_expr = re->get_expr();
+        expr* elem_expr = elem->get_expr();
+        if (!re_expr || !elem_expr)
+            return nullptr;
+        // unwrap str.unit to get the character expression
+        expr* ch = nullptr;
+        if (m_seq.str.is_unit(elem_expr, ch))
+            elem_expr = ch;
+        expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
+        if (!result)
+            return nullptr;
+        return mk(result);
+    }
+
+    void sgraph::collect_re_predicates(snode* re, expr_ref_vector& preds) {
+        if (!re || !re->get_expr())
+            return;
+        expr* e = re->get_expr();
+        expr* ch = nullptr, *lo = nullptr, *hi = nullptr;
+        // leaf regex predicates: character ranges and single characters
+        if (m_seq.re.is_range(e, lo, hi)) {
+            preds.push_back(e);
+            return;
+        }
+        if (m_seq.re.is_to_re(e))
+            return;
+        if (m_seq.re.is_full_char(e))
+            return;
+        if (m_seq.re.is_full_seq(e))
+            return;
+        if (m_seq.re.is_empty(e))
+            return;
+        // recurse into compound regex operators
+        for (unsigned i = 0; i < re->num_args(); ++i)
+            collect_re_predicates(re->arg(i), preds);
+    }
+
+    void sgraph::compute_minterms(snode* re, snode_vector& minterms) {
+        // extract character predicates from the regex
+        expr_ref_vector preds(m);
+        collect_re_predicates(re, preds);
+        if (preds.empty()) {
+            // no predicates means the whole alphabet is one minterm
+            // represented by full_char
+            expr_ref fc(m_seq.re.mk_full_char(m_str_sort), m);
+            minterms.push_back(mk(fc));
+            return;
+        }
+        // generate minterms as conjunctions/negations of predicates
+        // for n predicates, there are up to 2^n minterms
+        unsigned n = preds.size();
+        // cap at reasonable size to prevent exponential blowup
+        if (n > 20)
+            n = 20;
+        for (unsigned mask = 0; mask < (1u << n); ++mask) {
+            expr_ref_vector conj(m);
+            for (unsigned i = 0; i < n; ++i) {
+                if (mask & (1u << i))
+                    conj.push_back(preds.get(i));
+                else
+                    conj.push_back(m_seq.re.mk_complement(preds.get(i)));
+            }
+            SASSERT(!conj.empty());
+            // intersect all terms
+            expr_ref mt(conj.get(0), m);
+            for (unsigned i = 1; i < conj.size(); ++i)
+                mt = m_seq.re.mk_inter(mt, conj.get(i));
+            minterms.push_back(mk(mt));
+        }
+    }
+
+    std::ostream& sgraph::display(std::ostream& out) const {
+        auto kind_str = [](snode_kind k) -> char const* {
+            switch (k) {
+            case snode_kind::s_empty:      return "empty";
+            case snode_kind::s_char:       return "char";
+            case snode_kind::s_var:        return "var";
+            case snode_kind::s_unit:       return "unit";
+            case snode_kind::s_concat:     return "concat";
+            case snode_kind::s_power:      return "power";
+            case snode_kind::s_star:       return "star";
+            case snode_kind::s_loop:       return "loop";
+            case snode_kind::s_union:      return "union";
+            case snode_kind::s_intersect:  return "intersect";
+            case snode_kind::s_complement: return "complement";
+            case snode_kind::s_fail:       return "fail";
+            case snode_kind::s_full_char:  return "full_char";
+            case snode_kind::s_full_seq:   return "full_seq";
+            case snode_kind::s_to_re:      return "to_re";
+            case snode_kind::s_in_re:      return "in_re";
+            case snode_kind::s_other:      return "other";
+            }
+            return "?";
+        };
+        for (snode* n : m_nodes) {
+            out << "snode[" << n->id() << "] "
+                << kind_str(n->kind())
+                << " level=" << n->level()
+                << " len=" << n->length()
+                << " ground=" << n->is_ground()
+                << " rfree=" << n->is_regex_free()
+                << " nullable=" << n->is_nullable();
+            if (n->num_args() > 0) {
+                out << " args=(";
+                for (unsigned i = 0; i < n->num_args(); ++i) {
+                    if (i > 0) out << ", ";
+                    out << n->arg(i)->id();
+                }
+                out << ")";
+            }
+            if (n->get_expr())
+                out << " expr=" << mk_pp(n->get_expr(), m);
+            out << "\n";
+        }
+        return out;
+    }
+
+    void sgraph::collect_statistics(statistics& st) const {
+        st.update("seq-graph-nodes", m_stats.m_num_nodes);
+        st.update("seq-graph-concat", m_stats.m_num_concat);
+        st.update("seq-graph-power", m_stats.m_num_power);
+        st.update("seq-graph-hash-hits", m_stats.m_num_hash_hits);
+        m_egraph.collect_statistics(st);
+    }
+
+}
+
--- a/src/ast/euf/euf_sgraph.h
+++ b/src/ast/euf/euf_sgraph.h
@ -0,0 +1,160 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    euf_sgraph.h
+
+Abstract:
+
+    Sequence/string graph layer
+
+    Encapsulates string and regex expressions for the string solver.
+    Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT).
+    The sgraph maps Z3 sequence/regex AST expressions to snode structures
+    organized as binary concatenation trees with metadata, and owns an
+    egraph with a seq_plugin for congruence closure.
+
+    -- snode classification: empty, char, variable, unit, concat, power,
+       star, loop, union, intersection, complement, fail, full_char,
+       full_seq, to_re, in_re, other.
+    -- Metadata computation: ground, regex_free, nullable, level, length.
+    -- Expression registration via mk(expr*), lookup via find(expr*).
+    -- Scope management: push/pop with backtracking.
+    -- egraph ownership with seq_plugin for:
+       * concat associativity via associativity-respecting hash table,
+       * Kleene star merging (u.v*.v*.w = u.v*.w),
+       * nullable absorption next to .* (u.*.v.w = u.*.w when v nullable),
+       * str.++ identity elimination (concat(a, ε) = a),
+       * re.++ identity/absorption (concat(a, epsilon) = a, concat(a, ∅) = ∅).
+    -- enode registration via mk_enode(expr*).
+
+    ZIPT features not yet ported:
+
+    -- Str operations: normalisation with union-find representatives and
+       cache migration, balanced tree maintenance, drop left/right with
+       caching, substitution, indexed access, iteration, ToList caching,
+       simplification, derivative computation, structural equality with
+       associative hashing, rotation equality, expression reconstruction,
+       Graphviz export.
+
+    -- StrToken subclasses: SymCharToken, StrAtToken, SubStrToken,
+       SetToken, PostToken/PreToken.
+
+    -- StrToken features: Nielsen-style GetDecomposition with side
+       constraints, NamedStrToken extension tracking for variable
+       splitting with PowerExtension, CollectSymbols for Parikh analysis,
+       MinTerms for character class analysis, token ordering, Derivable
+       and BasicRegex flags.
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+    Clemens Eisenhofer 2026-03-01
+
+--*/
+
+#pragma once
+
+#include "util/region.h"
+#include "util/statistics.h"
+#include "ast/ast.h"
+#include "ast/seq_decl_plugin.h"
+#include "ast/rewriter/seq_rewriter.h"
+#include "ast/euf/euf_snode.h"
+#include "ast/euf/euf_egraph.h"
+
+namespace euf {
+
+    class seq_plugin;
+
+    class sgraph {
+
+        struct stats {
+            unsigned m_num_nodes;
+            unsigned m_num_concat;
+            unsigned m_num_power;
+            unsigned m_num_hash_hits;
+            stats() { reset(); }
+            void reset() { memset(this, 0, sizeof(*this)); }
+        };
+
+        ast_manager&     m;
+        seq_util         m_seq;
+        seq_rewriter     m_rewriter;
+        egraph&          m_egraph;
+        region           m_region;
+        snode_vector     m_nodes;
+        sort_ref         m_str_sort;    // cached string sort
+        unsigned_vector  m_scopes;
+        unsigned         m_num_scopes = 0;
+        stats            m_stats;
+        bool             m_add_plugin; // whether sgraph created the seq_plugin
+
+        // tracks allocated subst caches for cleanup
+        ptr_vector<snode_subst_cache> m_subst_caches;
+
+        // maps expression id to snode
+        ptr_vector<snode> m_expr2snode;
+
+        snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
+        snode_kind classify(expr* e) const;
+        void compute_metadata(snode* n);
+        void compute_hash_matrix(snode* n);
+        void collect_re_predicates(snode* re, expr_ref_vector& preds);
+
+    public:
+        sgraph(ast_manager& m, egraph& eg, bool add_plugin = true);
+        ~sgraph();
+
+        ast_manager& get_manager() const { return m; }
+        seq_util& get_seq_util() { return m_seq; }
+        egraph& get_egraph() { return m_egraph; }
+        egraph const& get_egraph() const { return m_egraph; }
+
+        // register an expression and return its snode
+        snode* mk(expr* e);
+
+        // lookup an already-registered expression
+        snode* find(expr* e) const;
+
+        // register expression in both sgraph and egraph
+        enode* mk_enode(expr* e);
+
+        // factory methods for creating snodes with corresponding expressions
+        snode* mk_var(symbol const& name);
+        snode* mk_char(unsigned ch);
+        snode* mk_empty();
+        snode* mk_concat(snode* a, snode* b);
+
+        // drop operations: remove tokens from the front/back of a concat tree
+        snode* drop_first(snode* n);
+        snode* drop_last(snode* n);
+        snode* drop_left(snode* n, unsigned count);
+        snode* drop_right(snode* n, unsigned count);
+
+        // substitution: replace all occurrences of var in n by replacement
+        snode* subst(snode* n, snode* var, snode* replacement);
+
+        // Brzozowski derivative of regex re with respect to element elem
+        snode* brzozowski_deriv(snode* re, snode* elem);
+
+        // compute minterms (character class partition) from a regex
+        void compute_minterms(snode* re, snode_vector& minterms);
+
+        // scope management for backtracking
+        void push();
+        void pop(unsigned num_scopes);
+
+        // access
+        snode_vector const& nodes() const { return m_nodes; }
+        unsigned num_nodes() const { return m_nodes.size(); }
+
+        // display
+        std::ostream& display(std::ostream& out) const;
+
+        void collect_statistics(statistics& st) const;
+    };
+
+}
+
--- a/src/ast/euf/euf_snode.h
+++ b/src/ast/euf/euf_snode.h
@ -0,0 +1,202 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    euf_snode.h
+
+Abstract:
+
+    snode layer for sequence/string graph
+
+    Encapsulates strings in the style of euf_enode.h.
+    Maps Z3 sequence expressions to a ZIPT-style representation where
+    strings are composed of tokens (characters, variables, powers, regex, etc.)
+    organized as a binary tree of concatenations.
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+    Clemens Eisenhofer 2026-03-01
+
+--*/
+
+#pragma once
+
+#include "util/vector.h"
+#include "util/region.h"
+#include "ast/ast.h"
+#include "ast/seq_decl_plugin.h"
+
+namespace euf {
+
+    class sgraph;
+    class snode;
+    struct snode_subst_cache;
+
+    typedef ptr_vector<snode> snode_vector;
+
+    enum class snode_kind {
+        s_empty,       // empty string (OP_SEQ_EMPTY or empty string constant)
+        s_char,        // concrete character unit (OP_SEQ_UNIT wrapping a char literal)
+        s_var,         // string variable (uninterpreted constant of string sort)
+        s_unit,        // generic unit (OP_SEQ_UNIT with non-literal character)
+        s_concat,      // concatenation of two snodes (OP_SEQ_CONCAT)
+        s_power,       // string exponentiation s^n (OP_SEQ_POWER)
+        s_star,        // Kleene star r* (OP_RE_STAR)
+        s_loop,        // bounded loop r{lo,hi} (OP_RE_LOOP)
+        s_union,       // union r1|r2 (OP_RE_UNION)
+        s_intersect,   // intersection r1&r2 (OP_RE_INTERSECT)
+        s_complement,  // complement ~r (OP_RE_COMPLEMENT)
+        s_fail,        // empty language (OP_RE_EMPTY_SET)
+        s_full_char,   // full character set (OP_RE_FULL_CHAR_SET)
+        s_full_seq,    // full sequence set r=.* (OP_RE_FULL_SEQ_SET)
+        s_to_re,       // string to regex (OP_SEQ_TO_RE)
+        s_in_re,       // regex membership (OP_SEQ_IN_RE)
+        s_other,       // other sequence expression not directly classified
+    };
+
+    class snode {
+        expr*       m_expr      = nullptr;
+        snode_kind  m_kind      = snode_kind::s_other;
+        unsigned    m_id        = UINT_MAX;
+        unsigned    m_num_args  = 0;
+
+        // metadata flags, analogous to ZIPT's Str/StrToken properties
+        bool        m_ground     = true;   // no uninterpreted string variables
+        bool        m_regex_free = true;   // no regex constructs
+        bool        m_nullable   = false;  // accepts the empty string
+        unsigned    m_level      = 0;      // tree depth/level (0 for empty, 1 for singletons)
+        unsigned    m_length     = 0;      // token count, number of leaf tokens in the tree
+
+        // hash matrix for associativity-respecting hashing (2x2 polynomial hash matrix)
+        // all zeros means not cached, non-zero means cached
+        unsigned    m_hash_matrix[2][2] = {{0,0},{0,0}};
+
+        // substitution cache (lazy-initialized, owned by sgraph)
+        snode_subst_cache* m_subst_cache = nullptr;
+
+        snode*      m_args[0]; // variable-length array, allocated via get_snode_size(num_args)
+
+        friend class sgraph;
+
+        static unsigned get_snode_size(unsigned num_args) {
+            return sizeof(snode) + num_args * sizeof(snode*);
+        }
+
+        static snode* mk(region& r, expr* e, snode_kind k, unsigned id, unsigned num_args, snode* const* args) {
+            void* mem = r.allocate(get_snode_size(num_args));
+            snode* n = new (mem) snode();
+            n->m_expr = e;
+            n->m_kind = k;
+            n->m_id = id;
+            n->m_num_args = num_args;
+            for (unsigned i = 0; i < num_args; ++i)
+                n->m_args[i] = args[i];
+            return n;
+        }
+
+    public:
+        expr*      get_expr() const { return m_expr; }
+        snode_kind kind()     const { return m_kind; }
+        unsigned   id()       const { return m_id; }
+        unsigned   num_args() const { return m_num_args; }
+        snode*     arg(unsigned i) const { SASSERT(i < m_num_args); return m_args[i]; }
+
+        bool is_ground()     const { return m_ground; }
+        bool is_regex_free() const { return m_regex_free; }
+        bool is_nullable()   const { return m_nullable; }
+        unsigned level()     const { return m_level; }
+        unsigned length()    const { return m_length; }
+
+        // associativity-respecting hash: cached if the 2x2 matrix is non-zero.
+        // M[0][0] = HASH_BASE^(num_leaves) which is always nonzero since HASH_BASE
+        // is odd and gcd(odd, 2^32) = 1, so the check is safe.
+        bool has_cached_hash() const { return m_hash_matrix[0][0] != 0; }
+        unsigned assoc_hash()  const { return m_hash_matrix[0][1]; }
+
+        bool is_empty()   const { return m_kind == snode_kind::s_empty; }
+        bool is_char()    const { return m_kind == snode_kind::s_char; }
+        bool is_var()     const { return m_kind == snode_kind::s_var; }
+        bool is_unit()    const { return m_kind == snode_kind::s_unit; }
+        bool is_concat()  const { return m_kind == snode_kind::s_concat; }
+        bool is_power()   const { return m_kind == snode_kind::s_power; }
+        bool is_star()    const { return m_kind == snode_kind::s_star; }
+        bool is_loop()    const { return m_kind == snode_kind::s_loop; }
+        bool is_union()   const { return m_kind == snode_kind::s_union; }
+        bool is_intersect()  const { return m_kind == snode_kind::s_intersect; }
+        bool is_complement() const { return m_kind == snode_kind::s_complement; }
+        bool is_fail()       const { return m_kind == snode_kind::s_fail; }
+        bool is_full_char()  const { return m_kind == snode_kind::s_full_char; }
+        bool is_full_seq()   const { return m_kind == snode_kind::s_full_seq; }
+        bool is_to_re()      const { return m_kind == snode_kind::s_to_re; }
+        bool is_in_re()      const { return m_kind == snode_kind::s_in_re; }
+
+        // is this a leaf token (analogous to ZIPT's StrToken as opposed to Str)
+        bool is_token() const {
+            switch (m_kind) {
+            case snode_kind::s_empty:
+            case snode_kind::s_concat:
+                return false;
+            default:
+                return true;
+            }
+        }
+
+        // analogous to ZIPT's Str.First / Str.Last
+        snode const* first() const {
+            snode const* s = this;
+            while (s->is_concat())
+                s = s->arg(0);
+            return s;
+        }
+
+        snode const* last() const {
+            snode const* s = this;
+            while (s->is_concat())
+                s = s->arg(1);
+            return s;
+        }
+
+        snode* first() {
+            snode* s = this;
+            while (s->is_concat())
+                s = s->arg(0);
+            return s;
+        }
+
+        snode* last() {
+            snode* s = this;
+            while (s->is_concat())
+                s = s->arg(1);
+            return s;
+        }
+
+        // collect all leaf tokens in left-to-right order
+        void collect_tokens(snode_vector& tokens) const {
+            if (is_concat()) {
+                arg(0)->collect_tokens(tokens);
+                arg(1)->collect_tokens(tokens);
+            }
+            else if (!is_empty()) {
+                tokens.push_back(const_cast<snode*>(this));
+            }
+        }
+
+        // access the i-th token (0-based, left-to-right order)
+        // returns nullptr if i >= length()
+        snode* at(unsigned i) const {
+            if (is_concat()) {
+                unsigned left_len = arg(0)->length();
+                if (i < left_len)
+                    return arg(0)->at(i);
+                return arg(1)->at(i - left_len);
+            }
+            if (is_empty())
+                return nullptr;
+            return i == 0 ? const_cast<snode*>(this) : nullptr;
+        }
+    };
+
+}
+
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@ -222,6 +222,7 @@ void seq_decl_plugin::init() {
    m_sigs[OP_SEQ_NTH_I]     = alloc(psig, m, "seq.nth_i",    1, 2, seqAintT, A);
    m_sigs[OP_SEQ_NTH_U]     = alloc(psig, m, "seq.nth_u",    1, 2, seqAintT, A);
    m_sigs[OP_SEQ_LENGTH]    = alloc(psig, m, "seq.len",      1, 1, &seqA, intT);
+    m_sigs[OP_SEQ_POWER]     = alloc(psig, m, "seq.power",    1, 2, seqAintT, seqA);
    m_sigs[OP_RE_PLUS]       = alloc(psig, m, "re.+",         1, 1, &reA, reA);
    m_sigs[OP_RE_STAR]       = alloc(psig, m, "re.*",         1, 1, &reA, reA);
    m_sigs[OP_RE_OPTION]     = alloc(psig, m, "re.opt",       1, 1, &reA, reA);
@ -591,6 +592,10 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p
        add_map_sig();
        return mk_str_fun(k, arity, domain, range, k);    

+    case OP_SEQ_POWER:
+        match(*m_sigs[k], arity, domain, range, rng);
+        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
+
    case OP_SEQ_TO_RE:
        m_has_re = true;
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP);
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@ -59,6 +59,7 @@ enum seq_op_kind {
    OP_SEQ_MAPI,           // Array[Int,A,B] -> Int -> Seq[A] -> Seq[B]
    OP_SEQ_FOLDL,          // Array[B,A,B] -> B -> Seq[A] -> B
    OP_SEQ_FOLDLI,         // Array[Int,B,A,B] -> Int -> B -> Seq[A] -> B
+    OP_SEQ_POWER,          // Seq -> Int -> Seq, string exponentiation s^n
 
    OP_RE_PLUS,
    OP_RE_STAR,
@ -307,6 +308,7 @@ public:
        app* mk_mapi(expr* f, expr* i, expr* s) const { expr* es[3] = { f, i, s }; return m.mk_app(m_fid, OP_SEQ_MAPI, 3, es); }
        app* mk_foldl(expr* f, expr* b, expr* s) const { expr* es[3] = { f, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDL, 3, es); }
        app* mk_foldli(expr* f, expr* i, expr* b, expr* s) const { expr* es[4] = { f, i, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDLI, 4, es); }
+        app* mk_power(expr* s, expr* n) const { expr* es[2] = { s, n }; return m.mk_app(m_fid, OP_SEQ_POWER, 2, es); }

        app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); }
        app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); }
@ -348,6 +350,7 @@ public:
        bool is_mapi(expr const* n)      const { return is_app_of(n, m_fid, OP_SEQ_MAPI); }
        bool is_foldl(expr const* n)      const { return is_app_of(n, m_fid, OP_SEQ_FOLDL); }
        bool is_foldli(expr const* n)      const { return is_app_of(n, m_fid, OP_SEQ_FOLDLI); }
+        bool is_power(expr const* n)       const { return is_app_of(n, m_fid, OP_SEQ_POWER); }
        bool is_extract(expr const* n)  const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); }
        bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); }
        bool is_at(expr const* n)       const { return is_app_of(n, m_fid, OP_SEQ_AT); }
@ -404,6 +407,7 @@ public:
        MATCH_TERNARY(is_mapi);
        MATCH_TERNARY(is_foldl);
        MATCH_QUATARY(is_foldli);
+        MATCH_BINARY(is_power);
        MATCH_BINARY(is_last_index);
        MATCH_TERNARY(is_replace);
        MATCH_TERNARY(is_replace_re);
--- a/src/smt/seq/CMakeLists.txt
+++ b/src/smt/seq/CMakeLists.txt
@ -0,0 +1,7 @@
+z3_add_component(smt_seq
+  SOURCES
+    seq_nielsen.cpp
+  COMPONENT_DEPENDENCIES
+    euf
+    rewriter
+)
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -0,0 +1,311 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_nielsen.cpp
+
+Abstract:
+
+    Nielsen graph implementation for string constraint solving.
+
+    Ports the constraint types and Nielsen graph structures from
+    ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-02
+    Clemens Eisenhofer 2026-03-02
+
+--*/
+
+#include "smt/seq/seq_nielsen.h"
+#include "ast/ast_pp.h"
+
+namespace seq {
+
+    // -----------------------------------------------
+    // dep_tracker
+    // -----------------------------------------------
+
+    dep_tracker::dep_tracker(unsigned num_bits) {
+        unsigned words = (num_bits + 31) / 32;
+        m_bits.resize(words, 0);
+    }
+
+    dep_tracker::dep_tracker(unsigned num_bits, unsigned set_bit) {
+        unsigned words = (num_bits + 31) / 32;
+        m_bits.resize(words, 0);
+        if (set_bit < num_bits) {
+            unsigned word_idx = set_bit / 32;
+            unsigned bit_idx = set_bit % 32;
+            m_bits[word_idx] = 1u << bit_idx;
+        }
+    }
+
+    void dep_tracker::merge(dep_tracker const& other) {
+        if (other.m_bits.empty())
+            return;
+        if (m_bits.size() < other.m_bits.size())
+            m_bits.resize(other.m_bits.size(), 0);
+        for (unsigned i = 0; i < other.m_bits.size(); ++i)
+            m_bits[i] |= other.m_bits[i];
+    }
+
+    bool dep_tracker::is_superset(dep_tracker const& other) const {
+        for (unsigned i = 0; i < other.m_bits.size(); ++i) {
+            unsigned my_bits = (i < m_bits.size()) ? m_bits[i] : 0;
+            if ((my_bits & other.m_bits[i]) != other.m_bits[i])
+                return false;
+        }
+        return true;
+    }
+
+    bool dep_tracker::empty() const {
+        for (unsigned b : m_bits)
+            if (b != 0) return false;
+        return true;
+    }
+
+    // -----------------------------------------------
+    // str_eq
+    // -----------------------------------------------
+
+    void str_eq::sort() {
+        if (m_lhs && m_rhs && m_lhs->id() > m_rhs->id())
+            std::swap(m_lhs, m_rhs);
+    }
+
+    bool str_eq::is_trivial() const {
+        return m_lhs == m_rhs ||
+               (m_lhs && m_rhs && m_lhs->is_empty() && m_rhs->is_empty());
+    }
+
+    bool str_eq::contains_var(euf::snode* var) const {
+        if (!var) return false;
+        // check if var appears in the token list of lhs or rhs
+        if (m_lhs) {
+            euf::snode_vector tokens;
+            m_lhs->collect_tokens(tokens);
+            for (euf::snode* t : tokens)
+                if (t == var) return true;
+        }
+        if (m_rhs) {
+            euf::snode_vector tokens;
+            m_rhs->collect_tokens(tokens);
+            for (euf::snode* t : tokens)
+                if (t == var) return true;
+        }
+        return false;
+    }
+
+    // -----------------------------------------------
+    // str_mem
+    // -----------------------------------------------
+
+    bool str_mem::is_primitive() const {
+        return m_str && m_str->length() == 1 && m_str->is_var();
+    }
+
+    bool str_mem::contains_var(euf::snode* var) const {
+        if (!var) return false;
+        if (m_str) {
+            euf::snode_vector tokens;
+            m_str->collect_tokens(tokens);
+            for (euf::snode* t : tokens)
+                if (t == var) return true;
+        }
+        return false;
+    }
+
+    // -----------------------------------------------
+    // nielsen_subst
+    // -----------------------------------------------
+
+    bool nielsen_subst::is_eliminating() const {
+        if (!m_var || !m_replacement) return true;
+        // check if var appears in replacement
+        euf::snode_vector tokens;
+        m_replacement->collect_tokens(tokens);
+        for (euf::snode* t : tokens)
+            if (t == m_var) return false;
+        return true;
+    }
+
+    // -----------------------------------------------
+    // nielsen_edge
+    // -----------------------------------------------
+
+    nielsen_edge::nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress):
+        m_src(src), m_tgt(tgt), m_is_progress(is_progress) {
+    }
+
+    // -----------------------------------------------
+    // nielsen_node
+    // -----------------------------------------------
+
+    nielsen_node::nielsen_node(nielsen_graph* graph, unsigned id):
+        m_id(id), m_graph(graph), m_is_progress(true) {
+    }
+
+    void nielsen_node::clone_from(nielsen_node const& parent) {
+        m_str_eq.reset();
+        m_str_mem.reset();
+        for (auto const& eq : parent.m_str_eq)
+            m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep));
+        for (auto const& mem : parent.m_str_mem)
+            m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep));
+    }
+
+    void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) {
+        if (!s.m_var) return;
+        for (unsigned i = 0; i < m_str_eq.size(); ++i) {
+            str_eq& eq = m_str_eq[i];
+            eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement);
+            eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement);
+            eq.m_dep.merge(s.m_dep);
+            eq.sort();
+        }
+        for (unsigned i = 0; i < m_str_mem.size(); ++i) {
+            str_mem& mem = m_str_mem[i];
+            mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement);
+            // regex is typically ground, but apply subst for generality
+            mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement);
+            mem.m_dep.merge(s.m_dep);
+        }
+    }
+
+    // -----------------------------------------------
+    // nielsen_graph
+    // -----------------------------------------------
+
+    nielsen_graph::nielsen_graph(euf::sgraph& sg):
+        m_sg(sg) {
+    }
+
+    nielsen_graph::~nielsen_graph() {
+        reset();
+    }
+
+    nielsen_node* nielsen_graph::mk_node() {
+        unsigned id = m_nodes.size();
+        nielsen_node* n = alloc(nielsen_node, this, id);
+        m_nodes.push_back(n);
+        return n;
+    }
+
+    nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) {
+        nielsen_node* child = mk_node();
+        child->clone_from(*parent);
+        return child;
+    }
+
+    nielsen_edge* nielsen_graph::mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress) {
+        nielsen_edge* e = alloc(nielsen_edge, src, tgt, is_progress);
+        m_edges.push_back(e);
+        src->add_outgoing(e);
+        return e;
+    }
+
+    void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) {
+        if (!m_root)
+            m_root = mk_node();
+        dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
+                        m_root->str_eqs().size());
+        str_eq eq(lhs, rhs, dep);
+        eq.sort();
+        m_root->add_str_eq(eq);
+    }
+
+    void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) {
+        if (!m_root)
+            m_root = mk_node();
+        dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
+                        m_root->str_eqs().size() + m_root->str_mems().size());
+        euf::snode* history = m_sg.mk_empty();
+        unsigned id = next_mem_id();
+        m_root->add_str_mem(str_mem(str, regex, history, id, dep));
+    }
+
+    void nielsen_graph::inc_run_idx() {
+        if (m_run_idx == UINT_MAX) {
+            for (nielsen_node* n : m_nodes)
+                n->reset_counter();
+            m_run_idx = 1;
+        }
+        else
+            ++m_run_idx;
+    }
+
+    void nielsen_graph::reset() {
+        for (nielsen_node* n : m_nodes)
+            dealloc(n);
+        for (nielsen_edge* e : m_edges)
+            dealloc(e);
+        m_nodes.reset();
+        m_edges.reset();
+        m_root = nullptr;
+        m_run_idx = 0;
+        m_depth_bound = 0;
+        m_next_mem_id = 0;
+    }
+
+    std::ostream& nielsen_graph::display(std::ostream& out) const {
+        out << "nielsen_graph with " << m_nodes.size() << " nodes, "
+            << m_edges.size() << " edges\n";
+
+        for (nielsen_node const* n : m_nodes) {
+            out << "  node[" << n->id() << "]";
+            if (n == m_root)
+                out << " (root)";
+            if (n->is_general_conflict())
+                out << " CONFLICT";
+            if (n->is_extended())
+                out << " EXTENDED";
+            out << "\n";
+
+            // display string equalities
+            for (auto const& eq : n->str_eqs()) {
+                out << "    str_eq: ";
+                if (eq.m_lhs) out << "lhs[id=" << eq.m_lhs->id() << ",len=" << eq.m_lhs->length() << "]";
+                else out << "null";
+                out << " = ";
+                if (eq.m_rhs) out << "rhs[id=" << eq.m_rhs->id() << ",len=" << eq.m_rhs->length() << "]";
+                else out << "null";
+                out << "\n";
+            }
+
+            // display regex memberships
+            for (auto const& mem : n->str_mems()) {
+                out << "    str_mem[" << mem.m_id << "]: ";
+                if (mem.m_str) out << "str[id=" << mem.m_str->id() << ",len=" << mem.m_str->length() << "]";
+                else out << "null";
+                out << " in ";
+                if (mem.m_regex) out << "re[id=" << mem.m_regex->id() << "]";
+                else out << "null";
+                out << "\n";
+            }
+
+            // display outgoing edges
+            for (nielsen_edge const* e : n->outgoing()) {
+                out << "    -> node[" << e->tgt()->id() << "]";
+                if (e->is_progress()) out << " (progress)";
+                for (auto const& s : e->subst()) {
+                    out << " {";
+                    if (s.m_var) out << "var[" << s.m_var->id() << "]";
+                    out << " -> ";
+                    if (s.m_replacement) out << "repl[" << s.m_replacement->id() << ",len=" << s.m_replacement->length() << "]";
+                    else out << "eps";
+                    out << "}";
+                }
+                out << "\n";
+            }
+
+            if (n->backedge())
+                out << "    backedge -> node[" << n->backedge()->id() << "]\n";
+        }
+
+        return out;
+    }
+
+}
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -0,0 +1,327 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_nielsen.h
+
+Abstract:
+
+    Nielsen graph for string constraint solving.
+
+    Ports the constraint types and Nielsen graph structures from
+    ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
+    into Z3's smt/seq framework.
+
+    The Nielsen graph is used for solving word equations and regex
+    membership constraints via Nielsen transformations. Each node
+    contains a set of constraints (string equalities, regex memberships,
+    integer equalities/inequalities) and edges represent substitutions
+    that transform one constraint set into another.
+
+    Key components:
+    -- str_eq:  string equality constraint (lhs = rhs)
+    -- str_mem: regex membership constraint (str in regex)
+    -- nielsen_subst: variable substitution (var -> replacement)
+    -- nielsen_edge: graph edge with substitutions and side constraints
+    -- nielsen_node: graph node with constraint set and outgoing edges
+    -- nielsen_graph: the overall Nielsen transformation graph
+
+Author:
+
+    Nikolaj Bjorner (nbjorner) 2026-03-02
+    Clemens Eisenhofer 2026-03-02
+
+--*/
+
+#pragma once
+
+#include "util/vector.h"
+#include "util/uint_set.h"
+#include "ast/ast.h"
+#include "ast/seq_decl_plugin.h"
+#include "ast/euf/euf_sgraph.h"
+
+namespace seq {
+
+    // forward declarations
+    class nielsen_node;
+    class nielsen_edge;
+    class nielsen_graph;
+
+    // simplification result for constraint processing
+    // mirrors ZIPT's SimplifyResult enum
+    enum class simplify_result {
+        proceed,               // no change, continue
+        conflict,              // constraint is unsatisfiable
+        satisfied,             // constraint is trivially satisfied
+        restart,               // constraint was simplified, restart
+        restart_and_satisfied, // simplified and satisfied
+    };
+
+    // reason for backtracking in the Nielsen graph
+    // mirrors ZIPT's BacktrackReasons enum
+    enum class backtrack_reason {
+        unevaluated,
+        extended,
+        symbol_clash,
+        parikh_image,
+        subsumption,
+        arithmetic,
+        regex,
+        regex_widening,
+        character_range,
+        smt,
+        children_failed,
+    };
+
+    // dependency tracker: bitvector tracking which input constraints
+    // contributed to deriving a given constraint
+    // mirrors ZIPT's DependencyTracker
+    class dep_tracker {
+        svector<unsigned> m_bits;
+    public:
+        dep_tracker() = default;
+        explicit dep_tracker(unsigned num_bits);
+        dep_tracker(unsigned num_bits, unsigned set_bit);
+
+        void merge(dep_tracker const& other);
+        bool is_superset(dep_tracker const& other) const;
+        bool empty() const;
+
+        bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; }
+        bool operator!=(dep_tracker const& other) const { return !(*this == other); }
+    };
+
+    // string equality constraint: lhs = rhs
+    // mirrors ZIPT's StrEq (both sides are regex-free snode trees)
+    struct str_eq {
+        euf::snode* m_lhs;
+        euf::snode* m_rhs;
+        dep_tracker m_dep;
+
+        str_eq(): m_lhs(nullptr), m_rhs(nullptr) {}
+        str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep):
+            m_lhs(lhs), m_rhs(rhs), m_dep(dep) {}
+
+        bool operator==(str_eq const& other) const {
+            return m_lhs == other.m_lhs && m_rhs == other.m_rhs;
+        }
+
+        // sort so that lhs <= rhs by snode id
+        void sort();
+
+        // check if both sides are empty (trivially satisfied)
+        bool is_trivial() const;
+
+        // check if the constraint contains a given variable
+        bool contains_var(euf::snode* var) const;
+    };
+
+    // regex membership constraint: str in regex
+    // mirrors ZIPT's StrMem
+    struct str_mem {
+        euf::snode* m_str;
+        euf::snode* m_regex;
+        euf::snode* m_history;  // tracks derivation history for cycle detection
+        unsigned    m_id;       // unique identifier
+        dep_tracker m_dep;
+
+        str_mem(): m_str(nullptr), m_regex(nullptr), m_history(nullptr), m_id(UINT_MAX) {}
+        str_mem(euf::snode* str, euf::snode* regex, euf::snode* history, unsigned id, dep_tracker const& dep):
+            m_str(str), m_regex(regex), m_history(history), m_id(id), m_dep(dep) {}
+
+        bool operator==(str_mem const& other) const {
+            return m_id == other.m_id && m_str == other.m_str && m_regex == other.m_regex;
+        }
+
+        // check if the constraint has the form x in R with x a single variable
+        bool is_primitive() const;
+
+        // check if the constraint contains a given variable
+        bool contains_var(euf::snode* var) const;
+    };
+
+    // string variable substitution: var -> replacement
+    // mirrors ZIPT's Subst
+    struct nielsen_subst {
+        euf::snode* m_var;
+        euf::snode* m_replacement;
+        dep_tracker m_dep;
+
+        nielsen_subst(): m_var(nullptr), m_replacement(nullptr) {}
+        nielsen_subst(euf::snode* var, euf::snode* repl, dep_tracker const& dep):
+            m_var(var), m_replacement(repl), m_dep(dep) {}
+
+        // an eliminating substitution does not contain the variable in the replacement
+        bool is_eliminating() const;
+
+        bool operator==(nielsen_subst const& other) const {
+            return m_var == other.m_var && m_replacement == other.m_replacement;
+        }
+    };
+
+    // edge in the Nielsen graph connecting two nodes
+    // mirrors ZIPT's NielsenEdge
+    class nielsen_edge {
+        nielsen_node*           m_src;
+        nielsen_node*           m_tgt;
+        vector<nielsen_subst>   m_subst;
+        ptr_vector<str_eq>      m_side_str_eq;    // side constraints: string equalities
+        ptr_vector<str_mem>     m_side_str_mem;    // side constraints: regex memberships
+        bool                    m_is_progress;     // does this edge represent progress?
+    public:
+        nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
+
+        nielsen_node* src() const { return m_src; }
+        nielsen_node* tgt() const { return m_tgt; }
+
+        void set_tgt(nielsen_node* tgt) { m_tgt = tgt; }
+
+        vector<nielsen_subst> const& subst() const { return m_subst; }
+        void add_subst(nielsen_subst const& s) { m_subst.push_back(s); }
+
+        void add_side_str_eq(str_eq* eq) { m_side_str_eq.push_back(eq); }
+        void add_side_str_mem(str_mem* mem) { m_side_str_mem.push_back(mem); }
+
+        ptr_vector<str_eq> const& side_str_eq() const { return m_side_str_eq; }
+        ptr_vector<str_mem> const& side_str_mem() const { return m_side_str_mem; }
+
+        bool is_progress() const { return m_is_progress; }
+
+        bool operator==(nielsen_edge const& other) const {
+            return m_src == other.m_src && m_tgt == other.m_tgt;
+        }
+    };
+
+    // node in the Nielsen graph
+    // mirrors ZIPT's NielsenNode
+    class nielsen_node {
+        friend class nielsen_graph;
+
+        unsigned                m_id;
+        nielsen_graph*          m_graph;
+
+        // constraints at this node
+        vector<str_eq>          m_str_eq;     // string equalities
+        vector<str_mem>         m_str_mem;    // regex memberships
+
+        // edges
+        ptr_vector<nielsen_edge> m_outgoing;
+        nielsen_node*           m_backedge = nullptr;
+
+        // status flags
+        bool                    m_is_general_conflict = false;
+        bool                    m_is_extended = false;
+        backtrack_reason        m_reason = backtrack_reason::unevaluated;
+        bool                    m_is_progress = false;
+
+        // evaluation index for run tracking
+        unsigned                m_eval_idx = 0;
+
+    public:
+        nielsen_node(nielsen_graph* graph, unsigned id);
+
+        unsigned id() const { return m_id; }
+        nielsen_graph* graph() const { return m_graph; }
+
+        // constraint access
+        vector<str_eq> const& str_eqs() const { return m_str_eq; }
+        vector<str_eq>& str_eqs() { return m_str_eq; }
+        vector<str_mem> const& str_mems() const { return m_str_mem; }
+        vector<str_mem>& str_mems() { return m_str_mem; }
+
+        void add_str_eq(str_eq const& eq) { m_str_eq.push_back(eq); }
+        void add_str_mem(str_mem const& mem) { m_str_mem.push_back(mem); }
+
+        // edge access
+        ptr_vector<nielsen_edge> const& outgoing() const { return m_outgoing; }
+        void add_outgoing(nielsen_edge* e) { m_outgoing.push_back(e); }
+
+        nielsen_node* backedge() const { return m_backedge; }
+        void set_backedge(nielsen_node* n) { m_backedge = n; }
+
+        // status
+        bool is_general_conflict() const { return m_is_general_conflict; }
+        void set_general_conflict(bool v) { m_is_general_conflict = v; }
+
+        bool is_extended() const { return m_is_extended; }
+        void set_extended(bool v) { m_is_extended = v; }
+
+        bool is_currently_conflict() const {
+            return m_is_general_conflict ||
+                (m_reason != backtrack_reason::unevaluated && m_is_extended);
+        }
+
+        backtrack_reason reason() const { return m_reason; }
+        void set_reason(backtrack_reason r) { m_reason = r; }
+
+        bool is_progress() const { return m_is_progress; }
+
+        unsigned eval_idx() const { return m_eval_idx; }
+        void set_eval_idx(unsigned idx) { m_eval_idx = idx; }
+        void reset_counter() { m_eval_idx = 0; }
+
+        // clone constraints from a parent node
+        void clone_from(nielsen_node const& parent);
+
+        // apply a substitution to all constraints
+        void apply_subst(euf::sgraph& sg, nielsen_subst const& s);
+    };
+
+    // the overall Nielsen transformation graph
+    // mirrors ZIPT's NielsenGraph
+    class nielsen_graph {
+        euf::sgraph&                  m_sg;
+        region                        m_region;
+        ptr_vector<nielsen_node>      m_nodes;
+        ptr_vector<nielsen_edge>      m_edges;
+        nielsen_node*                 m_root = nullptr;
+        unsigned                      m_run_idx = 0;
+        unsigned                      m_depth_bound = 0;
+        unsigned                      m_next_mem_id = 0;
+
+    public:
+        nielsen_graph(euf::sgraph& sg);
+        ~nielsen_graph();
+
+        euf::sgraph& sg() { return m_sg; }
+
+        // node management
+        nielsen_node* mk_node();
+        nielsen_node* mk_child(nielsen_node* parent);
+
+        // edge management
+        nielsen_edge* mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
+
+        // root node access
+        nielsen_node* root() const { return m_root; }
+        void set_root(nielsen_node* n) { m_root = n; }
+
+        // add constraints to the root node from external solver
+        void add_str_eq(euf::snode* lhs, euf::snode* rhs);
+        void add_str_mem(euf::snode* str, euf::snode* regex);
+
+        // run management
+        unsigned run_idx() const { return m_run_idx; }
+        void inc_run_idx();
+
+        // access all nodes
+        ptr_vector<nielsen_node> const& nodes() const { return m_nodes; }
+        unsigned num_nodes() const { return m_nodes.size(); }
+
+        // depth bound for iterative deepening
+        unsigned depth_bound() const { return m_depth_bound; }
+        void set_depth_bound(unsigned d) { m_depth_bound = d; }
+
+        // generate next unique regex membership id
+        unsigned next_mem_id() { return m_next_mem_id++; }
+
+        // display for debugging
+        std::ostream& display(std::ostream& out) const;
+
+        // reset all nodes and state
+        void reset();
+    };
+
+}
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@ -3,7 +3,7 @@ add_subdirectory(lp)
 ################################################################################
 # z3-test executable
 ################################################################################
-set(z3_test_deps api fuzzing simplex)
+set(z3_test_deps api fuzzing simplex smt_seq)
 z3_expand_dependencies(z3_test_expanded_deps ${z3_test_deps})
 set (z3_test_extra_object_files "")
 foreach (component ${z3_test_expanded_deps})
@ -51,6 +51,8 @@ add_executable(test-z3
  escaped.cpp
  euf_bv_plugin.cpp
  euf_arith_plugin.cpp
+  euf_sgraph.cpp
+  euf_seq_plugin.cpp
  ex.cpp
  expr_rand.cpp
  expr_substitution.cpp
@ -129,6 +131,7 @@ add_executable(test-z3
  simplifier.cpp
  sls_test.cpp
  sls_seq_plugin.cpp
+  seq_nielsen.cpp
  small_object_allocator.cpp
  smt2print_parse.cpp
  smt_context.cpp
--- a/src/test/euf_seq_plugin.cpp
+++ b/src/test/euf_seq_plugin.cpp
@ -0,0 +1,241 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+--*/
+
+#include "util/util.h"
+#include "ast/euf/euf_sgraph.h"
+#include "ast/euf/euf_seq_plugin.h"
+#include "ast/euf/euf_egraph.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/ast_pp.h"
+#include <iostream>
+
+static unsigned s_var = 0;
+
+static euf::enode* get_node(euf::egraph& g, seq_util& seq, expr* e) {
+    auto* n = g.find(e);
+    if (n) return n;
+    euf::enode_vector args;
+    if (is_app(e))
+        for (expr* arg : *to_app(e))
+            args.push_back(get_node(g, seq, arg));
+    n = g.mk(e, 0, args.size(), args.data());
+    if (seq.is_seq(e) || seq.is_re(e))
+        g.add_th_var(n, ++s_var, seq.get_family_id());
+    return n;
+}
+
+// test sgraph: basic classification and metadata
+static void test_sgraph_basic() {
+    std::cout << "test_sgraph_basic\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref y(m.mk_const("y", str_sort), m);
+    expr_ref empty(seq.str.mk_empty(str_sort), m);
+    expr_ref xy(seq.str.mk_concat(x, y), m);
+
+    euf::snode* sx = sg.mk(x);
+    SASSERT(sx);
+    SASSERT(sx->is_var());
+    SASSERT(!sx->is_ground());
+    SASSERT(sx->is_regex_free());
+    SASSERT(!sx->is_nullable());
+    SASSERT(sx->length() == 1);
+
+    euf::snode* se = sg.mk(empty);
+    SASSERT(se);
+    SASSERT(se->is_empty());
+    SASSERT(se->is_ground());
+    SASSERT(se->is_nullable());
+    SASSERT(se->length() == 0);
+
+    euf::snode* sxy = sg.mk(xy);
+    SASSERT(sxy);
+    SASSERT(sxy->is_concat());
+    SASSERT(!sxy->is_ground());
+    SASSERT(sxy->length() == 2);
+    SASSERT(sxy->num_args() == 2);
+
+    std::cout << "sgraph:\n";
+    sg.display(std::cout);
+    std::cout << "\n";
+}
+
+// test sgraph: backtracking with push/pop
+static void test_sgraph_backtrack() {
+    std::cout << "test_sgraph_backtrack\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref y(m.mk_const("y", str_sort), m);
+
+    sg.mk(x);
+    unsigned before = sg.num_nodes();
+
+    sg.push();
+
+    expr_ref xy(seq.str.mk_concat(x, y), m);
+    sg.mk(xy);
+    SASSERT(sg.num_nodes() > before);
+
+    sg.pop(1);
+    // y and xy were created inside the scope, so some nodes should be removed
+    // x was created before the scope, so it should persist
+    SASSERT(sg.find(x));
+}
+
+// test seq_plugin: concat associativity is normalized by the plugin
+static void test_seq_plugin_assoc() {
+    std::cout << "test_seq_plugin_assoc\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    euf::egraph& g = sg.get_egraph();
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref a(m.mk_const("a", str_sort), m);
+    expr_ref b(m.mk_const("b", str_sort), m);
+    expr_ref c(m.mk_const("c", str_sort), m);
+
+    // register nodes in egraph
+    // concat(concat(a,b),c) should be merged with concat(a,concat(b,c))
+    expr_ref ab(seq.str.mk_concat(a, b), m);
+    expr_ref ab_c(seq.str.mk_concat(ab, c), m);
+
+    euf::enode* nab_c = get_node(g, seq, ab_c);
+    g.propagate();
+
+    // the plugin should have created a right-associated form and merged
+    std::cout << g << "\n";
+}
+
+// test seq_plugin: empty string elimination
+static void test_seq_plugin_empty() {
+    std::cout << "test_seq_plugin_empty\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    euf::egraph& g = sg.get_egraph();
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref empty(seq.str.mk_empty(str_sort), m);
+    expr_ref xe(seq.str.mk_concat(x, empty), m);
+
+    auto* nxe = get_node(g, seq, xe);
+    auto* nx = g.find(x);
+    g.propagate();
+
+    // concat(x, empty) should be merged with x
+    SASSERT(nxe->get_root() == nx->get_root());
+    std::cout << g << "\n";
+}
+
+// test seq_plugin: Kleene star merging
+// The seq_plugin detects when star bodies are congruent
+// This tests the same_star_body logic at the regex level
+static void test_seq_plugin_star_merge() {
+    std::cout << "test_seq_plugin_star_merge\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    euf::egraph& g = sg.get_egraph();
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+    sort_ref re_sort(seq.re.mk_re(str_sort), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    // re.star(to_re(x))
+    expr_ref to_re_x(seq.re.mk_to_re(x), m);
+    expr_ref star_x(seq.re.mk_star(to_re_x), m);
+    // use regex concat for star * star
+    expr_ref star_star(seq.re.mk_concat(star_x, star_x), m);
+
+    // register in sgraph
+    sg.mk(star_star);
+    euf::snode* s = sg.find(star_x);
+    SASSERT(s && s->is_star());
+    SASSERT(s->is_nullable());
+
+    std::cout << g << "\n";
+}
+
+// test seq_plugin: nullable absorption by .*
+// concat(.*, nullable) should merge with .*
+static void test_seq_plugin_nullable_absorb() {
+    std::cout << "test_seq_plugin_nullable_absorb\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    euf::egraph& g = sg.get_egraph();
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref empty(seq.str.mk_empty(str_sort), m);
+    // concat(x, empty) = x (empty is nullable, exercises nullable check)
+    expr_ref xe(seq.str.mk_concat(x, empty), m);
+
+    auto* nxe = get_node(g, seq, xe);
+    auto* nx = g.find(x);
+    g.propagate();
+
+    // concat(x, empty) should be merged with x (empty elimination)
+    SASSERT(nxe->get_root() == nx->get_root());
+    std::cout << g << "\n";
+}
+
+// test sgraph owns egraph and syncs push/pop
+static void test_sgraph_egraph_sync() {
+    std::cout << "test_sgraph_egraph_sync\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    euf::egraph& g = sg.get_egraph();
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref y(m.mk_const("y", str_sort), m);
+    auto* nx = get_node(g, seq, x);
+    auto* ny = get_node(g, seq, y);
+
+    sg.push();
+
+    g.merge(nx, ny, nullptr);
+    g.propagate();
+    SASSERT(nx->get_root() == ny->get_root());
+
+    sg.pop(1);
+    // after pop, the merge should be undone
+    SASSERT(nx->get_root() != ny->get_root());
+}
+
+void tst_euf_seq_plugin() {
+    s_var = 0; test_sgraph_basic();
+    s_var = 0; test_sgraph_backtrack();
+    s_var = 0; test_seq_plugin_assoc();
+    s_var = 0; test_seq_plugin_empty();
+    s_var = 0; test_seq_plugin_star_merge();
+    s_var = 0; test_seq_plugin_nullable_absorb();
+    s_var = 0; test_sgraph_egraph_sync();
+}
--- a/src/test/euf_sgraph.cpp
+++ b/src/test/euf_sgraph.cpp
@ -0,0 +1,768 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    euf_sgraph.cpp
+
+Abstract:
+
+    Self-contained unit tests for the sgraph string graph layer.
+    Tests snode classification, metadata computation, push/pop
+    backtracking, associativity-respecting hash table, compound
+    node construction, and snode navigation.
+
+--*/
+
+#include "util/util.h"
+#include "ast/euf/euf_sgraph.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/ast_pp.h"
+#include "ast/arith_decl_plugin.h"
+#include <iostream>
+
+// test classification and metadata for basic string nodes:
+// variables, empty strings, characters, units, and concats
+static void test_sgraph_classify() {
+    std::cout << "test_sgraph_classify\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    // string variable
+    expr_ref x(m.mk_const("x", str_sort), m);
+    euf::snode* sx = sg.mk(x);
+    SASSERT(sx && sx->is_var());
+    SASSERT(!sx->is_ground());
+    SASSERT(sx->is_regex_free());
+    SASSERT(!sx->is_nullable());
+    SASSERT(sx->level() == 1);
+    SASSERT(sx->length() == 1);
+    SASSERT(sx->is_token());
+
+    // empty string
+    expr_ref empty(seq.str.mk_empty(str_sort), m);
+    euf::snode* se = sg.mk(empty);
+    SASSERT(se && se->is_empty());
+    SASSERT(se->is_ground());
+    SASSERT(se->is_nullable());
+    SASSERT(se->level() == 0);
+    SASSERT(se->length() == 0);
+    SASSERT(!se->is_token());
+
+    // character unit with literal char
+    expr_ref ch(seq.str.mk_char('A'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch), m);
+    euf::snode* sca = sg.mk(unit_a);
+    SASSERT(sca && sca->is_char());
+    SASSERT(sca->is_ground());
+    SASSERT(!sca->is_nullable());
+    SASSERT(sca->level() == 1);
+    SASSERT(sca->length() == 1);
+    SASSERT(sca->is_token());
+
+    // concat of two variables
+    expr_ref y(m.mk_const("y", str_sort), m);
+    expr_ref xy(seq.str.mk_concat(x, y), m);
+    euf::snode* sxy = sg.mk(xy);
+    SASSERT(sxy && sxy->is_concat());
+    SASSERT(!sxy->is_ground());
+    SASSERT(sxy->is_regex_free());
+    SASSERT(!sxy->is_nullable());
+    SASSERT(sxy->level() == 2);
+    SASSERT(sxy->length() == 2);
+    SASSERT(sxy->num_args() == 2);
+    SASSERT(!sxy->is_token());
+
+    sg.display(std::cout);
+}
+
+// test classification for regex nodes:
+// star, union, intersection, complement, full_seq, full_char, fail, to_re, in_re
+static void test_sgraph_regex() {
+    std::cout << "test_sgraph_regex\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+
+    // to_re
+    expr_ref to_re_x(seq.re.mk_to_re(x), m);
+    euf::snode* str = sg.mk(to_re_x);
+    SASSERT(str && str->is_to_re());
+    SASSERT(!str->is_regex_free());
+    SASSERT(!str->is_nullable()); // to_re(x) nullable iff x nullable, x is var so not nullable
+    SASSERT(str->num_args() == 1);
+
+    // star
+    expr_ref star_x(seq.re.mk_star(to_re_x), m);
+    euf::snode* ss = sg.mk(star_x);
+    SASSERT(ss && ss->is_star());
+    SASSERT(!ss->is_regex_free());
+    SASSERT(ss->is_nullable()); // star is always nullable
+    SASSERT(ss->num_args() == 1);
+
+    // full_seq (.*)
+    expr_ref full_seq(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* sfs = sg.mk(full_seq);
+    SASSERT(sfs && sfs->is_full_seq());
+    SASSERT(sfs->is_ground());
+    SASSERT(sfs->is_nullable());
+
+    // full_char (.)
+    expr_ref full_char(seq.re.mk_full_char(str_sort), m);
+    euf::snode* sfc = sg.mk(full_char);
+    SASSERT(sfc && sfc->is_full_char());
+    SASSERT(sfc->is_ground());
+    SASSERT(!sfc->is_nullable());
+
+    // empty set, fail
+    sort_ref re_sort(seq.re.mk_re(str_sort), m);
+    expr_ref empty_set(seq.re.mk_empty(re_sort), m);
+    euf::snode* sfail = sg.mk(empty_set);
+    SASSERT(sfail && sfail->is_fail());
+    SASSERT(!sfail->is_nullable());
+
+    // union: to_re(x) | star(to_re(x)), nullable because star is
+    expr_ref re_union(seq.re.mk_union(to_re_x, star_x), m);
+    euf::snode* su = sg.mk(re_union);
+    SASSERT(su && su->is_union());
+    SASSERT(su->is_nullable()); // star_x is nullable
+
+    // intersection: to_re(x) & star(to_re(x)), nullable only if both are
+    expr_ref re_inter(seq.re.mk_inter(to_re_x, star_x), m);
+    euf::snode* si = sg.mk(re_inter);
+    SASSERT(si && si->is_intersect());
+    SASSERT(!si->is_nullable()); // to_re(x) is not nullable
+
+    // complement of to_re(x): nullable because to_re(x) is not nullable
+    expr_ref re_comp(seq.re.mk_complement(to_re_x), m);
+    euf::snode* sc = sg.mk(re_comp);
+    SASSERT(sc && sc->is_complement());
+    SASSERT(sc->is_nullable()); // complement of non-nullable is nullable
+
+    // in_re
+    expr_ref in_re(seq.re.mk_in_re(x, star_x), m);
+    euf::snode* sir = sg.mk(in_re);
+    SASSERT(sir && sir->is_in_re());
+    SASSERT(!sir->is_regex_free());
+
+    sg.display(std::cout);
+}
+
+// test power node classification and metadata
+static void test_sgraph_power() {
+    std::cout << "test_sgraph_power\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    arith_util arith(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref n(arith.mk_int(3), m);
+    expr_ref xn(seq.str.mk_power(x, n), m);
+
+    euf::snode* sp = sg.mk(xn);
+    SASSERT(sp && sp->is_power());
+    SASSERT(!sp->is_ground()); // base x is not ground
+    SASSERT(sp->is_regex_free());
+    SASSERT(!sp->is_nullable()); // base x is not nullable
+    SASSERT(sp->num_args() >= 1);
+
+    sg.display(std::cout);
+}
+
+// test push/pop backtracking: nodes created inside a scope
+// are removed on pop, nodes before persist
+static void test_sgraph_push_pop() {
+    std::cout << "test_sgraph_push_pop\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref y(m.mk_const("y", str_sort), m);
+    expr_ref z(m.mk_const("z", str_sort), m);
+
+    // create x before any scope
+    sg.mk(x);
+    unsigned before = sg.num_nodes();
+    SASSERT(sg.find(x));
+
+    sg.push();
+
+    // create y and concat(x,y) inside scope
+    expr_ref xy(seq.str.mk_concat(x, y), m);
+    sg.mk(xy);
+    SASSERT(sg.num_nodes() > before);
+    SASSERT(sg.find(y));
+    SASSERT(sg.find(xy));
+
+    sg.pop(1);
+
+    // x persists, y and xy removed
+    SASSERT(sg.find(x));
+    SASSERT(!sg.find(y));
+    SASSERT(!sg.find(xy));
+    SASSERT(sg.num_nodes() == before);
+}
+
+// test nested push/pop with multiple scopes
+static void test_sgraph_nested_scopes() {
+    std::cout << "test_sgraph_nested_scopes\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref a(m.mk_const("a", str_sort), m);
+    expr_ref b(m.mk_const("b", str_sort), m);
+    expr_ref c(m.mk_const("c", str_sort), m);
+
+    sg.mk(a);
+    unsigned n0 = sg.num_nodes();
+
+    sg.push();
+    sg.mk(b);
+    unsigned n1 = sg.num_nodes();
+
+    sg.push();
+    sg.mk(c);
+    unsigned n2 = sg.num_nodes();
+    SASSERT(n2 > n1 && n1 > n0);
+
+    // pop inner scope, c goes away
+    sg.pop(1);
+    SASSERT(sg.num_nodes() == n1);
+    SASSERT(sg.find(a));
+    SASSERT(sg.find(b));
+    SASSERT(!sg.find(c));
+
+    // pop outer scope, b goes away
+    sg.pop(1);
+    SASSERT(sg.num_nodes() == n0);
+    SASSERT(sg.find(a));
+    SASSERT(!sg.find(b));
+}
+
+// test that find returns the same snode for the same expression
+static void test_sgraph_find_idempotent() {
+    std::cout << "test_sgraph_find_idempotent\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    euf::snode* s1 = sg.mk(x);
+    euf::snode* s2 = sg.mk(x);  // calling mk again returns same node
+    SASSERT(s1 == s2);
+    SASSERT(s1 == sg.find(x));
+}
+
+// test mk_concat: empty absorption, node construction via mk(concat_expr)
+static void test_sgraph_mk_concat() {
+    std::cout << "test_sgraph_mk_concat\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref y(m.mk_const("y", str_sort), m);
+    expr_ref empty(seq.str.mk_empty(str_sort), m);
+
+    euf::snode* sx = sg.mk(x);
+    euf::snode* sy = sg.mk(y);
+    euf::snode* se = sg.mk(empty);
+
+    // concat with empty yields the non-empty side at sgraph level
+    // (empty absorption is a property of the expression, checked via mk)
+    SASSERT(se && se->is_empty());
+
+    // normal concat via expression
+    expr_ref xy(seq.str.mk_concat(x, y), m);
+    euf::snode* sxy = sg.mk(xy);
+    SASSERT(sxy && sxy->is_concat());
+    SASSERT(sxy->num_args() == 2);
+    SASSERT(sxy->arg(0) == sx);
+    SASSERT(sxy->arg(1) == sy);
+
+    // calling mk again with same expr returns same node
+    euf::snode* sxy2 = sg.mk(xy);
+    SASSERT(sxy == sxy2);
+}
+
+// test power node construction via mk(power_expr)
+static void test_sgraph_mk_power() {
+    std::cout << "test_sgraph_mk_power\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    arith_util arith(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref n(arith.mk_int(5), m);
+    expr_ref xn(seq.str.mk_power(x, n), m);
+
+    euf::snode* sx = sg.mk(x);
+    euf::snode* sp = sg.mk(xn);
+    SASSERT(sp && sp->is_power());
+    SASSERT(sp->num_args() == 2);
+    SASSERT(sp->arg(0) == sx);
+
+    // calling mk again returns same node
+    euf::snode* sp2 = sg.mk(xn);
+    SASSERT(sp == sp2);
+}
+
+// test snode first/last navigation on concat trees
+static void test_sgraph_first_last() {
+    std::cout << "test_sgraph_first_last\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref a(m.mk_const("a", str_sort), m);
+    expr_ref b(m.mk_const("b", str_sort), m);
+    expr_ref c(m.mk_const("c", str_sort), m);
+
+    euf::snode* sa = sg.mk(a);
+    euf::snode* sb = sg.mk(b);
+    euf::snode* sc = sg.mk(c);
+
+    // concat(concat(a,b),c): first=a, last=c
+    expr_ref ab(seq.str.mk_concat(a, b), m);
+    expr_ref ab_c(seq.str.mk_concat(ab, c), m);
+    euf::snode* sab_c = sg.mk(ab_c);
+    SASSERT(sab_c->first() == sa);
+    SASSERT(sab_c->last() == sc);
+
+    // concat(a,concat(b,c)): first=a, last=c
+    expr_ref bc(seq.str.mk_concat(b, c), m);
+    expr_ref a_bc(seq.str.mk_concat(a, bc), m);
+    euf::snode* sa_bc = sg.mk(a_bc);
+    SASSERT(sa_bc->first() == sa);
+    SASSERT(sa_bc->last() == sc);
+
+    // single node: first and last are self
+    SASSERT(sa->first() == sa);
+    SASSERT(sa->last() == sa);
+}
+
+// test concat metadata propagation:
+// ground, regex_free, nullable, level, length
+static void test_sgraph_concat_metadata() {
+    std::cout << "test_sgraph_concat_metadata\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref empty(seq.str.mk_empty(str_sort), m);
+    expr_ref ch(seq.str.mk_char('Z'), m);
+    expr_ref unit_z(seq.str.mk_unit(ch), m);
+
+    euf::snode* sx = sg.mk(x);
+    euf::snode* se = sg.mk(empty);
+    euf::snode* sz = sg.mk(unit_z);
+
+    // concat(x, unit('Z')): not ground (x is variable), regex_free, not nullable
+    expr_ref xz(seq.str.mk_concat(x, unit_z), m);
+    euf::snode* sxz = sg.mk(xz);
+    SASSERT(!sxz->is_ground());
+    SASSERT(sxz->is_regex_free());
+    SASSERT(!sxz->is_nullable());
+    SASSERT(sxz->length() == 2);
+    SASSERT(sxz->level() == 2);
+
+    // concat(empty, empty): nullable (both empty)
+    expr_ref empty2(seq.str.mk_concat(empty, empty), m);
+    euf::snode* see = sg.mk(empty2);
+    SASSERT(see->is_nullable());
+    SASSERT(see->is_ground());
+    SASSERT(see->length() == 0);
+
+    // deep chain: concat(concat(x,x),concat(x,x)) has level 3, length 4
+    expr_ref xx(seq.str.mk_concat(x, x), m);
+    expr_ref xxxx(seq.str.mk_concat(xx, xx), m);
+    euf::snode* sxxxx = sg.mk(xxxx);
+    SASSERT(sxxxx->level() == 3);
+    SASSERT(sxxxx->length() == 4);
+}
+
+// test display does not crash
+static void test_sgraph_display() {
+    std::cout << "test_sgraph_display\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    expr_ref x(m.mk_const("x", str_sort), m);
+    expr_ref y(m.mk_const("y", str_sort), m);
+    expr_ref xy(seq.str.mk_concat(x, y), m);
+    sg.mk(xy);
+
+    std::ostringstream oss;
+    sg.display(oss);
+    std::string out = oss.str();
+    SASSERT(out.find("var") != std::string::npos);
+    SASSERT(out.find("concat") != std::string::npos);
+    std::cout << out;
+}
+
+// test sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat
+static void test_sgraph_factory() {
+    std::cout << "test_sgraph_factory\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    // mk_var
+    euf::snode* x = sg.mk_var(symbol("x"));
+    SASSERT(x && x->is_var());
+    SASSERT(!x->is_ground());
+    SASSERT(x->length() == 1);
+
+    // mk_char
+    euf::snode* a = sg.mk_char('A');
+    SASSERT(a && a->is_char());
+    SASSERT(a->is_ground());
+    SASSERT(a->length() == 1);
+
+    // mk_empty
+    euf::snode* e = sg.mk_empty();
+    SASSERT(e && e->is_empty());
+    SASSERT(e->is_nullable());
+    SASSERT(e->length() == 0);
+
+    // mk_concat with empty absorption
+    euf::snode* xe = sg.mk_concat(x, e);
+    SASSERT(xe == x);
+    euf::snode* ex = sg.mk_concat(e, x);
+    SASSERT(ex == x);
+
+    // mk_concat of two variables
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* xy = sg.mk_concat(x, y);
+    SASSERT(xy && xy->is_concat());
+    SASSERT(xy->length() == 2);
+    SASSERT(xy->arg(0) == x);
+    SASSERT(xy->arg(1) == y);
+
+    // mk_concat of multiple characters
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* c = sg.mk_char('C');
+    euf::snode* abc = sg.mk_concat(sg.mk_concat(a, b), c);
+    SASSERT(abc->length() == 3);
+    SASSERT(abc->is_ground());
+    SASSERT(abc->first() == a);
+    SASSERT(abc->last() == c);
+}
+
+// test snode::at() and snode::collect_tokens()
+static void test_sgraph_indexing() {
+    std::cout << "test_sgraph_indexing\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* c = sg.mk_char('C');
+    euf::snode* x = sg.mk_var(symbol("x"));
+
+    // build concat(concat(a, b), concat(c, x)) => [A, B, C, x]
+    euf::snode* ab = sg.mk_concat(a, b);
+    euf::snode* cx = sg.mk_concat(c, x);
+    euf::snode* abcx = sg.mk_concat(ab, cx);
+
+    SASSERT(abcx->length() == 4);
+
+    // test at()
+    SASSERT(abcx->at(0) == a);
+    SASSERT(abcx->at(1) == b);
+    SASSERT(abcx->at(2) == c);
+    SASSERT(abcx->at(3) == x);
+    SASSERT(abcx->at(4) == nullptr); // out of bounds
+
+    // test collect_tokens()
+    euf::snode_vector tokens;
+    abcx->collect_tokens(tokens);
+    SASSERT(tokens.size() == 4);
+    SASSERT(tokens[0] == a);
+    SASSERT(tokens[1] == b);
+    SASSERT(tokens[2] == c);
+    SASSERT(tokens[3] == x);
+
+    // single token: at(0) is self
+    SASSERT(a->at(0) == a);
+    SASSERT(a->at(1) == nullptr);
+
+    // empty: at(0) is nullptr
+    euf::snode* e = sg.mk_empty();
+    SASSERT(e->at(0) == nullptr);
+    euf::snode_vector empty_tokens;
+    e->collect_tokens(empty_tokens);
+    SASSERT(empty_tokens.empty());
+}
+
+// test sgraph drop operations
+static void test_sgraph_drop() {
+    std::cout << "test_sgraph_drop\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* c = sg.mk_char('C');
+    euf::snode* d = sg.mk_char('D');
+
+    // build concat(concat(a, b), concat(c, d)) => [A, B, C, D]
+    euf::snode* ab = sg.mk_concat(a, b);
+    euf::snode* cd = sg.mk_concat(c, d);
+    euf::snode* abcd = sg.mk_concat(ab, cd);
+
+    SASSERT(abcd->length() == 4);
+
+    // drop_first: [A, B, C, D] => [B, C, D]
+    euf::snode* bcd = sg.drop_first(abcd);
+    SASSERT(bcd->length() == 3);
+    SASSERT(bcd->first() == b);
+    SASSERT(bcd->last() == d);
+
+    // drop_last: [A, B, C, D] => [A, B, C]
+    euf::snode* abc = sg.drop_last(abcd);
+    SASSERT(abc->length() == 3);
+    SASSERT(abc->first() == a);
+    SASSERT(abc->last() == c);
+
+    // drop_left(2): [A, B, C, D] => [C, D]
+    euf::snode* cd2 = sg.drop_left(abcd, 2);
+    SASSERT(cd2->length() == 2);
+    SASSERT(cd2->first() == c);
+
+    // drop_right(2): [A, B, C, D] => [A, B]
+    euf::snode* ab2 = sg.drop_right(abcd, 2);
+    SASSERT(ab2->length() == 2);
+    SASSERT(ab2->last() == b);
+
+    // drop all: [A, B, C, D] => empty
+    euf::snode* empty = sg.drop_left(abcd, 4);
+    SASSERT(empty->is_empty());
+
+    // drop from single token: [A] => empty
+    euf::snode* e = sg.drop_first(a);
+    SASSERT(e->is_empty());
+
+    // drop from empty: no change
+    euf::snode* ee = sg.drop_first(sg.mk_empty());
+    SASSERT(ee->is_empty());
+}
+
+// test sgraph substitution
+static void test_sgraph_subst() {
+    std::cout << "test_sgraph_subst\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+
+    // concat(x, concat(a, x)) with x -> b gives concat(b, concat(a, b))
+    euf::snode* ax = sg.mk_concat(a, x);
+    euf::snode* xax = sg.mk_concat(x, ax);
+    SASSERT(xax->length() == 3);
+
+    euf::snode* result = sg.subst(xax, x, b);
+    SASSERT(result->length() == 3);
+    SASSERT(result->first() == b);
+    SASSERT(result->last() == b);
+    SASSERT(result->at(1) == a); // middle is still 'A'
+
+    // substitution of non-occurring variable is identity
+    euf::snode* same = sg.subst(xax, y, b);
+    SASSERT(same == xax);
+
+    // substitution of variable with empty
+    euf::snode* e = sg.mk_empty();
+    euf::snode* collapsed = sg.subst(xax, x, e);
+    SASSERT(collapsed->length() == 1); // just 'a' remains
+    SASSERT(collapsed == a);
+}
+
+// test complex concatenation creation, merging and simplification
+static void test_sgraph_complex_concat() {
+    std::cout << "test_sgraph_complex_concat\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    // build a string "HELLO" = concat(H, concat(E, concat(L, concat(L, O))))
+    euf::snode* h = sg.mk_char('H');
+    euf::snode* e = sg.mk_char('E');
+    euf::snode* l = sg.mk_char('L');
+    euf::snode* o = sg.mk_char('O');
+
+    euf::snode* lo = sg.mk_concat(l, o);
+    euf::snode* llo = sg.mk_concat(l, lo);
+    euf::snode* ello = sg.mk_concat(e, llo);
+    euf::snode* hello = sg.mk_concat(h, ello);
+
+    SASSERT(hello->length() == 5);
+    SASSERT(hello->is_ground());
+    SASSERT(hello->first() == h);
+    SASSERT(hello->last() == o);
+
+    // index into "HELLO"
+    SASSERT(hello->at(0) == h);
+    SASSERT(hello->at(1) == e);
+    SASSERT(hello->at(2) == l);
+    SASSERT(hello->at(3) == l);
+    SASSERT(hello->at(4) == o);
+
+    // drop first 2 from "HELLO" => "LLO"
+    euf::snode* llo2 = sg.drop_left(hello, 2);
+    SASSERT(llo2->length() == 3);
+    SASSERT(llo2->first() == l);
+
+    // drop last 3 from "HELLO" => "HE"
+    euf::snode* he = sg.drop_right(hello, 3);
+    SASSERT(he->length() == 2);
+    SASSERT(he->first() == h);
+    SASSERT(he->last() == e);
+
+    // mixed variables and characters: concat(x, "AB", y)
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* ab = sg.mk_concat(a, b);
+    euf::snode* xab = sg.mk_concat(x, ab);
+    euf::snode* xaby = sg.mk_concat(xab, y);
+
+    SASSERT(xaby->length() == 4);
+    SASSERT(!xaby->is_ground());
+    SASSERT(xaby->at(0) == x);
+    SASSERT(xaby->at(1) == a);
+    SASSERT(xaby->at(2) == b);
+    SASSERT(xaby->at(3) == y);
+}
+
+// test Brzozowski derivative computation
+static void test_sgraph_brzozowski() {
+    std::cout << "test_sgraph_brzozowski\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    // derivative of re.star(to_re("a")) w.r.t. 'a'
+    // d/da (a*) = a*
+    expr_ref ch_a(seq.str.mk_char('a'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    expr_ref to_re_a(seq.re.mk_to_re(unit_a), m);
+    expr_ref star_a(seq.re.mk_star(to_re_a), m);
+
+    euf::snode* s_star_a = sg.mk(star_a);
+    euf::snode* s_unit_a = sg.mk(unit_a);
+
+    euf::snode* deriv = sg.brzozowski_deriv(s_star_a, s_unit_a);
+    SASSERT(deriv != nullptr);
+    std::cout << "  d/da(a*) kind: " << (int)deriv->kind() << "\n";
+
+    // derivative of re.empty w.r.t. 'a' should be re.empty
+    sort_ref re_sort(seq.re.mk_re(str_sort), m);
+    expr_ref re_empty(seq.re.mk_empty(re_sort), m);
+    euf::snode* s_empty = sg.mk(re_empty);
+    euf::snode* deriv_empty = sg.brzozowski_deriv(s_empty, s_unit_a);
+    SASSERT(deriv_empty != nullptr);
+    SASSERT(deriv_empty->is_fail()); // derivative of empty set is empty set
+    std::cout << "  d/da(empty) kind: " << (int)deriv_empty->kind() << "\n";
+
+    sg.display(std::cout);
+}
+
+// test minterm computation
+static void test_sgraph_minterms() {
+    std::cout << "test_sgraph_minterms\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    // simple regex with no character predicates: re.all (.*)
+    expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* s_re_all = sg.mk(re_all);
+
+    euf::snode_vector minterms;
+    sg.compute_minterms(s_re_all, minterms);
+    // no predicates => single minterm (full_char)
+    SASSERT(minterms.size() == 1);
+    std::cout << "  re.all minterms: " << minterms.size() << "\n";
+}
+
+void tst_euf_sgraph() {
+    test_sgraph_classify();
+    test_sgraph_regex();
+    test_sgraph_power();
+    test_sgraph_push_pop();
+    test_sgraph_nested_scopes();
+    test_sgraph_find_idempotent();
+    test_sgraph_mk_concat();
+    test_sgraph_mk_power();
+    test_sgraph_first_last();
+    test_sgraph_concat_metadata();
+    test_sgraph_display();
+    test_sgraph_factory();
+    test_sgraph_indexing();
+    test_sgraph_drop();
+    test_sgraph_subst();
+    test_sgraph_complex_concat();
+    test_sgraph_brzozowski();
+    test_sgraph_minterms();
+}
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@ -281,9 +281,12 @@ int main(int argc, char ** argv) {
    TST(distribution);
    TST(euf_bv_plugin);
    TST(euf_arith_plugin);
+    TST(euf_sgraph);
+    TST(euf_seq_plugin);
    TST(sls_test);
    TST(scoped_vector);
    TST(sls_seq_plugin);
+    TST(seq_nielsen);
    TST(ho_matcher);
    TST(finite_set);
    TST(finite_set_rewriter);
--- a/src/test/seq_nielsen.cpp
+++ b/src/test/seq_nielsen.cpp
@ -0,0 +1,479 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_nielsen.cpp
+
+Abstract:
+
+    Unit tests for the Nielsen graph framework (seq_nielsen.h).
+    Tests constraint types, node/edge construction, substitution
+    application, and graph population.
+
+--*/
+
+#include "util/util.h"
+#include "ast/euf/euf_egraph.h"
+#include "ast/euf/euf_sgraph.h"
+#include "smt/seq/seq_nielsen.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/ast_pp.h"
+#include <iostream>
+
+// test dep_tracker basic operations
+static void test_dep_tracker() {
+    std::cout << "test_dep_tracker\n";
+
+    // empty tracker
+    seq::dep_tracker d0;
+    SASSERT(d0.empty());
+
+    // tracker with one bit set
+    seq::dep_tracker d1(8, 3);
+    SASSERT(!d1.empty());
+
+    // tracker with another bit
+    seq::dep_tracker d2(8, 5);
+    SASSERT(!d2.empty());
+
+    // merge
+    seq::dep_tracker d3 = d1;
+    d3.merge(d2);
+    SASSERT(!d3.empty());
+    SASSERT(d3.is_superset(d1));
+    SASSERT(d3.is_superset(d2));
+    SASSERT(!d1.is_superset(d2));
+
+    // equality
+    seq::dep_tracker d4(8, 3);
+    SASSERT(d1 == d4);
+    SASSERT(d1 != d2);
+}
+
+// test str_eq constraint creation and operations
+static void test_str_eq() {
+    std::cout << "test_str_eq\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* e = sg.mk_empty();
+
+    seq::dep_tracker dep(4, 0);
+
+    // basic equality
+    seq::str_eq eq1(x, y, dep);
+    SASSERT(!eq1.is_trivial());
+    SASSERT(eq1.contains_var(x));
+    SASSERT(eq1.contains_var(y));
+    SASSERT(!eq1.contains_var(a));
+
+    // trivial equality: same node
+    seq::str_eq eq2(x, x, dep);
+    SASSERT(eq2.is_trivial());
+
+    // trivial equality: both empty
+    seq::str_eq eq3(e, e, dep);
+    SASSERT(eq3.is_trivial());
+
+    // sorting: lower id first
+    seq::str_eq eq4(y, x, dep);
+    eq4.sort();
+    SASSERT(eq4.m_lhs->id() <= eq4.m_rhs->id());
+
+    // contains_var with concat
+    euf::snode* xa = sg.mk_concat(x, a);
+    seq::str_eq eq5(xa, y, dep);
+    SASSERT(eq5.contains_var(x));
+    SASSERT(eq5.contains_var(y));
+    SASSERT(!eq5.contains_var(e));
+}
+
+// test str_mem constraint creation and operations
+static void test_str_mem() {
+    std::cout << "test_str_mem\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* e = sg.mk_empty();
+
+    // create a regex: re.all (.*)
+    expr_ref star_fc(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* regex = sg.mk(star_fc);
+
+    seq::dep_tracker dep(4, 1);
+    seq::str_mem mem(x, regex, e, 0, dep);
+
+    // x in regex is primitive (x is a single variable)
+    SASSERT(mem.is_primitive());
+    SASSERT(mem.contains_var(x));
+
+    // concatenation is not primitive
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* xa = sg.mk_concat(x, a);
+    seq::str_mem mem2(xa, regex, e, 1, dep);
+    SASSERT(!mem2.is_primitive());
+    SASSERT(mem2.contains_var(x));
+}
+
+// test nielsen_subst
+static void test_nielsen_subst() {
+    std::cout << "test_nielsen_subst\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* e = sg.mk_empty();
+
+    seq::dep_tracker dep;
+
+    // eliminating substitution: x -> A (x does not appear in A)
+    seq::nielsen_subst s1(x, a, dep);
+    SASSERT(s1.is_eliminating());
+
+    // eliminating substitution: x -> empty
+    seq::nielsen_subst s2(x, e, dep);
+    SASSERT(s2.is_eliminating());
+
+    // non-eliminating substitution: x -> concat(A, x)
+    euf::snode* ax = sg.mk_concat(a, x);
+    seq::nielsen_subst s3(x, ax, dep);
+    SASSERT(!s3.is_eliminating());
+
+    // eliminating substitution: x -> y (x not in y)
+    seq::nielsen_subst s4(x, y, dep);
+    SASSERT(s4.is_eliminating());
+}
+
+// test nielsen_node creation and constraint management
+static void test_nielsen_node() {
+    std::cout << "test_nielsen_node\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+
+    seq::nielsen_node* root = ng.mk_node();
+    SASSERT(root->id() == 0);
+    SASSERT(root->str_eqs().empty());
+    SASSERT(root->str_mems().empty());
+    SASSERT(root->is_progress());
+    SASSERT(root->reason() == seq::backtrack_reason::unevaluated);
+
+    // add constraints
+    seq::dep_tracker dep;
+    root->add_str_eq(seq::str_eq(x, y, dep));
+    root->add_str_eq(seq::str_eq(sg.mk_concat(x, a), sg.mk_concat(a, y), dep));
+    SASSERT(root->str_eqs().size() == 2);
+
+    // regex membership
+    expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* regex = sg.mk(re_all);
+    euf::snode* empty = sg.mk_empty();
+    root->add_str_mem(seq::str_mem(x, regex, empty, 0, dep));
+    SASSERT(root->str_mems().size() == 1);
+
+    // clone from parent
+    seq::nielsen_node* child = ng.mk_node();
+    child->clone_from(*root);
+    SASSERT(child->str_eqs().size() == 2);
+    SASSERT(child->str_mems().size() == 1);
+    SASSERT(child->id() == 1);
+}
+
+// test nielsen_edge creation
+static void test_nielsen_edge() {
+    std::cout << "test_nielsen_edge\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+
+    // create parent and child nodes
+    seq::nielsen_node* parent = ng.mk_node();
+    seq::dep_tracker dep;
+    parent->add_str_eq(seq::str_eq(x, y, dep));
+
+    seq::nielsen_node* child = ng.mk_child(parent);
+
+    // create edge with substitution x -> A
+    seq::nielsen_edge* edge = ng.mk_edge(parent, child, true);
+    edge->add_subst(seq::nielsen_subst(x, a, dep));
+
+    SASSERT(edge->src() == parent);
+    SASSERT(edge->tgt() == child);
+    SASSERT(edge->is_progress());
+    SASSERT(edge->subst().size() == 1);
+    SASSERT(parent->outgoing().size() == 1);
+    SASSERT(parent->outgoing()[0] == edge);
+}
+
+// test nielsen_graph population from external constraints
+static void test_nielsen_graph_populate() {
+    std::cout << "test_nielsen_graph_populate\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+
+    // add string equality: x = y
+    ng.add_str_eq(x, y);
+    SASSERT(ng.root() != nullptr);
+    SASSERT(ng.root()->str_eqs().size() == 1);
+    SASSERT(ng.num_nodes() == 1);
+
+    // add regex membership: x in .*
+    expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* regex = sg.mk(re_all);
+    ng.add_str_mem(x, regex);
+    SASSERT(ng.root()->str_mems().size() == 1);
+    SASSERT(ng.root()->str_mems()[0].m_id == 0);
+
+    // add another equality: concat(x, A) = concat(A, y)
+    euf::snode* xa = sg.mk_concat(x, a);
+    euf::snode* ay = sg.mk_concat(a, y);
+    ng.add_str_eq(xa, ay);
+    SASSERT(ng.root()->str_eqs().size() == 2);
+
+    // display for visual inspection
+    ng.display(std::cout);
+}
+
+// test substitution application on nielsen_node
+static void test_nielsen_subst_apply() {
+    std::cout << "test_nielsen_subst_apply\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* b = sg.mk_char('B');
+    euf::snode* e = sg.mk_empty();
+
+    // create node with constraint: concat(x, A) = concat(B, y)
+    seq::nielsen_node* node = ng.mk_node();
+    seq::dep_tracker dep;
+    euf::snode* xa = sg.mk_concat(x, a);
+    euf::snode* by = sg.mk_concat(b, y);
+    node->add_str_eq(seq::str_eq(xa, by, dep));
+
+    // apply substitution x -> empty
+    seq::nielsen_subst s(x, e, dep);
+    node->apply_subst(sg, s);
+
+    // after x -> empty: lhs should be just A, rhs still concat(B, y)
+    SASSERT(node->str_eqs().size() == 1);
+    auto const& eq = node->str_eqs()[0];
+    // a should remain (after x replaced with empty, concat(empty, A) = A)
+    std::cout << "  lhs len=" << eq.m_lhs->length() << " rhs len=" << eq.m_rhs->length() << "\n";
+}
+
+// test Nielsen graph reset
+static void test_nielsen_graph_reset() {
+    std::cout << "test_nielsen_graph_reset\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+
+    ng.add_str_eq(x, y);
+    SASSERT(ng.num_nodes() == 1);
+    SASSERT(ng.root() != nullptr);
+
+    ng.reset();
+    SASSERT(ng.num_nodes() == 0);
+    SASSERT(ng.root() == nullptr);
+}
+
+// test constructing a basic Nielsen expansion tree
+// x = Ay: split into x -> eps (progress) or x -> Ax (non-progress)
+static void test_nielsen_expansion() {
+    std::cout << "test_nielsen_expansion\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+    euf::snode* a = sg.mk_char('A');
+    euf::snode* ay = sg.mk_concat(a, y);
+
+    // root: x = Ay
+    ng.add_str_eq(x, ay);
+    seq::nielsen_node* root = ng.root();
+    SASSERT(root->str_eqs().size() == 1);
+
+    seq::dep_tracker dep;
+
+    // branch 1: x -> eps (eliminating, progress)
+    euf::snode* e = sg.mk_empty();
+    seq::nielsen_node* child1 = ng.mk_child(root);
+    seq::nielsen_subst s1(x, e, dep);
+    child1->apply_subst(sg, s1);
+    seq::nielsen_edge* edge1 = ng.mk_edge(root, child1, true);
+    edge1->add_subst(s1);
+
+    // branch 2: x -> Ax (non-eliminating, non-progress)
+    euf::snode* ax = sg.mk_concat(a, x);
+    seq::nielsen_node* child2 = ng.mk_child(root);
+    seq::nielsen_subst s2(x, ax, dep);
+    child2->apply_subst(sg, s2);
+    seq::nielsen_edge* edge2 = ng.mk_edge(root, child2, false);
+    edge2->add_subst(s2);
+
+    SASSERT(ng.num_nodes() == 3);
+    SASSERT(root->outgoing().size() == 2);
+    SASSERT(edge1->is_progress());
+    SASSERT(!edge2->is_progress());
+
+    // verify substitution effects on child1: eps = Ay becomes empty = Ay
+    SASSERT(child1->str_eqs().size() == 1);
+
+    ng.display(std::cout);
+}
+
+// test run index management
+static void test_run_idx() {
+    std::cout << "test_run_idx\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    seq::nielsen_graph ng(sg);
+    SASSERT(ng.run_idx() == 0);
+
+    ng.inc_run_idx();
+    SASSERT(ng.run_idx() == 1);
+
+    ng.inc_run_idx();
+    SASSERT(ng.run_idx() == 2);
+}
+
+// test multiple regex memberships
+static void test_multiple_memberships() {
+    std::cout << "test_multiple_memberships\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    seq_util seq(m);
+    sort_ref str_sort(seq.str.mk_string_sort(), m);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+
+    // x in .*
+    expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
+    euf::snode* regex1 = sg.mk(re_all);
+    ng.add_str_mem(x, regex1);
+
+    // x in re.union(to_re("A"), to_re("B"))
+    expr_ref ch_a(seq.str.mk_char('A'), m);
+    expr_ref unit_a(seq.str.mk_unit(ch_a), m);
+    expr_ref ch_b(seq.str.mk_char('B'), m);
+    expr_ref unit_b(seq.str.mk_unit(ch_b), m);
+    expr_ref to_re_a(seq.re.mk_to_re(unit_a), m);
+    expr_ref to_re_b(seq.re.mk_to_re(unit_b), m);
+    expr_ref re_union(seq.re.mk_union(to_re_a, to_re_b), m);
+    euf::snode* regex2 = sg.mk(re_union);
+    ng.add_str_mem(x, regex2);
+
+    SASSERT(ng.root() != nullptr);
+    SASSERT(ng.root()->str_mems().size() == 2);
+    SASSERT(ng.root()->str_mems()[0].m_id == 0);
+    SASSERT(ng.root()->str_mems()[1].m_id == 1);
+
+    ng.display(std::cout);
+}
+
+// test backedge setting (cycle detection support)
+static void test_backedge() {
+    std::cout << "test_backedge\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+
+    seq::nielsen_graph ng(sg);
+
+    euf::snode* x = sg.mk_var(symbol("x"));
+    euf::snode* y = sg.mk_var(symbol("y"));
+
+    ng.add_str_eq(x, y);
+    seq::nielsen_node* root = ng.root();
+    seq::nielsen_node* child = ng.mk_child(root);
+
+    // set backedge from child to root (cycle)
+    child->set_backedge(root);
+    SASSERT(child->backedge() == root);
+    SASSERT(root->backedge() == nullptr);
+}
+
+void tst_seq_nielsen() {
+    test_dep_tracker();
+    test_str_eq();
+    test_str_mem();
+    test_nielsen_subst();
+    test_nielsen_node();
+    test_nielsen_edge();
+    test_nielsen_graph_populate();
+    test_nielsen_subst_apply();
+    test_nielsen_graph_reset();
+    test_nielsen_expansion();
+    test_run_idx();
+    test_multiple_memberships();
+    test_backedge();
+}