Move nseq_regex/state into smt/seq and seq_model into smt/; rename to seq_* prefix (#8984)

* Initial plan * build verified: nseq_regex moved to smt/seq Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * rename nseq_regex/state/model to seq_regex/state/model in smt/seq; add Clemens Eisenhofer as co-author Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * seq_model: remove theory_nseq dependency; get family_id from seq_util Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Add comments for regex enhancements in seq_model Added comments regarding future improvements for regex handling. * Add comments for large exponent handling Add comments for handling large exponents in seq_model.cpp * Revise comments for clarity on sort usage Updated comments to reflect review suggestions regarding the use of the sort of 'n'. * seq_state: remove sgraph dep; seq_model: use snode sort for is_empty; remove NSB review comments Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Update seq_state.h * Remove unnecessary include for smt_context.h * move seq_model from smt/seq/ to smt/; fix seq_state.h add_str_mem typo Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2026-06-10 19:07:18 +00:00 · 2026-03-14 11:45:32 -07:00 · 2026-03-14 11:45:32 -07:00 · 5a3dbaf9f3
commit 5a3dbaf9f3
parent 27f5541b0b
15 changed files with 172 additions and 164 deletions
--- a/src/smt/seq/CMakeLists.txt
+++ b/src/smt/seq/CMakeLists.txt
@ -1,5 +1,7 @@
 z3_add_component(smt_seq
  SOURCES
+    seq_regex.cpp
+    seq_state.cpp
    seq_parikh.cpp
    seq_nielsen.cpp
  COMPONENT_DEPENDENCIES
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -21,7 +21,7 @@ Author:

 #include "smt/seq/seq_nielsen.h"
 #include "smt/seq/seq_parikh.h"
-#include "smt/nseq_regex.h"
+#include "smt/seq/seq_regex.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/ast_pp.h"
 #include "ast/rewriter/seq_rewriter.h"
@ -513,13 +513,13 @@ namespace seq {
        m_sg(sg),
        m_solver(solver),
        m_parikh(alloc(seq_parikh, sg)),
-        m_nseq_regex(alloc(smt::nseq_regex, sg)),
+        m_seq_regex(alloc(seq::seq_regex, sg)),
        m_len_vars(sg.get_manager()) {
    }

    nielsen_graph::~nielsen_graph() {
        dealloc(m_parikh);
-        dealloc(m_nseq_regex);
+        dealloc(m_seq_regex);
        reset();
    }

@ -1798,7 +1798,7 @@ namespace seq {
        // and check if the result intersected with the target regex is empty.
        // Detects infeasible constraints that would otherwise require
        // expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening.
-        if (g.m_nseq_regex) {
+        if (g.m_seq_regex) {
            for (str_mem const& mem : m_str_mem) {
                if (!mem.m_str || !mem.m_regex)
                    continue;
@ -3290,11 +3290,11 @@ namespace seq {
            // 3. Fall back to simple star(R)
            euf::snode* stab_base = nullptr;

-            if (m_nseq_regex && m_nseq_regex->is_self_stabilizing(mem.m_regex)) {
+            if (m_seq_regex && m_seq_regex->is_self_stabilizing(mem.m_regex)) {
                stab_base = mem.m_regex;
            }
-            else if (m_nseq_regex && mem.m_history) {
-                stab_base = m_nseq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history);
+            else if (m_seq_regex && mem.m_history) {
+                stab_base = m_seq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history);
            }

            if (!stab_base) {
@ -3303,12 +3303,12 @@ namespace seq {
            }

            // Register the stabilizer
-            if (m_nseq_regex)
-                m_nseq_regex->add_stabilizer(mem.m_regex, stab_base);
+            if (m_seq_regex)
+                m_seq_regex->add_stabilizer(mem.m_regex, stab_base);

            // Check if stabilizer equals regex → self-stabilizing
-            if (m_nseq_regex && stab_base == mem.m_regex)
-                m_nseq_regex->set_self_stabilizing(mem.m_regex);
+            if (m_seq_regex && stab_base == mem.m_regex)
+                m_seq_regex->set_self_stabilizing(mem.m_regex);

            // Build stab_star = star(stab_base)
            expr* stab_expr = stab_base->get_expr();
@ -3320,7 +3320,7 @@ namespace seq {
                continue;

            // Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint
-            if (m_nseq_regex && m_nseq_regex->try_subsume(mem, *node))
+            if (m_seq_regex && m_seq_regex->try_subsume(mem, *node))
                continue;

            // Create child: x → pr · po
@ -4281,7 +4281,7 @@ namespace seq {

    bool nielsen_graph::check_regex_widening(nielsen_node const& node,
                                             euf::snode* str, euf::snode* regex) {
-        if (!str || !regex || !m_nseq_regex)
+        if (!str || !regex || !m_seq_regex)
            return false;
        // Only apply to ground regexes with non-trivial strings
        if (!regex->is_ground())
@ -4313,7 +4313,7 @@ namespace seq {
            }
            else if (tok->is_var()) {
                // Variable → intersection of primitive regex constraints, or Σ*
-                euf::snode* x_range = m_nseq_regex->collect_primitive_regex_intersection(tok, node);
+                euf::snode* x_range = m_seq_regex->collect_primitive_regex_intersection(tok, node);
                if (x_range) {
                    tok_re = x_range;
                } else {
@ -4389,7 +4389,7 @@ namespace seq {
        if (!inter_sn)
            return false;

-        lbool result = m_nseq_regex->is_empty_bfs(inter_sn, 5000);
+        lbool result = m_seq_regex->is_empty_bfs(inter_sn, 5000);
        return result == l_true;
    }

@ -4399,7 +4399,7 @@ namespace seq {
    // -----------------------------------------------------------------------

    bool nielsen_graph::check_leaf_regex(nielsen_node const& node) {
-        if (!m_nseq_regex)
+        if (!m_seq_regex)
            return true;

        // Group str_mem constraints by variable (primitive constraints only)
@ -4421,7 +4421,7 @@ namespace seq {
        for (auto& [var_id, regexes] : var_regexes) {
            if (regexes.size() < 2)
                continue;
-            lbool result = m_nseq_regex->check_intersection_emptiness(regexes, 5000);
+            lbool result = m_seq_regex->check_intersection_emptiness(regexes, 5000);
            if (result == l_true) {
                // Intersection is empty — infeasible
                return false;
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -244,8 +244,8 @@ Author:
 #include <map>
 #include "model/model.h"

-namespace smt {
-    class nseq_regex;  // forward declaration (defined in smt/nseq_regex.h)
+namespace seq {
+    class seq_regex;  // forward declaration (defined in smt/seq/seq_regex.h)
 }

 namespace seq {
@ -761,7 +761,7 @@ namespace seq {

        // Regex membership module: stabilizers, emptiness checks, language
        // inclusion, derivatives. Allocated in the constructor; owned by this graph.
-        smt::nseq_regex*              m_nseq_regex = nullptr;
+        seq::seq_regex*              m_seq_regex = nullptr;

        // -----------------------------------------------
        // Modification counter for substitution length tracking.
@ -895,8 +895,8 @@ namespace seq {
        // Caller takes ownership of the returned model pointer.
        bool solve_sat_path_ints(model_ref& mdl);

-        // accessor for the nseq_regex module
-        smt::nseq_regex* nseq_regex_module() const { return m_nseq_regex; }
+        // accessor for the seq_regex module
+        seq::seq_regex* seq_regex_module() const { return m_seq_regex; }

    private:
        search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
--- a/src/smt/seq/seq_regex.cpp
+++ b/src/smt/seq/seq_regex.cpp
--- a/src/smt/seq/seq_regex.h
+++ b/src/smt/seq/seq_regex.h
@ -0,0 +1,362 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_regex.h
+
+Abstract:
+
+    Lazy regex membership processing for the Nielsen-based string solver.
+
+    Provides Brzozowski derivative computation, ground prefix/suffix
+    consumption, cycle detection in derivation histories, and
+    stabilizer-based subsumption for regex membership constraints.
+
+    Ports the following ZIPT StrMem operations:
+      - SimplifyCharRegex / SimplifyDir (ground prefix/suffix consumption)
+      - ExtractCycle / StabilizerFromCycle (cycle detection and widening)
+      - TrySubsume (stabilizer-based subsumption)
+
+    The class wraps sgraph operations (brzozowski_deriv, compute_minterms,
+    drop_first, etc.) and provides a higher-level interface for
+    nielsen_graph and theory_nseq.
+
+Author:
+
+    Clemens Eisenhofer 2026-03-01
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+
+--*/
+#pragma once
+
+#include "ast/euf/euf_sgraph.h"
+#include "smt/seq/seq_nielsen.h"
+#include "util/uint_set.h"
+#include "util/lbool.h"
+
+namespace seq {
+
+    class seq_regex {
+        euf::sgraph& m_sg;
+
+        // -----------------------------------------------------------------
+        // Stabilizer store (non-backtrackable, persists across solve() calls)
+        // Mirrors ZIPT Environment.stabilizers / selfStabilizing
+        // (Environment.cs:36-37)
+        // -----------------------------------------------------------------
+
+        // Maps regex snode id → list of stabilizer snodes.
+        // Each regex may accumulate multiple stabilizers from different
+        // cycle detections. The list is deduplicated by pointer equality.
+        u_map<ptr_vector<euf::snode>> m_stabilizers;
+
+        // Set of regex snode ids that are self-stabilizing, i.e., the
+        // stabilizer for the regex is the regex itself (e.g., r*).
+        // Mirrors ZIPT Environment.selfStabilizing (Environment.cs:37)
+        uint_set                      m_self_stabilizing;
+
+        // -----------------------------------------------------------------
+        // BFS emptiness check helpers (private)
+        // -----------------------------------------------------------------
+
+        // Collect character boundary points from range predicates and
+        // to_re string literals in a regex. Boundaries partition the
+        // alphabet into equivalence classes where all characters in
+        // the same class produce identical derivatives.
+        void collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const;
+
+        // Build a set of representative character snodes, one per
+        // alphabet equivalence class, derived from the boundary points
+        // of the given regex.
+        void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps);
+
+    public:
+        seq_regex(euf::sgraph& sg) : m_sg(sg) {}
+
+        euf::sgraph& sg() { return m_sg; }
+
+        // -----------------------------------------------------------------
+        // Stabilizer store API
+        // Mirrors ZIPT Environment stabilizer management
+        // (Environment.cs:114-146)
+        // -----------------------------------------------------------------
+
+        // Reset all stabilizer data. Called at the start of each solve()
+        // invocation if fresh stabilizer state is desired.
+        // Note: stabilizers persist across depth-bound iterations by default;
+        // only call this to clear accumulated state.
+        void reset_stabilizers();
+
+        // Add a stabilizer for a regex. De-duplicates by pointer equality.
+        // Mirrors ZIPT Environment.AddStabilizer (Environment.cs:114-123).
+        void add_stabilizer(euf::snode* regex, euf::snode* stabilizer);
+
+        // Get the union of all stabilizers registered for a regex.
+        // Returns a single re.union snode combining all stabilizers,
+        // or nullptr if no stabilizers exist for the regex.
+        // Mirrors ZIPT Environment.GetStabilizerUnion (Environment.cs:125-128).
+        euf::snode* get_stabilizer_union(euf::snode* regex);
+
+        // Check if any stabilizers have been registered for a regex.
+        bool has_stabilizers(euf::snode* regex) const;
+
+        // Get raw stabilizer list for a regex (read-only).
+        // Returns nullptr if no stabilizers exist.
+        ptr_vector<euf::snode> const* get_stabilizers(euf::snode* regex) const;
+
+        // Mark a regex as self-stabilizing (stabilizer == regex itself).
+        // Mirrors ZIPT Environment.SetSelfStabilizing (Environment.cs:143-146).
+        void set_self_stabilizing(euf::snode* regex);
+
+        // Check if a regex is marked as self-stabilizing.
+        // Mirrors ZIPT Environment.IsSelfStabilizing (Environment.cs:134-141).
+        bool is_self_stabilizing(euf::snode* regex) const;
+
+        // -----------------------------------------------------------------
+        // Self-stabilizing auto-detection and propagation through derivatives
+        // -----------------------------------------------------------------
+
+        // Determine if a regex is inherently self-stabilizing based on its
+        // structure. Returns true for:
+        //   - R* (Kleene star): D(c, R*) = D(c,R)·R*, so R* is its own
+        //     stabilizer regardless of the character.
+        //   - Σ* (full_seq): D(c, Σ*) = Σ*, trivially self-stabilizing.
+        //   - ∅  (fail/empty language): no live derivatives, trivially stable.
+        //   - Complement of full_seq (~Σ* = ∅): also trivially stable.
+        // Does NOT mark the snode; call set_self_stabilizing to persist.
+        bool compute_self_stabilizing(euf::snode* regex) const;
+
+        // After computing a derivative of parent, propagate the self-
+        // stabilizing flag to the derivative result if warranted.
+        // Applies structural rules:
+        //   - If parent is R* → derivative is always self-stabilizing
+        //     (derivative has the form D(c,R)·R* which contains the R* tail).
+        //   - If parent is R·S and S is self-stabilizing → derivative may
+        //     inherit from the self-stabilizing tail.
+        //   - If parent is R|S and both are self-stabilizing → derivative is.
+        //   - If parent is R∩S and both are self-stabilizing → derivative is.
+        //   - If parent is ~R and R is self-stabilizing → derivative is.
+        // Updates the internal self-stabilizing set for the derivative.
+        void propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv);
+
+        // Convenience: compute derivative and propagate self-stabilizing flags.
+        // Equivalent to calling derivative() followed by
+        // propagate_self_stabilizing().
+        euf::snode* derivative_with_propagation(euf::snode* re, euf::snode* elem);
+
+        // -----------------------------------------------------------------
+        // Basic regex predicates
+        // -----------------------------------------------------------------
+
+        // check if regex is the empty language (∅ / re.empty).
+        // performs structural analysis beyond is_fail() to detect
+        // derived emptiness (e.g., union of empties, concat with empty).
+        bool is_empty_regex(euf::snode* re) const;
+
+        // BFS emptiness check over the Brzozowski derivative automaton.
+        // Explores reachable derivative states using representative
+        // characters from each alphabet equivalence class.
+        //   l_true  — regex is definitely empty (no accepting states reachable)
+        //   l_false — regex is definitely non-empty (found a nullable state)
+        //   l_undef — inconclusive (hit exploration bound or failed derivative)
+        // max_states caps the number of explored states to prevent blowup.
+        lbool is_empty_bfs(euf::snode* re, unsigned max_states = 10000);
+
+        // Check emptiness of the intersection of multiple regexes.
+        // Uses BFS over the product of Brzozowski derivative automata.
+        //   l_true  — intersection is definitely empty
+        //   l_false — intersection is definitely non-empty
+        //   l_undef — inconclusive (hit exploration bound)
+        // Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
+        lbool check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
+                                           unsigned max_states = 10000);
+
+        // Check if L(subset_re) ⊆ L(superset_re).
+        // Computed as: subset_re ∩ complement(superset_re) = ∅.
+        // Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
+        lbool is_language_subset(euf::snode* subset_re, euf::snode* superset_re);
+
+        // Collect all primitive regex constraints on variable `var` from
+        // the node's str_mem list and return their intersection as a
+        // single regex snode (using re.inter).
+        // Returns nullptr if no primitive constraints found.
+        euf::snode* collect_primitive_regex_intersection(
+            euf::snode* var, seq::nielsen_node const& node);
+
+        // check if regex is the full language (Σ* / re.all)
+        bool is_full_regex(euf::snode* re) const {
+            return re && re->is_full_seq();
+        }
+
+        // check if regex accepts the empty string
+        bool is_nullable(euf::snode* re) const {
+            return re && re->is_nullable();
+        }
+
+        // check if regex is ground (no string variables)
+        bool is_ground(euf::snode* re) const {
+            return re && re->is_ground();
+        }
+
+        // -----------------------------------------------------------------
+        // Derivative computation
+        // -----------------------------------------------------------------
+
+        // compute Brzozowski derivative of regex w.r.t. character element.
+        // returns nullptr on failure.
+        euf::snode* derivative(euf::snode* re, euf::snode* elem) {
+            return m_sg.brzozowski_deriv(re, elem);
+        }
+
+        // check if all minterms of the regex produce the same Brzozowski
+        // derivative ("uniform derivative").  When this holds, the derivative
+        // is independent of the character value: for any character c,
+        // deriv(regex, c) = result.  This enables deterministic consumption
+        // of symbolic (variable) characters without branching.
+        // Returns the uniform derivative if found, nullptr otherwise.
+        // Mirrors ZIPT's SimplifyCharRegex uniform-derivative fast path.
+        euf::snode* try_uniform_derivative(euf::snode* regex);
+
+        // compute derivative of a str_mem constraint: advance past one character.
+        // the string side is shortened by drop_first and the regex is derived.
+        // Propagates self-stabilizing flags from the parent regex to the derivative.
+        seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
+            euf::snode* parent_re = mem.m_regex;
+            euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, elem);
+            if (deriv)
+                propagate_self_stabilizing(parent_re, deriv);
+            euf::snode* new_str = m_sg.drop_first(mem.m_str);
+            return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep);
+        }
+
+        // -----------------------------------------------------------------
+        // Ground prefix/suffix consumption
+        // -----------------------------------------------------------------
+
+        enum class simplify_status { ok, conflict, satisfied };
+
+        // consume ground characters from the front of mem.m_str by computing
+        // Brzozowski derivatives against mem.m_regex.
+        // stops when:
+        //   - the string front is not a concrete character (ok)
+        //   - a derivative produces ∅ (conflict)
+        //   - the string becomes empty and regex is nullable (satisfied)
+        //   - the string becomes empty and regex is not nullable (conflict)
+        // modifies mem in-place.
+        simplify_status simplify_ground_prefix(seq::str_mem& mem);
+
+        // consume ground characters from the back of mem.m_str by computing
+        // reverse derivatives. modifies mem in-place.
+        // (reverse derivatives require regex reversal; this is a best-effort
+        //  simplification that handles the common case of trailing constants.)
+        simplify_status simplify_ground_suffix(seq::str_mem& mem);
+
+        // -----------------------------------------------------------------
+        // Trivial checks
+        // -----------------------------------------------------------------
+
+        // quick check for trivially sat/unsat membership.
+        //   returns  1 if satisfied (empty string in nullable regex, or full regex)
+        //   returns -1 if conflicting (empty string in non-nullable, or ∅ regex)
+        //   returns  0 if undetermined
+        int check_trivial(seq::str_mem const& mem) const;
+
+        // -----------------------------------------------------------------
+        // Minterm and character computation
+        // -----------------------------------------------------------------
+
+        // compute minterms (character class partition) from regex
+        void compute_minterms(euf::snode* re, euf::snode_vector& minterms) {
+            m_sg.compute_minterms(re, minterms);
+        }
+
+        // compute minterms for character splitting, filtering out empty
+        // (fail) minterms.  Minterms are regex character-class expressions
+        // forming a partition of the alphabet; callers use them to drive
+        // fresh-variable creation in character-split modifiers.
+        void get_minterms(euf::snode* regex, euf::snode_vector& minterms);
+
+        // collect concrete first-position characters from a regex.
+        // extracts characters reachable from to_re leaves and simple ranges.
+        void collect_first_chars(euf::snode* re, euf::snode_vector& chars);
+
+        // -----------------------------------------------------------------
+        // Membership processing
+        // -----------------------------------------------------------------
+
+        // process a str_mem constraint by consuming ground characters from
+        // the string front via Brzozowski derivatives.  If the entire ground
+        // prefix is consumed and the constraint is neither satisfied nor
+        // conflicting, the (simplified) constraint is pushed to out_mems
+        // for the Nielsen graph to expand via character-split modifiers.
+        // returns false if the constraint is immediately conflicting
+        // (empty string in non-nullable regex, or derivative yields ∅).
+        bool process_str_mem(seq::str_mem const& mem,
+                             vector<seq::str_mem>& out_mems);
+
+        // -----------------------------------------------------------------
+        // Cycle detection and stabilizers
+        // -----------------------------------------------------------------
+
+        // record current regex in the derivation history of a str_mem.
+        // the history tracks a chain of (regex, id) pairs for cycle detection.
+        // returns the updated str_mem.
+        seq::str_mem record_history(seq::str_mem const& mem, euf::snode* history_re);
+
+        // check if the derivation history of mem contains a cycle, i.e.,
+        // the same regex id appears twice in the history chain.
+        // if found, returns the cycle entry point regex; nullptr otherwise.
+        euf::snode* extract_cycle(seq::str_mem const& mem) const;
+
+        // check if the derivation history exhibits a cycle.
+        // returns true when the current regex matches a previously seen regex
+        // in the history chain. used to trigger stabilizer introduction.
+        bool detect_cycle(seq::str_mem const& mem) const;
+
+        // compute a Kleene star stabilizer from a cycle.
+        // given the regex at the cycle point and the current regex,
+        // builds r* that over-approximates any number of cycle iterations.
+        // returns nullptr if no stabilizer can be computed.
+        euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
+                                          euf::snode* current_regex);
+
+        // Strengthened stabilizer construction with sub-cycle detection.
+        // Replays the consumed character tokens from cycle_history on the
+        // cycle regex, detecting sub-cycles (where the derivative loops
+        // back to the original regex). For each sub-cycle, builds a
+        // stabilizer from the interleaved character tokens and filtered
+        // sub-stabilizers.
+        // Returns a union of all sub-cycle stabilizer bodies, or nullptr
+        // if no non-trivial stabilizer can be built.
+        // Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225).
+        euf::snode* strengthened_stabilizer(euf::snode* cycle_regex,
+                                            euf::snode* cycle_history);
+
+        // Get filtered stabilizer star: for regex state re, retrieve
+        // existing stabilizers, filter out those whose language can
+        // start with any character in excluded_char, and wrap the
+        // remaining in star(union(...)).
+        // Returns nullptr (or empty-equivalent) if no valid stabilizers.
+        // Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243).
+        euf::snode* get_filtered_stabilizer_star(euf::snode* re,
+                                                  euf::snode* excluded_char);
+
+        // Extract the cycle portion of a str_mem's history by comparing
+        // the current history with an ancestor's history length.
+        // Returns the sub-sequence of tokens consumed since the ancestor,
+        // or nullptr if the history did not advance.
+        euf::snode* extract_cycle_history(seq::str_mem const& current,
+                                           seq::str_mem const& ancestor);
+
+        // try to subsume a str_mem constraint using stabilizer-based
+        // reasoning. Enhanced version: checks if the leading variable's
+        // language (intersection of all its primitive regex constraints)
+        // is a subset of star(union(stabilizers)) for the current regex.
+        // Falls back to cycle-based pointer equality check.
+        // returns true if the constraint can be dropped.
+        // Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386).
+        bool try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node);
+    };
+
+}
--- a/src/smt/seq/seq_state.cpp
+++ b/src/smt/seq/seq_state.cpp
@ -0,0 +1,18 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_state.cpp
+
+Abstract:
+
+    Implementation of seq_state.
+
+Author:
+
+    Clemens Eisenhofer 2026-03-01
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+
+--*/
+#include "smt/seq/seq_state.h"
--- a/src/smt/seq/seq_state.h
+++ b/src/smt/seq/seq_state.h
@ -0,0 +1,118 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_state.h
+
+Abstract:
+
+    Constraint store bridging the SMT context to the Nielsen graph.
+    Holds word equations and regex membership constraints with
+    push/pop support for backtracking.
+
+Author:
+
+    Clemens Eisenhofer 2026-03-01
+    Nikolaj Bjorner (nbjorner) 2026-03-01
+
+--*/
+#pragma once
+
+#include "util/vector.h"
+#include "smt/seq/seq_nielsen.h"
+#include "util/sat_literal.h"
+
+namespace smt {
+
+    class enode;
+
+    // source info for a string equality (the two enodes whose merge caused it)
+    struct eq_source {
+        enode* m_n1;
+        enode* m_n2;
+    };
+
+    // source info for a regex membership (the literal that asserted it)
+    struct mem_source {
+        sat::literal m_lit;
+    };
+
+    // source info for a string disequality
+    struct diseq_source {
+        enode* m_n1;
+        enode* m_n2;
+    };
+
+    class seq_state {
+        vector<seq::str_eq>     m_str_eqs;
+        vector<seq::str_mem>    m_str_mems;
+        vector<eq_source>       m_eq_sources;
+        vector<mem_source>      m_mem_sources;
+        vector<diseq_source>    m_diseqs;
+        unsigned_vector         m_str_eq_lim;
+        unsigned_vector         m_str_mem_lim;
+        unsigned_vector         m_diseq_lim;
+        unsigned                m_next_mem_id = 0;
+
+    public:
+        seq_state() = default;
+
+        void push() {
+            m_str_eq_lim.push_back(m_str_eqs.size());
+            m_str_mem_lim.push_back(m_str_mems.size());
+            m_diseq_lim.push_back(m_diseqs.size());
+        }
+
+        void pop(unsigned n) {
+            for (unsigned i = 0; i < n; ++i) {
+                m_str_eqs.shrink(m_str_eq_lim.back());
+                m_eq_sources.shrink(m_str_eq_lim.back());
+                m_str_eq_lim.pop_back();
+                m_str_mems.shrink(m_str_mem_lim.back());
+                m_mem_sources.shrink(m_str_mem_lim.back());
+                m_str_mem_lim.pop_back();
+                m_diseqs.shrink(m_diseq_lim.back());
+                m_diseq_lim.pop_back();
+            }
+        }
+
+        void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) {
+            seq::dep_tracker dep;
+            m_str_eqs.push_back(seq::str_eq(lhs, rhs, dep));
+            m_eq_sources.push_back({n1, n2});
+        }
+
+        void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal lit) {
+            seq::dep_tracker dep;
+            m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep));
+            m_mem_sources.push_back({lit});
+        }
+
+        void add_diseq(enode* n1, enode* n2) {
+            m_diseqs.push_back({n1, n2});
+        }
+
+        vector<seq::str_eq> const&  str_eqs()  const { return m_str_eqs; }
+        vector<seq::str_mem> const& str_mems() const { return m_str_mems; }
+        vector<diseq_source> const& diseqs()   const { return m_diseqs; }
+
+        eq_source const& get_eq_source(unsigned i) const { return m_eq_sources[i]; }
+        mem_source const& get_mem_source(unsigned i) const { return m_mem_sources[i]; }
+        diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; }
+
+        bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_diseqs.empty(); }
+
+        void reset() {
+            m_str_eqs.reset();
+            m_str_mems.reset();
+            m_eq_sources.reset();
+            m_mem_sources.reset();
+            m_diseqs.reset();
+            m_str_eq_lim.reset();
+            m_str_mem_lim.reset();
+            m_diseq_lim.reset();
+        }
+    };
+
+}