updates

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2026-03-21 04:15:51 +00:00 · 2026-03-13 18:19:25 -07:00 · 2026-03-13 18:19:25 -07:00 · 27f5541b0b
commit 27f5541b0b
parent 8a48caf742
11 changed files with 2176 additions and 80 deletions
--- a/src/smt/nseq_regex.cpp
+++ b/src/smt/nseq_regex.cpp
@ -15,11 +15,294 @@ Author:

 --*/
 #include "smt/nseq_regex.h"
+#include <unordered_set>

 namespace smt {

    // -----------------------------------------------------------------------
-    // Regex emptiness checking (structural analysis)
+    // Stabilizer store
+    // -----------------------------------------------------------------------
+
+    // Clears both stabilizer bookkeeping tables: the per-regex stabilizer
+    // lists and the set of regex ids marked as self-stabilizing.
+    void nseq_regex::reset_stabilizers() {
+        m_self_stabilizing.reset();
+        m_stabilizers.reset();
+    }
+
+    // Records `stabilizer` in the list kept for `regex` (keyed by regex id).
+    // Null arguments are ignored. A stabilizer that is already present
+    // (same pointer) is not added a second time.
+    void nseq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
+        if (regex == nullptr || stabilizer == nullptr)
+            return;
+
+        auto& known = m_stabilizers.insert_if_not_there(regex->id(), ptr_vector<euf::snode>());
+
+        // Pointer-identity scan; stop as soon as a match is seen.
+        bool already_present = false;
+        for (unsigned k = 0; k < known.size() && !already_present; ++k)
+            already_present = (known[k] == stabilizer);
+
+        if (!already_present)
+            known.push_back(stabilizer);
+    }
+
+    // Returns one regex node standing for all stabilizers recorded for
+    // `regex`:
+    //   - nullptr when `regex` is null or has no recorded stabilizers,
+    //   - the stabilizer itself when exactly one is recorded,
+    //   - otherwise the right-nested chain
+    //       union(s1, union(s2, ... union(sN-1, sN)...)).
+    // Also returns nullptr if any operand lacks an expression.
+    euf::snode* nseq_regex::get_stabilizer_union(euf::snode* regex) {
+        if (regex == nullptr || !m_stabilizers.contains(regex->id()))
+            return nullptr;
+
+        auto& known = m_stabilizers[regex->id()];
+        unsigned const count = known.size();
+        if (count == 0)
+            return nullptr;
+        if (count == 1)
+            return known[0];
+
+        // Fold from the right: start with the last stabilizer and wrap it
+        // with each earlier one, walking towards index 0.
+        seq_util& seq = m_sg.get_seq_util();
+        euf::snode* acc = known[count - 1];
+        for (unsigned k = count - 1; k > 0; --k) {
+            expr* left  = known[k - 1]->get_expr();
+            expr* right = acc->get_expr();
+            if (left == nullptr || right == nullptr)
+                return nullptr;
+            expr_ref joined(seq.re.mk_union(left, right), m_sg.get_manager());
+            acc = m_sg.mk(joined);
+        }
+        return acc;
+    }
+
+    // True iff `regex` is non-null and at least one stabilizer is recorded
+    // under its id.
+    bool nseq_regex::has_stabilizers(euf::snode* regex) const {
+        return regex != nullptr
+            && m_stabilizers.contains(regex->id())
+            && !m_stabilizers[regex->id()].empty();
+    }
+
+    // Returns a pointer to the stabilizer list stored for `regex`, or
+    // nullptr when `regex` is null or nothing is stored under its id.
+    // The returned list may be empty.
+    ptr_vector<euf::snode> const* nseq_regex::get_stabilizers(euf::snode* regex) const {
+        bool const has_entry = regex != nullptr && m_stabilizers.contains(regex->id());
+        if (has_entry)
+            return &m_stabilizers[regex->id()];
+        return nullptr;
+    }
+
+    // Marks `regex` (by id) as self-stabilizing. A null node is ignored.
+    void nseq_regex::set_self_stabilizing(euf::snode* regex) {
+        if (regex != nullptr)
+            m_self_stabilizing.insert(regex->id());
+    }
+
+    // True iff `regex` is non-null and its id has been marked via
+    // set_self_stabilizing. This only consults the stored marks.
+    bool nseq_regex::is_self_stabilizing(euf::snode* regex) const {
+        return regex != nullptr && m_self_stabilizing.contains(regex->id());
+    }
+
+    // -----------------------------------------------------------------------
+    // Self-stabilizing auto-detection
+    // -----------------------------------------------------------------------
+
+    // Structural test for regexes that are self-stabilizing by shape alone.
+    // Does not consult or update the stored self-stabilizing marks.
+    //
+    // Returns true for:
+    //   - R*      : D(c, R*) = D(c,R) · R*, so R* is the tail of every
+    //               derivative and acts as its own stabilizer;
+    //   - Σ*      : D(c, Σ*) = Σ* for every character c;
+    //   - ∅       : no live derivatives, so the property holds vacuously;
+    //   - ~(Σ*)   : denotes ∅, same argument.
+    // Returns false for everything else, including a null node.
+    //
+    // Loop nodes are deliberately NOT recognised here, even r{0,} which
+    // denotes r*. NOTE(review): this assumes the sgraph normalizes such
+    // loops to star nodes before they reach this function — confirm.
+    bool nseq_regex::compute_self_stabilizing(euf::snode* regex) const {
+        if (!regex)
+            return false;
+
+        if (regex->is_star() || regex->is_full_seq() || regex->is_fail())
+            return true;
+
+        // Complement whose single operand is Σ*.
+        return regex->is_complement()
+            && regex->num_args() == 1
+            && regex->arg(0)->is_full_seq();
+    }
+
+    // -----------------------------------------------------------------------
+    // Self-stabilizing propagation through derivatives
+    // -----------------------------------------------------------------------
+
+    // Decides whether `deriv` (a derivative of `parent`) should carry the
+    // self-stabilizing mark, and sets the mark if so. Nothing happens when
+    // either node is null or `deriv` is already marked.
+    //
+    // `deriv` gets marked when any of the following holds, tested in order:
+    //   (a) `deriv` is self-stabilizing by shape (star, Σ*, ∅, ~Σ*);
+    //   (b) `parent` is a star:  D(c, R*) = D(c,R) · R* keeps the R* tail;
+    //   (c) `parent` is Σ*:      D(c, Σ*) = Σ*;
+    //   (d) `parent` is marked, or self-stabilizing by shape;
+    //   (e) `parent` is a binary concat R·S whose tail S is self-stabilizing:
+    //       D(c, R·S) = D(c,R)·S | (nullable(R) ? D(c,S) : ∅);
+    //   (f) `parent` is a binary union or intersection whose operands are
+    //       both self-stabilizing;
+    //   (g) `parent` is a complement ~R with R self-stabilizing:
+    //       D(c, ~R) = ~D(c, R).
+    void nseq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
+        if (parent == nullptr || deriv == nullptr)
+            return;
+        if (is_self_stabilizing(deriv))
+            return;
+
+        // "Stable" = explicitly marked, or recognisable from its shape.
+        auto stable = [this](euf::snode* n) {
+            return is_self_stabilizing(n) || compute_self_stabilizing(n);
+        };
+        auto both_operands_stable = [&]() {
+            return stable(parent->arg(0)) && stable(parent->arg(1));
+        };
+
+        bool const binary = parent->num_args() == 2;
+        bool const unary  = parent->num_args() == 1;
+
+        bool const mark =
+               compute_self_stabilizing(deriv)                                   // (a)
+            || parent->is_star()                                                 // (b)
+            || parent->is_full_seq()                                             // (c)
+            || stable(parent)                                                    // (d)
+            || (parent->is_concat()     && binary && stable(parent->arg(1)))     // (e)
+            || (parent->is_union()      && binary && both_operands_stable())     // (f)
+            || (parent->is_intersect()  && binary && both_operands_stable())     // (f)
+            || (parent->is_complement() && unary  && stable(parent->arg(0)));    // (g)
+
+        if (mark)
+            set_self_stabilizing(deriv);
+    }
+
+    // -----------------------------------------------------------------------
+    // Derivative with propagation
+    // -----------------------------------------------------------------------
+
+    // Computes derivative(re, elem) and, when it yields a node, runs
+    // self-stabilizing propagation from `re` onto that node.
+    // Returns nullptr for null inputs or when derivative() returns nullptr.
+    euf::snode* nseq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
+        if (re == nullptr || elem == nullptr)
+            return nullptr;
+
+        euf::snode* const result = derivative(re, elem);
+        if (result != nullptr)
+            propagate_self_stabilizing(re, result);
+        return result;
+    }
+
+    // -----------------------------------------------------------------------
+    // Uniform derivative (symbolic character consumption)
+    // -----------------------------------------------------------------------
+
+    // Tries to find a derivative of `regex` that is the same for every
+    // character, so a symbolic character can be consumed deterministically.
+    //
+    // Returns:
+    //   - `regex` itself when it is Σ* or ∅ (both are their own derivative;
+    //     returning ∅ lets the caller detect that every character is
+    //     rejected);
+    //   - the common derivative when all usable minterms agree (compared by
+    //     node id);
+    //   - nullptr when `regex` is null, no minterms are produced, a
+    //     derivative cannot be computed, two minterms disagree, or every
+    //     minterm was null/fail.
+    euf::snode* nseq_regex::try_uniform_derivative(euf::snode* regex) {
+        if (regex == nullptr)
+            return nullptr;
+        if (regex->is_full_seq() || regex->is_fail())
+            return regex;
+
+        // Character-class partition of the alphabet induced by the regex.
+        euf::snode_vector classes;
+        m_sg.compute_minterms(regex, classes);
+        if (classes.empty())
+            return nullptr;
+
+        euf::snode* common = nullptr;
+        for (unsigned k = 0; k < classes.size(); ++k) {
+            euf::snode* cls = classes[k];
+            // Null or fail minterm: an empty character class, skip it.
+            if (cls == nullptr || cls->is_fail())
+                continue;
+
+            euf::snode* d = m_sg.brzozowski_deriv(regex, cls);
+            if (d == nullptr)
+                return nullptr;                 // derivative unavailable
+            if (common == nullptr)
+                common = d;                     // first usable class sets the target
+            else if (common->id() != d->id())
+                return nullptr;                 // classes disagree: not uniform
+        }
+        return common;
+    }
+
+    // -----------------------------------------------------------------------
+    // Ground prefix consumption
    // -----------------------------------------------------------------------

    bool nseq_regex::is_empty_regex(euf::snode* re) const {
@ -68,6 +351,377 @@ namespace smt {
        return false;
    }

+    // -----------------------------------------------------------------------
+    // BFS regex emptiness check — helper: collect character boundaries
+    // -----------------------------------------------------------------------
+
+    // Appends to `bounds` the character codes at which the behaviour of
+    // `re` may change, walking the regex recursively:
+    //   - re.range(lo, hi): lo and hi+1, each only when the bound is a
+    //     one-character string literal (hi+1 only if hi is below the
+    //     maximal character);
+    //   - to_re(s) with a non-empty string literal s: s[0] and s[0]+1
+    //     (the latter only if s[0] is below the maximal character);
+    //   - fail / full_char / full_seq: nothing;
+    //   - anything else: the boundaries of all arguments.
+    // Nodes that are null or have no expression contribute nothing.
+    // `bounds` is neither sorted nor de-duplicated here.
+    void nseq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
+        if (re == nullptr)
+            return;
+        expr* e = re->get_expr();
+        if (e == nullptr)
+            return;
+
+        seq_util& seq = m_sg.get_seq_util();
+
+        // Character range.
+        expr* range_lo = nullptr;
+        expr* range_hi = nullptr;
+        if (seq.re.is_range(e, range_lo, range_hi)) {
+            zstring lo_str, hi_str;
+            bool const lo_is_char =
+                range_lo && seq.str.is_string(range_lo, lo_str) && lo_str.length() == 1;
+            if (lo_is_char)
+                bounds.push_back(lo_str[0]);
+            bool const hi_is_char =
+                range_hi && seq.str.is_string(range_hi, hi_str) && hi_str.length() == 1;
+            if (hi_is_char && hi_str[0] < zstring::max_char())
+                bounds.push_back(hi_str[0] + 1);
+            return;
+        }
+
+        // String literal lifted to a regex: only its first character matters
+        // for the next derivative step.
+        expr* literal = nullptr;
+        if (seq.re.is_to_re(e, literal)) {
+            zstring text;
+            if (seq.str.is_string(literal, text) && text.length() > 0) {
+                unsigned const head = text[0];
+                bounds.push_back(head);
+                if (head < zstring::max_char())
+                    bounds.push_back(head + 1);
+            }
+            return;
+        }
+
+        // Leaves that do not distinguish between characters.
+        bool const indifferent_leaf = re->is_fail() || re->is_full_char() || re->is_full_seq();
+        if (indifferent_leaf)
+            return;
+
+        // Composite node: gather from every argument.
+        unsigned const arity = re->num_args();
+        for (unsigned k = 0; k < arity; ++k)
+            collect_char_boundaries(re->arg(k), bounds);
+    }
+
+    // -----------------------------------------------------------------------
+    // BFS regex emptiness check — helper: alphabet representatives
+    // -----------------------------------------------------------------------
+
+    // Appends to `reps` one character node per distinct boundary of `re`,
+    // in ascending order. Character 0 is always among the boundaries, so at
+    // least one representative is produced.
+    void nseq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
+        unsigned_vector cut_points;
+        cut_points.push_back(0);
+        collect_char_boundaries(re, cut_points);
+
+        std::sort(cut_points.begin(), cut_points.end());
+
+        // After sorting, duplicates are adjacent; emit each value once.
+        unsigned last_emitted = UINT_MAX;
+        for (unsigned point : cut_points) {
+            if (point == last_emitted)
+                continue;
+            reps.push_back(m_sg.mk_char(point));
+            last_emitted = point;
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // BFS regex emptiness check
+    // -----------------------------------------------------------------------
+
+    // Bounded search for emptiness of a single regex over its Brzozowski
+    // derivative automaton.
+    //
+    // Returns:
+    //   l_true   the language is empty (`re` is fail, structurally empty,
+    //            or no nullable state is reachable);
+    //   l_false  the language is non-empty (`re` is nullable, a star,
+    //            full_char, full_seq or to_re, or a nullable derivative
+    //            was reached);
+    //   l_undef  inconclusive: null node / no expression, non-ground regex,
+    //            `max_states` expansions reached, or a derivative could not
+    //            be computed and no accepting state was found.
+    //
+    // States are identified by node id. The worklist is processed
+    // last-in-first-out.
+    lbool nseq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
+        if (re == nullptr || re->get_expr() == nullptr)
+            return l_undef;
+        if (re->is_fail())
+            return l_true;
+
+        bool const trivially_inhabited =
+            re->is_nullable() || re->is_star() || re->is_full_char() ||
+            re->is_full_seq() || re->is_to_re();
+        if (trivially_inhabited)
+            return l_false;
+
+        if (is_empty_regex(re))
+            return l_true;
+        // Non-ground regexes cannot be explored exhaustively.
+        if (!re->is_ground())
+            return l_undef;
+
+        uint_set seen;
+        euf::snode_vector pending;
+        seen.insert(re->id());
+        pending.push_back(re);
+
+        bool incomplete = false;   // set when some derivative was unavailable
+
+        for (unsigned expanded = 0; !pending.empty(); ++expanded) {
+            if (expanded >= max_states)
+                return l_undef;
+
+            euf::snode* state = pending.back();
+            pending.pop_back();
+
+            // One concrete character per class of this state's alphabet
+            // partition.
+            euf::snode_vector chars;
+            get_alphabet_representatives(state, chars);
+            if (chars.empty())
+                chars.push_back(m_sg.mk_char('a'));   // fallback: single class
+
+            for (euf::snode* ch : chars) {
+                euf::snode* next = m_sg.brzozowski_deriv(state, ch);
+                if (next == nullptr) {
+                    // Remember the gap, keep trying the other characters.
+                    incomplete = true;
+                    continue;
+                }
+                if (next->is_nullable())
+                    return l_false;                   // accepting state reached
+
+                bool const dead = next->is_fail() || is_empty_regex(next);
+                if (dead || seen.contains(next->id()))
+                    continue;
+
+                seen.insert(next->id());
+                pending.push_back(next);
+            }
+        }
+
+        // All reachable states were expanded without finding acceptance.
+        // That proves emptiness only if no derivative was skipped.
+        return incomplete ? l_undef : l_true;
+    }
+
+    // -----------------------------------------------------------------------
+    // Multi-regex intersection emptiness check
+    // BFS over the product of Brzozowski derivative automata.
+    // Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
+    // -----------------------------------------------------------------------
+
+    // Bounded emptiness check for the intersection of several regexes,
+    // exploring the product of their Brzozowski derivative automata.
+    //
+    // Returns:
+    //   l_true   the intersection is empty (some component is fail or
+    //            structurally empty, or no accepting product state is
+    //            reachable);
+    //   l_false  the intersection is non-empty (no regexes given, all
+    //            components nullable, or a product state with all
+    //            components nullable was reached);
+    //   l_undef  inconclusive: a component is null or has no expression,
+    //            `max_states` expansions reached, or a derivative could not
+    //            be computed and no accepting state was found.
+    //
+    // A product state is the tuple of component node ids. Visited states
+    // are stored as full tuples, so two different states are never confused
+    // with each other (a hash-only visited set could drop a reachable state
+    // on a collision and report emptiness wrongly).
+    //
+    // Alphabet representatives are recomputed for every product state,
+    // because derivatives can expose character boundaries that are not
+    // visible in the initial regexes (e.g. the next character of a string
+    // literal).
+    lbool nseq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
+                                                    unsigned max_states) {
+        if (regexes.empty())
+            return l_false; // empty intersection = full language (vacuously non-empty)
+
+        // Quick checks: if any regex is fail/empty, intersection is empty
+        for (euf::snode* re : regexes) {
+            if (!re || !re->get_expr())
+                return l_undef;
+            if (re->is_fail() || is_empty_regex(re))
+                return l_true;
+        }
+
+        // Check if all are nullable (intersection accepts ε)
+        bool all_nullable = true;
+        for (euf::snode* re : regexes) {
+            if (!re->is_nullable()) { all_nullable = false; break; }
+        }
+        if (all_nullable)
+            return l_false;
+
+        // Single regex: delegate to is_empty_bfs
+        if (regexes.size() == 1)
+            return is_empty_bfs(regexes[0], max_states);
+
+        // Product state key: component node ids, in component order.
+        using state_t = std::vector<unsigned>;
+
+        struct state_hash {
+            std::size_t operator()(state_t const& s) const {
+                std::size_t h = 0;
+                for (unsigned id : s)
+                    h = h * 31 + id;
+                return h;
+            }
+        };
+
+        auto ids_of = [](ptr_vector<euf::snode> const& nodes) {
+            state_t ids;
+            ids.reserve(nodes.size());
+            for (euf::snode* n : nodes)
+                ids.push_back(n->id());
+            return ids;
+        };
+
+        // Exact visited set: equality is element-wise comparison of the
+        // id tuples (std::vector's operator==).
+        std::unordered_set<state_t, state_hash> visited;
+
+        std::vector<ptr_vector<euf::snode>> worklist;
+        {
+            ptr_vector<euf::snode> initial;
+            initial.append(regexes);
+            worklist.push_back(std::move(initial));
+        }
+        visited.insert(ids_of(regexes));
+
+        unsigned states_explored = 0;
+        bool had_failed = false;
+
+        while (!worklist.empty()) {
+            if (states_explored >= max_states)
+                return l_undef;
+
+            ptr_vector<euf::snode> current = std::move(worklist.back());
+            worklist.pop_back();
+            ++states_explored;
+
+            // Representatives for THIS product state: merge the boundaries
+            // of all components, then sort and de-duplicate. Character 0 is
+            // always included, so `reps` is never empty.
+            unsigned_vector bounds;
+            bounds.push_back(0);
+            for (euf::snode* re : current)
+                collect_char_boundaries(re, bounds);
+            std::sort(bounds.begin(), bounds.end());
+
+            euf::snode_vector reps;
+            unsigned prev = UINT_MAX;
+            for (unsigned b : bounds) {
+                if (b != prev) {
+                    reps.push_back(m_sg.mk_char(b));
+                    prev = b;
+                }
+            }
+
+            for (euf::snode* ch : reps) {
+                ptr_vector<euf::snode> derivs;
+                bool any_fail = false;
+                bool all_null = true;
+                bool deriv_failed = false;
+
+                for (euf::snode* re : current) {
+                    euf::snode* d = m_sg.brzozowski_deriv(re, ch);
+                    if (!d) { deriv_failed = true; break; }
+                    if (d->is_fail()) { any_fail = true; break; }
+                    if (!d->is_nullable()) all_null = false;
+                    derivs.push_back(d);
+                }
+
+                if (deriv_failed) { had_failed = true; continue; }
+                if (any_fail) continue; // this character leads to empty intersection
+
+                if (all_null)
+                    return l_false; // found an accepting state in the product
+
+                // Skip successors with a structurally empty component.
+                bool any_empty = false;
+                for (euf::snode* d : derivs) {
+                    if (is_empty_regex(d)) { any_empty = true; break; }
+                }
+                if (any_empty) continue;
+
+                // Enqueue the successor only the first time it is seen.
+                if (visited.insert(ids_of(derivs)).second)
+                    worklist.push_back(std::move(derivs));
+            }
+        }
+
+        // Exhaustion proves emptiness only if no derivative was skipped.
+        if (had_failed)
+            return l_undef;
+        return l_true; // exhausted all states, intersection is empty
+    }
+
+    // -----------------------------------------------------------------------
+    // Language subset check: L(A) ⊆ L(B)
+    // via intersection(A, complement(B)) = ∅
+    // Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
+    // -----------------------------------------------------------------------
+
+    // Checks L(subset_re) ⊆ L(superset_re) by testing whether
+    // subset_re ∩ ~superset_re is empty.
+    //
+    // Returns l_true when inclusion holds, l_false when it does not, and
+    // l_undef when an argument is null, an expression or node cannot be
+    // obtained, or the emptiness search is inconclusive.
+    lbool nseq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
+        if (subset_re == nullptr || superset_re == nullptr)
+            return l_undef;
+
+        // Shortcuts: ∅ ⊆ anything, anything ⊆ Σ*, L ⊆ L.
+        bool const trivially_included =
+            subset_re->is_fail() || is_empty_regex(subset_re) ||
+            superset_re->is_full_seq() ||
+            subset_re == superset_re;
+        if (trivially_included)
+            return l_true;
+
+        seq_util& seq = m_sg.get_seq_util();
+        ast_manager& m = m_sg.get_manager();
+
+        // ~superset
+        expr* super_e = superset_re->get_expr();
+        if (super_e == nullptr)
+            return l_undef;
+        expr_ref neg_super(seq.re.mk_complement(super_e), m);
+        if (m_sg.mk(neg_super) == nullptr)
+            return l_undef;
+
+        // subset ∩ ~superset
+        expr* sub_e = subset_re->get_expr();
+        if (sub_e == nullptr)
+            return l_undef;
+        expr_ref witness(seq.re.mk_inter(sub_e, neg_super.get()), m);
+        euf::snode* witness_node = m_sg.mk(witness);
+        if (witness_node == nullptr)
+            return l_undef;
+
+        // Empty witness language <=> inclusion holds.
+        return is_empty_bfs(witness_node);
+    }
+
+    // -----------------------------------------------------------------------
+    // Collect primitive regex intersection for a variable
+    // -----------------------------------------------------------------------
+
+    // Builds the intersection of the regexes of all primitive membership
+    // constraints in `node` whose string starts with `var`.
+    //
+    // Returns nullptr when `var` is null or no such constraint exists; the
+    // regex itself when exactly one matches; otherwise a left-nested
+    // re.inter chain in constraint order. A constraint is left out of the
+    // chain when either operand of the would-be intersection has no
+    // expression.
+    euf::snode* nseq_regex::collect_primitive_regex_intersection(
+            euf::snode* var, seq::nielsen_node const& node) {
+        if (var == nullptr)
+            return nullptr;
+
+        seq_util& seq = m_sg.get_seq_util();
+        ast_manager& mgr = m_sg.get_manager();
+
+        euf::snode* acc = nullptr;
+        for (auto const& mem : node.str_mems()) {
+            // Keep only well-formed, primitive constraints on `var`.
+            bool const candidate = mem.m_str && mem.m_regex && mem.is_primitive();
+            if (!candidate)
+                continue;
+            euf::snode* head = mem.m_str->first();
+            if (head == nullptr || head != var)
+                continue;
+
+            if (acc == nullptr) {
+                acc = mem.m_regex;
+                continue;
+            }
+
+            expr* lhs = acc->get_expr();
+            expr* rhs = mem.m_regex->get_expr();
+            if (lhs == nullptr || rhs == nullptr)
+                continue;
+            expr_ref both(seq.re.mk_inter(lhs, rhs), mgr);
+            acc = m_sg.mk(both);
+        }
+        return acc;
+    }
+
    // -----------------------------------------------------------------------
    // Cycle detection
    // -----------------------------------------------------------------------
@ -88,11 +742,14 @@ namespace smt {
            euf::snode* first = mem.m_str->first();
            if (!first || !first->is_char())
                break;
-            euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, first);
+            euf::snode* parent_re = mem.m_regex;
+            euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, first);
            if (!deriv)
                break;
            if (deriv->is_fail())
                return simplify_status::conflict;
+            // propagate self-stabilizing flag from parent to derivative
+            propagate_self_stabilizing(parent_re, deriv);
            mem.m_str = m_sg.drop_first(mem.m_str);
            mem.m_regex = deriv;
        }
@ -361,13 +1018,6 @@ namespace smt {
        if (!cycle_regex || !current_regex)
            return nullptr;

-        // The stabilizer is the Kleene star of the "cycle body" regex.
-        // If the cycle regex and current regex are the same (pointer equal),
-        // the stabilizer is cycle_regex* (Kleene star).
-        // This mirrors ZIPT's StabilizerFromCycle which extracts the
-        // regex between the cycle entry and current point and wraps it in *.
-
-        // Build cycle_regex* via the sgraph's expression factory
        expr* re_expr = cycle_regex->get_expr();
        if (!re_expr)
            return nullptr;
@ -378,31 +1028,253 @@ namespace smt {
    }

    // -----------------------------------------------------------------------
-    // Stabilizer-based subsumption
+    // Extract cycle history tokens
    // -----------------------------------------------------------------------

-    bool nseq_regex::try_subsume(seq::str_mem const& mem) {
-        // Check if the derivation history exhibits a cycle, and if so,
-        // whether the current regex is subsumed by the stabilizer.
-        euf::snode* cycle = extract_cycle(mem);
-        if (!cycle)
+    euf::snode* nseq_regex::extract_cycle_history(seq::str_mem const& current,
+                                                   seq::str_mem const& ancestor) {
+        // Returns the portion of current's derivation history that lies past
+        // the ancestor's history, i.e. the tokens consumed since the ancestor.
+        // The history is a left-associative concat chain built by
+        // simplify_and_init: concat(concat(concat(nil, c1), c2), c3).
+        // Yields nullptr when current has no history or when its history is
+        // not strictly longer than the ancestor's.
+        euf::snode* const hist = current.m_history;
+        if (!hist)
+            return nullptr;
+
+        unsigned const hist_len = hist->length();
+
+        // A missing ancestor history counts as length 0.
+        unsigned ancestor_len = 0;
+        if (ancestor.m_history)
+            ancestor_len = ancestor.m_history->length();
+
+        // No new tokens were consumed since the ancestor.
+        if (hist_len <= ancestor_len)
+            return nullptr;
+
+        // Nothing to strip: the whole history belongs to the cycle.
+        if (ancestor_len == 0)
+            return hist;
+
+        // Strip the ancestor's prefix, keeping only the newer tokens.
+        return m_sg.drop_left(hist, ancestor_len);
+    }
+
+    // -----------------------------------------------------------------------
+    // Get filtered stabilizer star
+    // Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243)
+    // -----------------------------------------------------------------------
+
+    euf::snode* nseq_regex::get_filtered_stabilizer_star(euf::snode* re,
+                                                          euf::snode* excluded_char) {
+        // Returns star(s1 | ... | sk) over those stabilizers si recorded for
+        // `re` whose Brzozowski derivative w.r.t. `excluded_char` is the fail
+        // regex. Returns nullptr when either argument is null, when `re` has
+        // no stabilizers, when no stabilizer passes the filter, or when the
+        // resulting union has no underlying expression.
+        //
+        // excluded_char is validated here (as add_stabilizer validates both of
+        // its arguments) instead of being forwarded as a null element to
+        // brzozowski_deriv.
+        if (!re || !excluded_char)
+            return nullptr;
+
+        ptr_vector<euf::snode> const* stabs = get_stabilizers(re);
+        if (!stabs || stabs->empty())
+            return nullptr;
+
+        seq_util& seq = m_sg.get_seq_util();
+        ast_manager& m = m_sg.get_manager();
+        euf::snode* filtered_union = nullptr;
+
+        for (euf::snode* s : *stabs) {
+            if (!s)
+                continue;
+
+            // Keep only stabilizers whose language cannot start with excluded_char
+            euf::snode* d = m_sg.brzozowski_deriv(s, excluded_char);
+            if (!d || !d->is_fail())
+                continue;
+
+            // First surviving stabilizer seeds the union.
+            if (!filtered_union) {
+                filtered_union = s;
+                continue;
+            }
+
+            // Later survivors are folded in via re.union; a survivor is left
+            // out if either side lacks an expression to build the union from.
+            expr* e1 = filtered_union->get_expr();
+            expr* e2 = s->get_expr();
+            if (e1 && e2) {
+                expr_ref u(seq.re.mk_union(e1, e2), m);
+                filtered_union = m_sg.mk(u);
+            }
+        }
+
+        if (!filtered_union)
+            return nullptr;
+
+        expr* fe = filtered_union->get_expr();
+        if (!fe)
+            return nullptr;
+
+        // Wrap the filtered union in a Kleene star.
+        expr_ref star_expr(seq.re.mk_star(fe), m);
+        return m_sg.mk(star_expr);
+    }
+
+    // -----------------------------------------------------------------------
+    // Strengthened stabilizer construction with sub-cycle detection
+    // Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225)
+    // -----------------------------------------------------------------------
+
+    euf::snode* nseq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
+                                                     euf::snode* cycle_history) {
+        // Builds a stabilizer regex for a derivation cycle that starts (and
+        // ends) at cycle_regex and consumed the tokens in cycle_history.
+        //
+        // Phase 1 replays the tokens from cycle_regex and splits the history
+        // into sub-cycles at every point where the derivative returns to
+        // cycle_regex. Phase 2 turns each sub-cycle into a body
+        //   to_re(t0) · [filteredStar(R1, t1)] · to_re(t1) · ... · to_re(t_{n-1})
+        // and the result is the union of all bodies.
+        //
+        // Returns nullptr when an argument is null, the history has no
+        // tokens, a token is null, a derivative cannot be computed during
+        // the replay, or no body could be built.
+        //
+        // A sub-cycle whose body cannot be built completely (a token without
+        // an expression, a failed concatenation, or a missing derivative) is
+        // discarded rather than contributing a truncated body: a truncated
+        // body no longer describes a path that closes the cycle.
+        if (!cycle_regex || !cycle_history)
+            return nullptr;
+
+        // Flatten the history concat chain into a vector of character tokens.
+        euf::snode_vector tokens;
+        cycle_history->collect_tokens(tokens);
+
+        if (tokens.empty())
+            return nullptr;
+
+        seq_util& seq = m_sg.get_seq_util();
+        ast_manager& m = m_sg.get_manager();
+
+        // Phase 1: replay tokens on the cycle regex, detecting sub-cycles.
+        // A sub-cycle is detected when the derivative returns to cycle_regex.
+        // Each entry is a half-open token index range [first, second).
+        svector<std::pair<unsigned, unsigned>> sub_cycles;
+        unsigned cycle_start = 0;
+        euf::snode* current_re = cycle_regex;
+
+        for (unsigned i = 0; i < tokens.size(); ++i) {
+            euf::snode* tok = tokens[i];
+            if (!tok)
+                return nullptr;
+
+            euf::snode* deriv = m_sg.brzozowski_deriv(current_re, tok);
+            if (!deriv)
+                return nullptr;
+
+            // Sub-cycle: derivative returned to the cycle entry regex
+            // (same snode, or same underlying expression).
+            if (deriv == cycle_regex ||
+                (deriv->get_expr() && cycle_regex->get_expr() &&
+                 deriv->get_expr() == cycle_regex->get_expr())) {
+                sub_cycles.push_back(std::make_pair(cycle_start, i + 1));
+                cycle_start = i + 1;
+                current_re = cycle_regex;
+            } else {
+                current_re = deriv;
+            }
+        }
+
+        // Remaining tokens that don't complete a sub-cycle
+        if (cycle_start < tokens.size())
+            sub_cycles.push_back(std::make_pair(cycle_start, tokens.size()));
+
+        if (sub_cycles.empty())
+            return nullptr;
+
+        // Phase 2: build a stabilizer body for each sub-cycle.
+        euf::snode* overall_union = nullptr;
+
+        for (auto const& sc : sub_cycles) {
+            unsigned start = sc.first;
+            unsigned end = sc.second;
+            if (start >= end)
+                continue;
+
+            // Every sub-cycle starts from the cycle entry regex.
+            euf::snode* re_state = cycle_regex;
+            euf::snode* body = nullptr;
+            bool complete = true;
+
+            for (unsigned i = start; i < end; ++i) {
+                euf::snode* tok = tokens[i];
+                if (!tok) {
+                    complete = false;
+                    break;
+                }
+
+                // Insert filtered stabilizer star before each token after the
+                // first. The star is optional: if it is unavailable or cannot
+                // be concatenated, the body is simply built without it.
+                if (i > start) {
+                    euf::snode* filtered = get_filtered_stabilizer_star(re_state, tok);
+                    expr* fe = filtered ? filtered->get_expr() : nullptr;
+                    if (fe) {
+                        if (!body) {
+                            body = filtered;
+                        } else {
+                            expr* be = body->get_expr();
+                            if (be) {
+                                expr_ref cat(seq.re.mk_concat(be, fe), m);
+                                body = m_sg.mk(cat);
+                            }
+                        }
+                    }
+                }
+
+                // Convert char token to regex: to_re(unit(tok)).
+                // Unlike the star, the token is mandatory: without it the
+                // body would skip part of the cycle, so give up on this body.
+                expr* tok_expr = tok->get_expr();
+                if (!tok_expr) {
+                    complete = false;
+                    break;
+                }
+
+                expr_ref unit_str(seq.str.mk_unit(tok_expr), m);
+                expr_ref tok_re(seq.re.mk_to_re(unit_str), m);
+                euf::snode* tok_re_sn = m_sg.mk(tok_re);
+
+                if (!body) {
+                    body = tok_re_sn;
+                } else {
+                    expr* be = body->get_expr();
+                    expr* te = tok_re_sn ? tok_re_sn->get_expr() : nullptr;
+                    if (!be || !te) {
+                        complete = false;
+                        break;
+                    }
+                    expr_ref cat(seq.re.mk_concat(be, te), m);
+                    body = m_sg.mk(cat);
+                }
+
+                // Advance the regex state. The state is only consulted by the
+                // next iteration's filtered star, so the derivative after the
+                // last token of the sub-cycle is not needed.
+                if (i + 1 < end) {
+                    euf::snode* deriv = m_sg.brzozowski_deriv(re_state, tok);
+                    if (!deriv) {
+                        complete = false;
+                        break;
+                    }
+                    re_state = deriv;
+                }
+            }
+
+            // Drop bodies that are empty or do not cover the whole sub-cycle.
+            if (!complete || !body)
+                continue;
+
+            if (!overall_union) {
+                overall_union = body;
+            } else {
+                expr* oe = overall_union->get_expr();
+                expr* be = body->get_expr();
+                if (oe && be) {
+                    expr_ref u(seq.re.mk_union(oe, be), m);
+                    overall_union = m_sg.mk(u);
+                }
+            }
+        }
+
+        return overall_union;
+    }
+
+    // -----------------------------------------------------------------------
+    // Stabilizer-based subsumption (enhanced)
+    // Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386)
+    // -----------------------------------------------------------------------
+
+    bool nseq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) {  // true only if every guard below passes and the subset check yields l_true
+        if (!mem.m_str || !mem.m_regex)
             return false;
 
-        euf::snode* stab = stabilizer_from_cycle(cycle, mem.m_regex);
-        if (!stab)
+        // 1. Leading token must be a variable; any other (or missing) first token means no subsumption is attempted
+        euf::snode* first = mem.m_str->first();
+        if (!first || !first->is_var())
             return false;
 
-        // A constraint x ∈ R is subsumed when R ⊆ stab.
-        // For the simple case where cycle == current regex,
-        // R ⊆ R* is always true (since R* accepts everything R does, and more).
-        // This handles the common idempotent cycle case.
-        if (cycle == mem.m_regex)
-            return true;
+        // 2. Must have stabilizers recorded for the regex mem.m_regex
+        if (!has_stabilizers(mem.m_regex))
+            return false;
 
-        // More sophisticated subsumption checks (regex containment)
-        // would require a regex inclusion decision procedure.
-        // For now, only handle the pointer-equality case.
-        return false;
+        // 3. Build stabStar = star(union(all stabilizers for this regex)); give up if the union or its expression is unavailable
+        euf::snode* stab_union = get_stabilizer_union(mem.m_regex);
+        if (!stab_union)
+            return false;
+
+        seq_util& seq = m_sg.get_seq_util();
+        ast_manager& mgr = m_sg.get_manager();
+        expr* su_expr = stab_union->get_expr();
+        if (!su_expr)
+            return false;
+        expr_ref stab_star(seq.re.mk_star(su_expr), mgr);
+        euf::snode* stab_star_sn = m_sg.mk(stab_star);  // snode for the star expression, obtained from the sgraph
+        if (!stab_star_sn)
+            return false;
+
+        // 4. Collect all primitive regex constraints on variable `first` found in `node`; a null result aborts subsumption
+        euf::snode* x_range = collect_primitive_regex_intersection(first, node);
+        if (!x_range)
+            return false;
+
+        // 5. Check L(x_range) ⊆ L(stab_star); any verdict other than l_true is treated as "not subsumed"
+        lbool result = is_language_subset(x_range, stab_star_sn);
+        return result == l_true;
     }

 }