Derive with ranges (#9963)

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Margus Veanes <margus@microsoft.com> Co-authored-by: Margus Veanes <veanes@users.noreply.github.com>
2026-07-21 06:25:50 +00:00 · 2026-06-25 18:47:25 -07:00 · 2026-06-25 18:47:25 -07:00 · 22c2635786
commit 22c2635786
parent 0596c7c634
24 changed files with 3462 additions and 1000 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,9 +1,12 @@
 *~
 rebase.cmd
+reports/
+crashes/
 *.pyc
 *.pyo
 # Ignore callgrind files
 callgrind.out.*
+gmon.out
 # .hpp files are automatically generated
 *.hpp
 .env
--- a/gmon.out
+++ b/gmon.out
--- a/src/ast/rewriter/CMakeLists.txt
+++ b/src/ast/rewriter/CMakeLists.txt
@ -39,7 +39,11 @@ z3_add_component(rewriter
    rewriter.cpp
    seq_axioms.cpp
    seq_eq_solver.cpp
+    seq_derive.cpp
    seq_subset.cpp
+    seq_derive.cpp
+    seq_range_collapse.cpp
+    seq_range_predicate.cpp
    seq_rewriter.cpp
    seq_regex_bisim.cpp
    seq_skolem.cpp
--- a/src/ast/rewriter/bool_rewriter.cpp
+++ b/src/ast/rewriter/bool_rewriter.cpp
@ -19,6 +19,7 @@ Notes:
 #include "ast/rewriter/bool_rewriter.h"
 #include "params/bool_rewriter_params.hpp"
 #include "ast/rewriter/rewriter_def.h"
+#include "ast/rewriter/expr_safe_replace.h"
 #include "ast/ast_lt.h"
 #include "ast/for_each_expr.h"
 #include <algorithm>
@ -1185,4 +1186,30 @@ void bool_rewriter::mk_ge2(expr* a, expr* b, expr* c, expr_ref& r) {
 }


-template class rewriter_tpl<bool_rewriter_cfg>;
+bool bool_rewriter::decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el) {
+    expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr;
+    if (m().is_ite(r, cond, r1, r2)) {
+        c = cond;
+        th = r1;
+        el = r2;
+        return true;
+    }
+    for (expr *e : subterms::ground(expr_ref(r, m()))) {
+        if (m().is_ite(e, cond, r1, r2)) {
+            m_rep1.reset();
+            m_rep2.reset();
+            m_rep1.insert(e, r1);
+            m_rep2.insert(e, r2);
+            c = cond;
+            th = r;
+            el = r;
+            m_rep1(th);
+            m_rep2(el);
+            return true;
+        }
+    }
+    return false;
+}
+
+
+template class rewriter_tpl<bool_rewriter_cfg>;
--- a/src/ast/rewriter/bool_rewriter.h
+++ b/src/ast/rewriter/bool_rewriter.h
@ -20,6 +20,7 @@ Notes:

 #include "ast/ast.h"
 #include "ast/rewriter/rewriter.h"
+#include "ast/rewriter/expr_safe_replace.h"
 #include "util/params.h"

 /**
@ -64,6 +65,7 @@ class bool_rewriter {
    ptr_vector<expr> m_todo1, m_todo2;
    unsigned_vector m_counts1, m_counts2;
    expr_mark       m_marked;
+    expr_safe_replace m_rep1, m_rep2;

    br_status mk_flat_and_core(unsigned num_args, expr * const * args, expr_ref & result);
    br_status mk_flat_or_core(unsigned num_args, expr * const * args, expr_ref & result);
@ -87,7 +89,7 @@ class bool_rewriter {
    expr_ref simplify_eq_ite(expr* value, expr* ite);

 public:
-    bool_rewriter(ast_manager & m, params_ref const & p = params_ref()):m_manager(m), m_local_ctx_cost(0) { 
+    bool_rewriter(ast_manager & m, params_ref const & p = params_ref()):m_manager(m), m_local_ctx_cost(0), m_rep1(m), m_rep2(m) { 
        updt_params(p); 
    }
    ast_manager & m() const { return m_manager; }
@ -242,6 +244,11 @@ public:
    void mk_nand(expr * arg1, expr * arg2, expr_ref & result);
    void mk_nor(expr * arg1, expr * arg2, expr_ref & result);
    void mk_ge2(expr* a, expr* b, expr* c, expr_ref& result);
+
+    // If r is, or contains, an if-then-else, decompose it into a top-level
+    // ite by hoisting the (first) inner ite condition: returns c, th, el such
+    // that r is equivalent to (ite c th el). Returns false if r has no ite.
+    bool decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el);
 };

 struct bool_rewriter_cfg : public default_rewriter_cfg {
--- a/src/ast/rewriter/seq_derive.cpp
+++ b/src/ast/rewriter/seq_derive.cpp
--- a/src/ast/rewriter/seq_derive.h
+++ b/src/ast/rewriter/seq_derive.h
@ -0,0 +1,265 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_derive.h
+
+Abstract:
+
+    Symbolic derivative computation for regular expressions.
+    Produces an ITE-tree (transition regex) representation where
+    the free variable is de Bruijn index 0 representing the input character.
+
+    Based on the theory of symbolic derivatives and transition regexes:
+    - Veanes et al., "On Symbolic Derivatives and Transition Regexes" (LPAR 2024)
+    - Varatalu, Veanes, Ernits, "RE#" (POPL 2025)
+    - Stanford, Veanes, Bjørner, "Symbolic Boolean Derivatives" (PLDI 2021)
+
+Authors:
+
+    Nikolaj Bjorner (nbjorner) 2025-06-03
+
+--*/
+
+#pragma once
+
+#include "ast/seq_decl_plugin.h"
+#include "ast/arith_decl_plugin.h"
+#include "ast/array_decl_plugin.h"
+#include "ast/rewriter/bool_rewriter.h"
+#include "util/obj_pair_hashtable.h"
+#include "util/obj_triple_hashtable.h"
+#include <functional>
+
+class seq_rewriter;
+
+namespace seq {
+
+    enum class derivative_kind { antimirov_t, brzozowski_t };
+    /**
+     * Symbolic derivative engine for regular expressions.
+     *
+     * Given a regex r, operator()(r) computes a symbolic derivative δ(r)
+     * represented as an ITE-tree over character predicates (using de Bruijn
+     * variable 0 for the character). Evaluating the ITE-tree for a concrete
+     * character 'a' yields the classical Brzozowski derivative δ_a(r).
+     *
+     * The ITE-tree structure implicitly defines minterms (equivalence classes
+     * of characters indistinguishable by the regex).
+     *
+     * Key properties:
+     * - Results are memoized for termination on cyclic derivative graphs
+     * - Union/intersection operands are sorted for ACI canonicalization
+     * - Depth-bounded to prevent stack overflow
+     */
+    class derive {
+        ast_manager&    m;
+        seq_util        m_util;
+        arith_util      m_autil;
+        bool_rewriter   m_br;
+        seq_rewriter&   m_re;
+
+        // Cache: maps (ele, regex) pair to its derivative
+        obj_pair_map<expr, expr, expr*> m_acache, m_bcache;
+        obj_pair_map<expr, expr, expr*> m_atop_cache, m_btop_cache; // post-simplify cache
+        expr_ref_vector      m_trail;    // pin cached results
+
+        // Op cache for ITE-hoisting operations (union, inter, concat, complement)
+        // Path-aware caches: key is (a, b, path_expr) for binary ops, (a, path_expr) for complement
+        obj_triple_map<expr, expr, expr, expr *> m_aunion_cache, m_bunion_cache, m_ainter_cache, m_binter_cache, m_axor_cache, m_bxor_cache;
+        obj_pair_map<expr, expr, expr*> m_aconcat_cache, m_bconcat_cache;
+        obj_pair_map<expr, expr, expr*> m_acomplement_cache, m_bcomplement_cache;
+
+        // Depth limiting
+        unsigned m_depth { 0 };
+        static const unsigned m_max_depth = 512;
+
+        seq_util::rex& re() { return m_util.re; }
+        seq_util& u() { return m_util; }
+
+        derivative_kind m_derivative_kind = derivative_kind::antimirov_t;
+
+        // The element (character) for the current derivative computation
+        expr_ref m_ele;
+
+        // Path state for inline pruning during mk_inter/mk_union/mk_complement
+        using intervals_t = svector<std::pair<unsigned, unsigned>>;
+
+        // Path: vector of signed atoms
+        svector<std::pair<expr*, bool>> m_path;
+        // Intervals: feasible character ranges under current path (append-only)
+        intervals_t m_intervals;
+        unsigned m_intervals_start { 0 };
+        // Stack of saved states for push/pop
+        struct path_save { unsigned path_sz; unsigned intervals_sz; unsigned intervals_start; expr* path_expr; };
+        svector<path_save> m_path_stack;
+        // Boolean expression encoding of current path (for cache keys)
+        expr_ref m_path_expr;
+
+        // Path interface
+        lbool push(expr* c, bool sign);  // l_true: implied, l_undef: pushed (must pop), l_false: contradicts
+        void  pop();                      // restore state to matching push
+        expr* get_path_expr() { return m_path_expr; }
+
+        obj_pair_map<expr, expr, expr *> &cache() {
+            return m_derivative_kind == derivative_kind::antimirov_t ? m_acache : m_bcache;
+        }
+
+        obj_pair_map<expr, expr, expr *> &top_cache() {
+            return m_derivative_kind == derivative_kind::antimirov_t ? m_atop_cache : m_btop_cache;
+        }
+
+        obj_triple_map<expr, expr, expr, expr *> &union_cache() {
+            return m_derivative_kind == derivative_kind::antimirov_t ? m_aunion_cache : m_bunion_cache;
+        }
+
+        obj_triple_map<expr, expr, expr, expr *> &inter_cache() {
+            return m_derivative_kind == derivative_kind::antimirov_t ? m_ainter_cache : m_binter_cache;
+        }
+
+        obj_triple_map<expr, expr, expr, expr *> &xor_cache() {
+            return m_derivative_kind == derivative_kind::antimirov_t ? m_axor_cache : m_bxor_cache;
+        }
+
+        obj_pair_map<expr, expr, expr *> &concat_cache() {
+            return m_derivative_kind == derivative_kind::antimirov_t ? m_aconcat_cache : m_bconcat_cache;
+        }
+
+        obj_pair_map<expr, expr, expr *> &complement_cache() {
+            return m_derivative_kind == derivative_kind::antimirov_t ? m_acomplement_cache : m_bcomplement_cache;
+        }
+
+        // Hoist ITE: apply_op through ite(c, t, e) with path pruning
+        expr_ref apply_ite(expr* c, expr* t, expr* e, expr* r, std::function<expr_ref(expr*, expr*)> apply_op);
+        expr_ref apply_ite(expr* c, expr* t1, expr* e1, expr* t2, expr* e2, std::function<expr_ref(expr*, expr*)> apply_op);
+        expr_ref apply_ite(expr* c, expr* t, expr* e, std::function<expr_ref(expr*)> apply_op);
+        // Common ITE dispatch for binary ops (union/inter)
+        expr_ref hoist_ite(expr* a, expr* b, std::function<expr_ref(expr*, expr*)> apply_op);
+
+        // Evaluate a condition against the current path/intervals
+        lbool eval_path_cond(expr* c);
+
+        // Internal helpers for push
+        lbool push_path_atoms(expr* c, bool sign);
+        lbool push_intervals_impl(expr* c, bool sign);
+
+        // Core derivative computation
+        expr_ref derive_rec(expr* r);
+        expr_ref derive_core(expr* r);
+
+        // Helpers for specific regex constructs
+        expr_ref derive_to_re(expr* s, sort* seq_sort);
+        expr_ref derive_range(expr* lo, expr* hi, sort* seq_sort);
+        expr_ref derive_of_pred(expr* pred, sort* seq_sort);
+
+        // Nullable check: returns a Boolean expression
+        expr_ref is_nullable(expr* r);
+        expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort);
+
+        // Smart constructors with path-aware simplification and ACI canonicalization
+        expr_ref mk_union(expr* a, expr* b);
+        bool are_complements(expr* a, expr* b);
+        unsigned union_id(expr* e);              // complement-aware ID for sorting
+        bool is_subset(expr* a, expr* b);
+        expr_ref mk_union_core(expr* a, expr* b); 
+        expr_ref mk_inter(expr* a, expr* b);
+        expr_ref mk_inter_core(expr* a, expr* b); 
+        expr_ref mk_concat(expr* a, expr* b);
+        expr_ref mk_complement(expr* a);
+        expr_ref mk_complement_core(expr* a);
+        expr_ref mk_xor(expr *a, expr *b);
+        expr_ref mk_xor_core(expr *a, expr *b);
+        expr_ref mk_core(decl_kind k, expr* a, expr* b);
+        expr_ref mk_ite(expr* c, expr* t, expr* e);
+
+        // Distribute concatenation through ITE/union in derivative
+        expr_ref mk_deriv_concat(expr* d, expr* tail);
+        expr_ref mk_deriv_concat_core(expr* d, expr* tail);
+
+        // Extract head character and tail from a sequence expression
+        bool get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl);
+
+        // Predicate implication for character range conditions.
+        bool pred_implies(bool sign_a, expr* a, bool sign_b, expr* b);
+        bool pred_implies(expr* a, expr* b);
+
+        // Normalize reverse(r)
+        expr_ref mk_regex_reverse(expr* r);
+
+        // Condition evaluation helpers
+        lbool eval_cond(expr* cond);
+        lbool eval_range_cond(expr* c);
+        void intersect_intervals(unsigned lo, unsigned hi);
+        void exclude_interval(unsigned lo, unsigned hi);
+
+        // Cofactor enumeration over a transition regex (ITE-tree).
+        void get_cofactors_rec(expr* r, expr_ref_pair_vector& result);
+
+        // Re-apply union/intersection simplifications bottom-up to a cofactor
+        // leaf.  decompose_ite substitutes ITE branch values structurally
+        // (no simplification), so leaves can contain un-normalized nodes such
+        // as union(R, none) or inter(R, none); this rebuilds them through
+        // mk_union/mk_inter so equal states share a canonical form.
+        expr_ref clean_leaf(expr* r);
+
+        sort* re_sort(expr* r) { return r->get_sort(); }
+        sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; }
+        sort* ele_sort(expr* r) { sort* s = seq_sort(r); sort* e = nullptr; m_util.is_seq(s, e); return e; }
+
+        void reset();
+        void reset_op_caches();
+
+    public:
+        derive(ast_manager& m, seq_rewriter& re);
+
+        /**
+         * Compute the derivative of regex r with respect to element ele.
+         * When ele is a de Bruijn variable, produces a symbolic ITE-tree.
+         * When ele is a concrete character, produces the concrete derivative.
+         */
+        expr_ref operator()(derivative_kind k, expr* ele, expr* r);
+
+        /**
+         * Convenience: symbolic derivative using de Bruijn var 0.
+         */
+        expr_ref operator()(derivative_kind k, expr* r);
+
+        /**
+         * Nullable check: returns a Boolean expression that is true iff r accepts the empty string.
+         */
+        expr_ref nullable(expr* r) { return is_nullable(r); }
+
+        /**
+         * Enumerate the cofactors (min-terms) of a transition regex r taken with
+         * respect to element ele. r is an ITE-tree over character predicates on
+         * ele; for every feasible path through the tree this produces a pair
+         * (path_condition, leaf_regex). Infeasible character-interval
+         * combinations are pruned using the same path/interval context that the
+         * derivative engine uses while hoisting ITEs.
+         */
+        void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result);
+
+        /**
+         * Compute the symbolic derivative of r and enumerate its reachable
+         * leaves in fully ITE-hoisted normal form.
+         *
+         * Concretely this returns, for every feasible minterm (character
+         * class) of δ(r), a pair (path_condition, target_regex). Every
+         * if-then-else over the input character (including ones that would
+         * otherwise be buried under a concat/union) is hoisted to the top
+         * via the same path/interval pruning used by the derivative engine,
+         * so each target_regex is free of (:var 0) and its nullability is
+         * always decidable. Unions are kept intact as single leaves (a
+         * union leaf denotes a single bisimulation state). Infeasible
+         * minterms are pruned, so all returned leaves are reachable.
+         *
+         * This is the entry point the regex_bisim equivalence procedure
+         * uses: it consumes the target_regex of each pair and ignores the
+         * (redundant) path condition.
+         */
+        void derivative_cofactors(expr* r, expr_ref_pair_vector& result);
+
+    };
+
+}
--- a/src/ast/rewriter/seq_range_collapse.cpp
+++ b/src/ast/rewriter/seq_range_collapse.cpp
@ -0,0 +1,168 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_range_collapse.cpp
+
+Abstract:
+
+    Implementation of regex <-> range_predicate translation for the
+    boolean-combination-of-ranges fragment. See header for the recognized
+    grammar and the canonical regex AST emitted by materialization.
+
+Authors:
+
+    Margus Veanes (veanes) 2026
+
+--*/
+
+#include "ast/rewriter/seq_range_collapse.h"
+
+namespace seq {
+
+    bool regex_to_range_predicate(seq_util& u, expr* r, range_predicate& out) {
+        // The range algebra only models sets of single characters over the
+        // unsigned character domain [0, max_char].  Guard against any regex
+        // whose element type is not a sequence of characters (e.g. a regex
+        // over (Seq Int) or (Seq (Seq Char))): for such regexes the
+        // re.range/re.union/... matchers below would silently fabricate a
+        // character-class predicate and change semantics.  Reject them up
+        // front so callers fall back to the generic regex path.
+        sort* seq_sort = nullptr;
+        if (!u.is_re(r, seq_sort) || !u.is_string(seq_sort))
+            return false;
+
+        unsigned const max_char = u.max_char();
+        auto& re = u.re;
+
+        if (re.is_empty(r)) {
+            out = range_predicate::empty(max_char);
+            return true;
+        }
+        if (re.is_full_char(r)) {
+            out = range_predicate::top(max_char);
+            return true;
+        }
+        unsigned lo = 0, hi = 0;
+        expr* lo_e = nullptr;
+        expr* hi_e = nullptr;
+        if (re.is_range(r, lo_e, hi_e)) {
+            auto extract_char = [&](expr* e, unsigned& c) -> bool {
+                if (u.is_const_char(e, c)) return true;
+                expr* inner = nullptr;
+                if (u.str.is_unit(e, inner) && u.is_const_char(inner, c)) return true;
+                zstring s;
+                if (u.str.is_string(e, s) && s.length() == 1) {
+                    c = s[0];
+                    return true;
+                }
+                return false;
+            };
+            if (!extract_char(lo_e, lo) || !extract_char(hi_e, hi))
+                return false;
+            // Empty/inverted range [lo > hi] is the empty regex.
+            if (lo > hi) {
+                out = range_predicate::empty(max_char);
+                return true;
+            }
+            out = range_predicate::range(lo, hi, max_char);
+            return true;
+        }
+        expr *a = nullptr, *b = nullptr, *c = nullptr;
+        if (re.is_union(r, a, b)) {
+            range_predicate pa(max_char), pb(max_char);
+            if (!regex_to_range_predicate(u, a, pa)) return false;
+            if (!regex_to_range_predicate(u, b, pb)) return false;
+            out = pa | pb;
+            return true;
+        }
+        auto mk_diff = [&](expr *a, expr *b) -> bool {
+            range_predicate pa(max_char), pb(max_char);
+            if (!regex_to_range_predicate(u, a, pa))
+                return false;
+            if (!regex_to_range_predicate(u, b, pb))
+                return false;
+            out = pa - pb;
+            return true;
+        };
+        if (re.is_diff(r, a, b))
+            return mk_diff(a, b);
+
+        if (re.is_intersection(r, a, b) && re.is_complement(b, c)) 
+            return mk_diff(a, c);
+        
+        if (re.is_intersection(r, a, b) && re.is_complement(a, c)) 
+            return mk_diff(b, c);
+        
+        if (re.is_intersection(r, a, b)) {
+            range_predicate pa(max_char), pb(max_char);
+            if (!regex_to_range_predicate(u, a, pa)) return false;
+            if (!regex_to_range_predicate(u, b, pb)) return false;
+            out = pa & pb;
+            return true;
+        }
+
+        
+        // NOTE: re.complement is intentionally NOT handled here.
+        //   re.complement is the SEQUENCE-level complement: its language
+        //   includes the empty string, strings of length >= 2, and any
+        //   length-1 string outside the operand.  A character-class
+        //   range_predicate can only describe a set of length-1 strings,
+        //   so collapsing re.complement(R) to ~R (character-level
+        //   complement) would change semantics whenever R is wrapped in
+        //   any sequence-level context (e.g. re.diff at the top level,
+        //   or membership tests).  De-Morgan equivalences and the
+        //   special cases re.complement(re.empty) / re.complement(re.full)
+        //   are already handled directly in seq_rewriter::mk_re_complement.
+        return false;
+    }
+
+    static expr_ref mk_unit_string_from_char(seq_util& u, unsigned c) {
+        return expr_ref(u.str.mk_string(zstring(c)), u.get_manager());
+    }
+
+    static expr_ref mk_single_range_regex(seq_util& u, unsigned lo, unsigned hi, sort* re_sort) {
+        ast_manager& m = u.get_manager();
+        if (lo == 0 && hi == u.max_char())
+            return expr_ref(u.re.mk_full_char(re_sort), m);
+        // Use the canonical unit-character form (seq.unit (Char N)) for
+        // range bounds.  This matches the shape used elsewhere in
+        // seq_rewriter and avoids creating duplicate AST nodes with
+        // different ids for semantically identical ranges.
+        expr_ref slo(u.str.mk_unit(u.str.mk_char(lo)), m);
+        expr_ref shi(u.str.mk_unit(u.str.mk_char(hi)), m);
+        return expr_ref(u.re.mk_range(slo, shi), m);
+    }
+
+    expr_ref range_predicate_to_regex(seq_util& u, range_predicate const& p, sort* seq_sort) {
+        ast_manager& m = u.get_manager();
+        sort* re_sort = u.re.mk_re(seq_sort);
+        if (p.is_empty())
+            return expr_ref(u.re.mk_empty(re_sort), m);
+        unsigned const n = p.num_ranges();
+        SASSERT(n > 0);
+        if (n == 1) {
+            auto [lo, hi] = p[0];
+            return mk_single_range_regex(u, lo, hi, re_sort);
+        }
+        // Build single-range AST nodes first, then sort by expression id
+        // so the resulting right-associated union matches the canonical
+        // id-sorted shape that seq_rewriter::merge_regex_sets expects.
+        // Without this the merge algorithm produces incorrect unions
+        // when it has to combine our materialized output with another
+        // (id-sorted) regex set.
+        expr_ref_vector ranges(m);
+        for (unsigned i = 0; i < n; ++i) {
+            auto [lo, hi] = p[i];
+            ranges.push_back(mk_single_range_regex(u, lo, hi, re_sort));
+        }
+        std::sort(ranges.data(), ranges.data() + ranges.size(),
+                  [](expr* a, expr* b) { return a->get_id() < b->get_id(); });
+        expr_ref acc(ranges.get(n - 1), m);
+        for (unsigned i = n - 1; i-- > 0; )
+            acc = expr_ref(u.re.mk_union(ranges.get(i), acc), m);
+        return acc;
+    }
+
+}
--- a/src/ast/rewriter/seq_range_collapse.h
+++ b/src/ast/rewriter/seq_range_collapse.h
@ -0,0 +1,71 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_range_collapse.h
+
+Abstract:
+
+    Recognize regexes that are boolean combinations of character-class
+    primitives (re.empty, re.full_char, re.range with concrete chars,
+    and re.union/inter/comp/diff over translatable arguments), and
+    materialize a seq::range_predicate back into a canonical regex AST.
+
+    Together with seq_rewriter integration, this lets any boolean
+    combination of character-class regexes collapse to a canonical
+    multi-range form, so that equivalent character classes share AST
+    identity, and downstream consumers (derivative, OneStep, caching)
+    can short-circuit them as pure range predicates.
+
+Authors:
+
+    Margus Veanes (veanes) 2026
+
+--*/
+#pragma once
+
+#include "ast/rewriter/seq_range_predicate.h"
+#include "ast/seq_decl_plugin.h"
+
+namespace seq {
+
+    /**
+     * If r is a boolean combination of character-class regex primitives
+     * over the unsigned character domain [0, max_char], compute the
+     * equivalent range_predicate and return true. Otherwise return false
+     * with out untouched.
+     *
+     * Recognized fragment (all character-class-preserving operations):
+     *   re.empty                              -> empty
+     *   re.full_char_set                      -> top
+     *   re.range "c_lo" "c_hi"  (concrete)    -> [c_lo, c_hi]
+     *   re.union r1 r2                        -> p1 | p2
+     *   re.intersection r1 r2                 -> p1 & p2
+     *   re.diff r1 r2                         -> p1 - p2
+     *
+     * Notably re.complement is NOT recognized: it is a SEQUENCE-level
+     * complement (over all of Σ*), not a character-class complement, so
+     * collapsing it would change semantics whenever the result is used
+     * in any non-character-class context.  Sequence-level rewrites for
+     * re.complement (double-comp, deMorgan, etc.) are handled directly
+     * in seq_rewriter::mk_re_complement.
+     */
+    bool regex_to_range_predicate(seq_util& u, expr* r, range_predicate& out);
+
+    /**
+     * Canonical materialization of p as a regex AST over the given
+     * sequence sort. Two range_predicates with equal canonical
+     * representations produce structurally identical regex ASTs:
+     *
+     *   empty                  -> re.empty
+     *   top                    -> re.full_char_set
+     *   single range [lo, hi]  -> re.range "lo" "hi"
+     *   multiple ranges        -> right-associated re.union of single
+     *                              ranges, in increasing order of lo
+     *                              (matching the canonical range order
+     *                              held by range_predicate).
+     */
+    expr_ref range_predicate_to_regex(seq_util& u, range_predicate const& p, sort* seq_sort);
+
+}
--- a/src/ast/rewriter/seq_range_predicate.cpp
+++ b/src/ast/rewriter/seq_range_predicate.cpp
@ -0,0 +1,292 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_range_predicate.cpp
+
+Abstract:
+
+    Implementation of the specialized range-algebra used by symbolic
+    derivative computation and regex rewriting. See seq_range_predicate.h
+    for the algebraic specification.
+
+    All Boolean operations are implemented as single linear sweeps over
+    the canonical sorted range vectors and produce canonical output
+    (sorted, disjoint, non-adjacent).
+
+Authors:
+
+    Margus Veanes (veanes) 2026
+
+--*/
+
+#include "ast/rewriter/seq_range_predicate.h"
+#include "util/debug.h"
+#include <algorithm>
+#include <ostream>
+
+namespace seq {
+
+    // -----------------------------------------------------------------------
+    // Factories
+    // -----------------------------------------------------------------------
+
+    range_predicate range_predicate::empty(unsigned max_char) {
+        return range_predicate(max_char);
+    }
+
+    range_predicate range_predicate::top(unsigned max_char) {
+        range_predicate r(max_char);
+        r.m_ranges.push_back({0u, max_char});
+        SASSERT(r.well_formed());
+        return r;
+    }
+
+    range_predicate range_predicate::singleton(unsigned c, unsigned max_char) {
+        SASSERT(c <= max_char);
+        range_predicate r(max_char);
+        r.m_ranges.push_back({c, c});
+        SASSERT(r.well_formed());
+        return r;
+    }
+
+    range_predicate range_predicate::range(unsigned lo, unsigned hi, unsigned max_char) {
+        range_predicate r(max_char);
+        if (lo <= hi && lo <= max_char) {
+            unsigned clipped_hi = hi <= max_char ? hi : max_char;
+            r.m_ranges.push_back({lo, clipped_hi});
+        }
+        SASSERT(r.well_formed());
+        return r;
+    }
+
+    // -----------------------------------------------------------------------
+    // Invariants and observers
+    // -----------------------------------------------------------------------
+
+    bool range_predicate::well_formed() const {
+        for (unsigned i = 0; i < m_ranges.size(); ++i) {
+            auto [lo, hi] = m_ranges[i];
+            if (lo > hi) return false;
+            if (hi > m_max_char) return false;
+            if (i > 0) {
+                unsigned prev_hi = m_ranges[i - 1].second;
+                // Non-adjacent and sorted: prev_hi + 1 < lo, with care
+                // around prev_hi == UINT_MAX which we never expect because
+                // hi <= m_max_char.
+                if (prev_hi + 1 >= lo) return false;
+            }
+        }
+        return true;
+    }
+
+    bool range_predicate::contains(unsigned c) const {
+        // Binary search on first element of pairs.
+        unsigned lo = 0, hi = m_ranges.size();
+        while (lo < hi) {
+            unsigned mid = lo + (hi - lo) / 2;
+            auto [a, b] = m_ranges[mid];
+            if (c < a) hi = mid;
+            else if (c > b) lo = mid + 1;
+            else return true;
+        }
+        return false;
+    }
+
+    uint64_t range_predicate::cardinality() const {
+        uint64_t n = 0;
+        for (auto [lo, hi] : m_ranges)
+            n += static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo) + 1u;
+        return n;
+    }
+
+    // -----------------------------------------------------------------------
+    // Equality, ordering, hashing
+    // -----------------------------------------------------------------------
+
+    bool range_predicate::equals(range_predicate const& o) const {
+        if (m_max_char != o.m_max_char) return false;
+        if (m_ranges.size() != o.m_ranges.size()) return false;
+        for (unsigned i = 0; i < m_ranges.size(); ++i)
+            if (m_ranges[i] != o.m_ranges[i]) return false;
+        return true;
+    }
+
+    bool range_predicate::operator<(range_predicate const& o) const {
+        if (m_max_char != o.m_max_char)
+            return m_max_char < o.m_max_char;
+        unsigned n = std::min(m_ranges.size(), o.m_ranges.size());
+        for (unsigned i = 0; i < n; ++i) {
+            auto a = m_ranges[i];
+            auto b = o.m_ranges[i];
+            if (a.first  != b.first)  return a.first  < b.first;
+            if (a.second != b.second) return a.second < b.second;
+        }
+        return m_ranges.size() < o.m_ranges.size();
+    }
+
+    unsigned range_predicate::hash() const {
+        // FNV-1a 32-bit over (max_char, then each (lo, hi)).
+        uint32_t h = 2166136261u;
+        auto step = [&](uint32_t x) {
+            h ^= x;
+            h *= 16777619u;
+        };
+        step(m_max_char);
+        for (auto [lo, hi] : m_ranges) {
+            step(lo);
+            step(hi);
+        }
+        return h;
+    }
+
+    // -----------------------------------------------------------------------
+    // Boolean operations
+    // -----------------------------------------------------------------------
+
+    namespace {
+        // Append (lo, hi) to result, merging with the previous range if
+        // adjacent or overlapping. Maintains canonical form.
+        inline void append_merged(svector<std::pair<unsigned, unsigned>>& result,
+                                  unsigned lo, unsigned hi) {
+            SASSERT(lo <= hi);
+            if (!result.empty() && result.back().second + 1 >= lo) {
+                if (result.back().second < hi)
+                    result.back().second = hi;
+            } else {
+                result.push_back({lo, hi});
+            }
+        }
+    }
+
+    range_predicate range_predicate::operator|(range_predicate const& o) const {
+        SASSERT(m_max_char == o.m_max_char);
+        range_predicate r(m_max_char);
+        unsigned i = 0, j = 0;
+        const unsigned n = m_ranges.size();
+        const unsigned m = o.m_ranges.size();
+        while (i < n && j < m) {
+            auto a = m_ranges[i];
+            auto b = o.m_ranges[j];
+            if (a.first <= b.first) {
+                append_merged(r.m_ranges, a.first, a.second);
+                ++i;
+            } else {
+                append_merged(r.m_ranges, b.first, b.second);
+                ++j;
+            }
+        }
+        while (i < n) {
+            auto a = m_ranges[i++];
+            append_merged(r.m_ranges, a.first, a.second);
+        }
+        while (j < m) {
+            auto b = o.m_ranges[j++];
+            append_merged(r.m_ranges, b.first, b.second);
+        }
+        SASSERT(r.well_formed());
+        return r;
+    }
+
+    range_predicate range_predicate::operator&(range_predicate const& o) const {
+        SASSERT(m_max_char == o.m_max_char);
+        range_predicate r(m_max_char);
+        unsigned i = 0, j = 0;
+        const unsigned n = m_ranges.size();
+        const unsigned m = o.m_ranges.size();
+        while (i < n && j < m) {
+            auto [a_lo, a_hi] = m_ranges[i];
+            auto [b_lo, b_hi] = o.m_ranges[j];
+            unsigned lo = std::max(a_lo, b_lo);
+            unsigned hi = std::min(a_hi, b_hi);
+            if (lo <= hi)
+                r.m_ranges.push_back({lo, hi});
+            // Advance the range that ends first.
+            if (a_hi < b_hi) ++i;
+            else if (b_hi < a_hi) ++j;
+            else { ++i; ++j; }
+        }
+        SASSERT(r.well_formed());
+        return r;
+    }
+
+    range_predicate range_predicate::operator~() const {
+        range_predicate r(m_max_char);
+        unsigned cursor = 0;
+        for (auto [lo, hi] : m_ranges) {
+            if (cursor < lo)
+                r.m_ranges.push_back({cursor, lo - 1});
+            // Step past hi without overflow: hi <= m_max_char and we
+            // only step when more characters remain.
+            if (hi >= m_max_char) {
+                cursor = m_max_char + 1; // sentinel: no more characters
+                break;
+            }
+            cursor = hi + 1;
+        }
+        if (cursor <= m_max_char)
+            r.m_ranges.push_back({cursor, m_max_char});
+        SASSERT(r.well_formed());
+        return r;
+    }
+
+    range_predicate range_predicate::operator-(range_predicate const& o) const {
+        SASSERT(m_max_char == o.m_max_char);
+        // A - B by linear sweep: for each range of A, subtract overlapping
+        // ranges of B. Both inputs are sorted so we advance j monotonically.
+        range_predicate r(m_max_char);
+        unsigned j = 0;
+        const unsigned m = o.m_ranges.size();
+        for (auto [a_lo, a_hi] : m_ranges) {
+            unsigned cursor = a_lo;
+            while (j < m && o.m_ranges[j].second < cursor)
+                ++j;
+            unsigned k = j;
+            while (k < m && o.m_ranges[k].first <= a_hi) {
+                auto [b_lo, b_hi] = o.m_ranges[k];
+                if (cursor < b_lo)
+                    r.m_ranges.push_back({cursor, std::min(a_hi, b_lo - 1)});
+                if (b_hi >= a_hi) {
+                    cursor = a_hi + 1;
+                    break;
+                }
+                cursor = b_hi + 1;
+                ++k;
+            }
+            if (cursor <= a_hi)
+                r.m_ranges.push_back({cursor, a_hi});
+        }
+        SASSERT(r.well_formed());
+        return r;
+    }
+
+    range_predicate range_predicate::operator^(range_predicate const& o) const {
+        SASSERT(m_max_char == o.m_max_char);
+        // (A | B) - (A & B), but implemented directly with one linear sweep
+        // over the union of breakpoints.
+        return (*this | o) - (*this & o);
+    }
+
+    // -----------------------------------------------------------------------
+    // Display
+    // -----------------------------------------------------------------------
+
+    std::ostream& range_predicate::display(std::ostream& out) const {
+        if (m_ranges.empty()) {
+            return out << "[]";
+        }
+        out << "[";
+        bool first = true;
+        for (auto [lo, hi] : m_ranges) {
+            if (!first) out << ",";
+            first = false;
+            if (lo == hi)
+                out << lo;
+            else
+                out << lo << "-" << hi;
+        }
+        return out << "]";
+    }
+
+}
--- a/src/ast/rewriter/seq_range_predicate.h
+++ b/src/ast/rewriter/seq_range_predicate.h
@ -0,0 +1,127 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    seq_range_predicate.h
+
+Abstract:
+
+    Specialized range-algebra over an unsigned character domain [0, max_char].
+
+    A range_predicate represents a subset of the character domain as a
+    sorted sequence of non-overlapping, non-adjacent, non-empty ranges:
+
+        [(lo_0, hi_0), (lo_1, hi_1), ...] with hi_i + 1 < lo_{i+1}.
+
+    The representation is canonical, so two range_predicates over the same
+    domain are extensionally equivalent iff their internal vectors are
+    elementwise equal.
+
+    All Boolean operations (union, intersection, complement, difference)
+    are linear in the total number of ranges and produce the canonical
+    representation.
+
+    Intended use:
+      * path conditions for symbolic derivative computation,
+      * OneStep predicates capturing length-1 acceptance,
+      * smart-constructor side conditions for regex rewrites such as
+        R & psi  -->  toregex(OneStep(R) & psi).
+
+    The type is a pure value: no ast_manager allocation occurs in its
+    construction or its Boolean operations. Conversion to and from
+    expr* is the responsibility of a separate translator (see callers
+    in seq_derive / seq_rewriter).
+
+Authors:
+
+    Margus Veanes (veanes) 2026
+
+--*/
+#pragma once
+
+#include "util/vector.h"
+#include <iosfwd>
+#include <utility>
+
+namespace seq {
+
+    class range_predicate {
+        using range_t  = std::pair<unsigned, unsigned>;
+        using ranges_t = svector<range_t>;
+
+        // Sorted by first; ranges are disjoint and non-adjacent;
+        // every range satisfies lo <= hi <= m_max_char.
+        ranges_t m_ranges;
+        unsigned m_max_char { 0 };
+
+        // Invariant check used in debug builds.
+        bool well_formed() const;
+
+    public:
+        range_predicate() = default;
+        explicit range_predicate(unsigned max_char) : m_max_char(max_char) {}
+
+        // ---------------- Factory functions ----------------
+
+        static range_predicate empty(unsigned max_char);
+        static range_predicate top(unsigned max_char);
+        static range_predicate singleton(unsigned c, unsigned max_char);
+        static range_predicate range(unsigned lo, unsigned hi, unsigned max_char);
+
+        // ---------------- Observers ----------------
+
+        unsigned max_char()        const { return m_max_char; }
+        unsigned num_ranges()      const { return m_ranges.size(); }
+        range_t  operator[](unsigned i) const { return m_ranges[i]; }
+        ranges_t const& ranges()   const { return m_ranges; }
+
+        bool is_empty() const { return m_ranges.empty(); }
+        bool is_top()   const {
+            return m_ranges.size() == 1
+                && m_ranges[0].first == 0
+                && m_ranges[0].second == m_max_char;
+        }
+        bool is_singleton(unsigned& c) const {
+            if (m_ranges.size() != 1) return false;
+            if (m_ranges[0].first != m_ranges[0].second) return false;
+            c = m_ranges[0].first;
+            return true;
+        }
+        bool contains(unsigned c) const;
+
+        // Number of characters in the predicate (well-defined for any domain).
+        uint64_t cardinality() const;
+
+        // ---------------- Equality, ordering, hashing ----------------
+
+        bool equals(range_predicate const& o) const;
+        bool operator==(range_predicate const& o) const { return equals(o); }
+        bool operator!=(range_predicate const& o) const { return !equals(o); }
+
+        // Total order: lexicographic on the canonical range sequence,
+        // with shorter sequences ordered before longer prefixes.
+        // Predicates over different domains compare by max_char first.
+        bool operator<(range_predicate const& o) const;
+        bool less_than(range_predicate const& o) const { return *this < o; }
+
+        unsigned hash() const;
+
+        // ---------------- Boolean operations ----------------
+
+        range_predicate operator|(range_predicate const& o) const;   // union
+        range_predicate operator&(range_predicate const& o) const;   // intersection
+        range_predicate operator-(range_predicate const& o) const;   // difference
+        range_predicate operator^(range_predicate const& o) const;   // symmetric diff
+        range_predicate operator~() const;                            // complement
+
+        // ---------------- Display ----------------
+
+        std::ostream& display(std::ostream& out) const;
+    };
+
+    inline std::ostream& operator<<(std::ostream& out, range_predicate const& p) {
+        return p.display(out);
+    }
+
+}
--- a/src/ast/rewriter/seq_regex_bisim.cpp
+++ b/src/ast/rewriter/seq_regex_bisim.cpp
@ -85,45 +85,6 @@ namespace seq {
        return is_ground(r);
    }

-    /*
-       Collect the leaves of a t-regex der (an ITE-tree whose leaves are
-       regex expressions) into the output vector. Empty (re.empty) leaves
-       are dropped.
-
-       Each leaf is treated as a single bisimulation state regardless of
-       its top-level shape (including re.union and re.antimirov_union):
-       descending into a union at the leaf would split one state into
-       several, which is semantically unsound for the bisimulation /
-       union-find merging that follows.
-
-       Returns false if we encountered an unexpected node (e.g. a free
-       variable creeping in) — in that case the caller should bail out.
-    */
-    bool regex_bisim::collect_leaves(expr* der, expr_ref_vector& leaves) {
-        ptr_vector<expr> work;
-        obj_hashtable<expr> seen;
-        work.push_back(der);
-        seen.insert(der);
-        while (!work.empty()) {
-            expr* e = work.back();
-            work.pop_back();
-            expr* c = nullptr, * t = nullptr, * f = nullptr;
-            if (m.is_ite(e, c, t, f)) {
-                if (seen.insert_if_not_there(t))
-                    work.push_back(t);
-                if (seen.insert_if_not_there(f))
-                    work.push_back(f);
-                continue;
-            }
-            if (m_util.re.is_empty(e))
-                continue;
-            if (!m_util.is_re(e))
-                return false;
-            leaves.push_back(e);
-        }
-        return true;
-    }
-
    /*
       Fast inequivalence check based on the get_info().classical flag.

@ -232,15 +193,19 @@ namespace seq {
            m_worklist.pop_back();

            // Compute the symbolic derivative wrt the canonical variable
-            // (:var 0). The result is a transition regex (ITE tree) whose
-            // leaves are regex expressions. We use the classical Brzozowski
-            // entry point so the derivative stays as a single TRegex and
-            // does not lift unions to the top via antimirov nodes — this
-            // preserves the XOR-pair invariant the bisimulation relies on.
-            expr_ref d(m_rw.mk_brz_derivative(r), m);
+            // (:var 0) and enumerate its reachable leaves in fully
+            // ITE-hoisted normal form. Every if-then-else over the input
+            // character — even one that would otherwise be buried under a
+            // concat or union — is hoisted to the top and infeasible
+            // minterms are pruned, so each leaf is a ground regex free of
+            // (:var 0) whose nullability is always decidable. Unions are
+            // kept intact as single leaves (a union leaf denotes a single
+            // bisimulation state, never a split into separate states).
+            expr_ref_pair_vector cofs(m);
+            m_rw.brz_derivative_cofactors(r, cofs);
            expr_ref_vector leaves(m);
-            if (!collect_leaves(d, leaves))
-                return l_undef;
+            for (auto const& p : cofs)
+                leaves.push_back(p.second);

            // First pass: check for any nullable leaf (definitive
            // distinguishing empty-continuation word) or any classically
--- a/src/ast/rewriter/seq_regex_bisim.h
+++ b/src/ast/rewriter/seq_regex_bisim.h
@ -74,7 +74,6 @@ namespace seq {

        unsigned node_of(expr* r);
        bool merge_leaf(expr* xor_pair);
-        bool collect_leaves(expr* der, expr_ref_vector& leaves);
        lbool nullability(expr* r);
        bool is_supported(expr* r);
        // Returns true if the leaf l proves that the original pair is
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@ -19,6 +19,7 @@ Notes:
 #pragma once

 #include "ast/seq_decl_plugin.h"
+#include "ast/rewriter/seq_derive.h"
 #include "ast/ast_pp.h"
 #include "ast/arith_decl_plugin.h"
 #include "ast/rewriter/rewriter_types.h"
@ -128,15 +129,20 @@ class seq_rewriter {
        void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r);
    };

+    friend class seq::derive;
+
    seq_util       m_util;
    seq_subset     m_subset;
    arith_util     m_autil;
    bool_rewriter  m_br;
+    seq::derive    m_derive;
    // re2automaton   m_re2aut;
    op_cache       m_op_cache;
    expr_ref_vector m_es, m_lhs, m_rhs;
-    bool           m_coalesce_chars;
-    bool           m_in_bisim { false };
+    bool m_coalesce_chars = false;
+    bool           m_in_bisim { false };   
+    unsigned       m_re_deriv_depth { 0 };
+    static const unsigned m_max_re_deriv_depth = 512;

    enum length_comparison {
        shorter_c, 
@ -179,8 +185,6 @@ class seq_rewriter {
    expr_ref re_replace_char(expr *r, unsigned a_ch, unsigned b_ch, expr *a_str, expr *b_str);

    // Calculate derivative, memoized and enforcing a normal form
-    expr_ref is_nullable_rec(expr* r);
-    expr_ref mk_derivative_rec(expr* ele, expr* r);
    expr_ref mk_der_op(decl_kind k, expr* a, expr* b);
    expr_ref mk_der_op_rec(decl_kind k, expr* a, expr* b);
    expr_ref mk_der_concat(expr* a, expr* b);
@ -191,26 +195,10 @@ class seq_rewriter {
    expr_ref mk_der_cond(expr* cond, expr* ele, sort* seq_sort);
    expr_ref mk_der_antimirov_union(expr* r1, expr* r2);
    bool ite_bdds_compatible(expr* a, expr* b);
-    /* if r has the form deriv(en..deriv(e1,to_re(s))..) returns 's = [e1..en]' else returns '() in r'*/
-    expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort);
    #ifdef Z3DEBUG
    bool check_deriv_normal_form(expr* r, int level = 3);
    #endif

-    void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result);
-
-    expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path);
-    expr_ref mk_in_antimirov_rec(expr* s, expr* d);
-    expr_ref mk_in_antimirov(expr* s, expr* d);
-
-    expr_ref mk_antimirov_deriv_intersection(expr* elem, expr* d1, expr* d2, expr* path);
-    expr_ref mk_antimirov_deriv_concat(expr* d, expr* r);
-    expr_ref mk_antimirov_deriv_negate(expr* elem, expr* d);
-    expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2);
-    expr_ref mk_antimirov_deriv_restrict(expr* elem, expr* d1, expr* cond);
-    expr_ref mk_regex_reverse(expr* r);
-    expr_ref mk_regex_concat(expr* r1, expr* r2);
-
    expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function<bool(expr*, expr*&, expr*&)>& decompose, std::function<expr* (expr*, expr*)>& compose);

    // elem is (:var 0) and path a condition that may have (:var 0) as a free variable
@ -267,6 +255,14 @@ class seq_rewriter {
    br_status mk_re_union0(expr* a, expr* b, expr_ref& result);
    br_status mk_re_inter0(expr* a, expr* b, expr_ref& result);
    br_status mk_re_complement(expr* a, expr_ref& result);
+    // Range-set collapse helpers: if the operands form a boolean
+    // combination of character-class regexes, materialize the result as a
+    // canonical regex over a single range_predicate.  See
+    // ast/rewriter/seq_range_collapse.h for the recognized fragment.
+    // NOTE: re.complement is intentionally not in this set because it
+    // operates at the sequence level, not the character-class level.
+    bool try_collapse_re_union(expr* a, expr* b, expr_ref& result);
+    bool try_collapse_re_inter(expr* a, expr* b, expr_ref& result);
    br_status mk_re_star(expr* a, expr_ref& result);
    br_status mk_re_diff(expr* a, expr* b, expr_ref& result);
    br_status mk_re_xor(expr* a, expr* b, expr_ref& result);
@ -351,9 +347,9 @@ class seq_rewriter {

 public:
    seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
-        m_util(m), m_subset(m_util.re), m_autil(m), m_br(m, p), // m_re2aut(m), 
+        m_util(m), m_subset(m_util.re), m_autil(m), m_br(m, p), m_derive(m, *this),
        m_op_cache(m), m_es(m), 
-        m_lhs(m), m_rhs(m), m_coalesce_chars(true) {
+        m_lhs(m), m_rhs(m) {
    }
    ast_manager & m() const { return m_util.get_manager(); }
    family_id get_fid() const { return m_util.get_family_id(); }
@ -364,7 +360,7 @@ public:
    static void get_param_descrs(param_descrs & r);


-    bool coalesce_chars() const { return m_coalesce_chars; }
+    // bool coalesce_chars() const { return m_coalesce_chars; }

    br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result);
    br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result);
@ -380,6 +376,34 @@ public:
        return result;
    }

+    expr_ref mk_xor0(expr *a, expr *b) {
+        expr_ref result(m());
+        if (mk_re_xor0(a, b, result) == BR_FAILED)
+            result = re().mk_xor(a, b);
+        return result;
+    }
+
+    expr_ref mk_union(expr *a, expr *b) {
+        expr_ref result(m());
+        if (mk_re_union(a, b, result) == BR_FAILED)
+            result = re().mk_union(a, b);
+        return result;
+    }
+
+    expr_ref mk_inter(expr *a, expr *b) {
+        expr_ref result(m());
+        if (mk_re_inter(a, b, result) == BR_FAILED)
+            result = re().mk_inter(a, b);
+        return result;
+    }
+
+    expr_ref mk_complement(expr *a) {
+        expr_ref result(m());
+        if (mk_re_complement(a, result) == BR_FAILED)
+            result = re().mk_complement(a);
+        return result;
+    }
+
    /*
    * makes concat and simplifies
    */
@ -440,7 +464,31 @@ public:
    procedure which relies on each leaf of D(p XOR q) being a coherent
    XOR pair (D_v p) XOR (D_v q).
    */
-    expr_ref mk_brz_derivative(expr* r);
+    expr_ref mk_brz_derivative(expr *r) {
+        return mk_derivative(r);
+    }
+
+    /*
+    Enumerate the cofactors (min-terms) of a transition regex r taken with
+    respect to ele. Produces (path_condition, leaf_regex) pairs for every
+    feasible path through the ITE-tree, pruning infeasible character ranges.
+    Delegates to the derivative engine so the same path/interval context used
+    while hoisting ITEs is reused for the leaf simplification.
+    */
+    void get_cofactors(expr* ele, expr* r, expr_ref_pair_vector& result) {
+        m_derive.get_cofactors(ele, r, result);
+    }
+
+    /*
+    Compute the symbolic derivative of r and enumerate its reachable leaves
+    in fully ITE-hoisted normal form: a list of (path_condition, target)
+    pairs where every target is free of (:var 0) (so nullability is always
+    decidable) and unions are kept intact as single states. Used by
+    regex_bisim, which consumes the targets and ignores the path conditions.
+    */
+    void brz_derivative_cofactors(expr* r, expr_ref_pair_vector& result) {
+        m_derive.derivative_cofactors(r, result);
+    }

    // heuristic elimination of element from condition that comes form a derivative.
    // special case optimization for conjunctions of equalities, disequalities and ranges.
@ -451,6 +499,8 @@ public:
    /* Apply simplifications to the intersection to keep it normalized (r1 and r2 are not normalized)*/
    expr_ref mk_regex_inter_normalize(expr* r1, expr* r2);

+    expr_ref mk_regex_concat(expr *r1, expr *r2);
+
    /*
    * Extract some string that is a member of r. 
    * Return true if a valid string was extracted.
--- a/src/ast/rewriter/seq_subset.cpp
+++ b/src/ast/rewriter/seq_subset.cpp
@ -19,7 +19,7 @@ Author:

 bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
    while (true) {
-        
+
        if (a == b)
            return true;
        if (m_re.is_empty(a))
@ -30,7 +30,7 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
            return true;

        if (depth >= m_max_depth)
-            return false;        
+            return false;

        expr* a1 = nullptr, * a2 = nullptr, * b1 = nullptr, * b2 = nullptr;
        unsigned la, ua, lb, ub;
@ -39,16 +39,12 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
        if (m_re.is_dot_plus(b) && m_re.get_info(a).nullable == l_false)
            return true;

-        // a ⊆ a*
-        if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth))
-            return true;
-
        // e ⊆ a*
        if (m_re.is_epsilon(a) && m_re.is_star(b, b1))
            return true;

-        // R ⊆ R*
-        if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth + 1))
+        // a ⊆ a*: if b = b1* and a ⊆ b1, then a ⊆ b1*
+        if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth))
            return true;

        // R1* ⊆ R2* if R1 ⊆ R2
@ -112,6 +108,12 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
        if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b1) && is_subset_rec(a, b2, depth))
            return true;

+        // prefix absorption: P·R' ⊆ Σ*·R' for any prefix P (since P ⊆ Σ*).
+        // Detect that a has R' (= b2) as a concatenation suffix, where b = Σ*·R'.
+        // Covers contains-patterns, e.g. Σ*·a·Σ*·b·Σ* ⊆ Σ*·b·Σ*.
+        if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b1) && ends_with(a, b2))
+            return true;
+
        // R ⊆ R'·Σ* if R ⊆ R'
        if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b2) && is_subset_rec(a, b1, depth))
            return true;
@ -144,3 +146,30 @@ bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
 bool seq_subset::is_subset(expr* a, expr* b) const {
    return is_subset_rec(a, b, 0);
 }
+
+bool seq_subset::ends_with(expr* a, expr* suf) const {
+    if (a == suf)
+        return true;
+    // Flatten both regexes into their sequence of concatenation factors
+    // (independent of left/right associativity) and test list-suffix equality.
+    ptr_vector<expr> af, sf;
+    flatten_concat(a, af);
+    flatten_concat(suf, sf);
+    if (sf.size() > af.size())
+        return false;
+    unsigned off = af.size() - sf.size();
+    for (unsigned i = 0; i < sf.size(); ++i)
+        if (af[off + i] != sf[i])
+            return false;
+    return true;
+}
+
+void seq_subset::flatten_concat(expr* a, ptr_vector<expr>& out) const {
+    expr* a1 = nullptr, * a2 = nullptr;
+    if (m_re.is_concat(a, a1, a2)) {
+        flatten_concat(a1, out);
+        flatten_concat(a2, out);
+    }
+    else
+        out.push_back(a);
+}
--- a/src/ast/rewriter/seq_subset.h
+++ b/src/ast/rewriter/seq_subset.h
@ -24,6 +24,12 @@ class seq_subset {

    bool is_subset_rec(expr* a, expr* b, unsigned depth) const;

+    // true if regex a, viewed as a flattened concatenation, has suf as a
+    // structural (concatenation) suffix.
+    bool ends_with(expr* a, expr* suf) const;
+
+    void flatten_concat(expr* a, ptr_vector<expr>& out) const;
+
 public:
    explicit seq_subset(seq_util::rex& re) : m_re(re) {}
    bool is_subset(expr* a, expr* b) const;
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@ -461,6 +461,24 @@ namespace smt {
        if (re().is_empty(r))
            //trivially true
            return;
+        // When one side is re.none the equation is a pure emptiness check on
+        // the other regex (symmetric_diff already returned it as r).  Decide
+        // it directly by antimirov NFA reachability instead of running the
+        // bisimulation/XOR closure, which would build large un-canonicalized
+        // product states for intersections of contains-patterns.
+        if ((re().is_empty(r1) || re().is_empty(r2)) && is_ground(r)) {
+            switch (re_is_empty(r)) {
+            case l_true:
+                STRACE(seq_regex_brief, tout << "empty:eq ";);
+                return; // languages equal (both empty): trivially true
+            case l_false:
+                STRACE(seq_regex_brief, tout << "empty:neq ";);
+                th.add_axiom(~th.mk_eq(r1, r2, false), false_literal);
+                return;
+            case l_undef:
+                break;
+            }
+        }
        // Try the bisimulation procedure on ground regexes first.  If it
        // returns a definite answer, dispatch the corresponding axiom and
        // bypass the symbolic emptiness/derivative closure.
@ -562,7 +580,7 @@ namespace smt {
            lits.push_back(null_lit);

        expr_ref_pair_vector cofactors(m);
-        get_cofactors(d, cofactors);
+        seq_rw().get_cofactors(hd, d, cofactors);
        for (auto const& p : cofactors) {
            if (is_member(p.second, u)) 
                continue;            
@ -671,6 +689,67 @@ namespace smt {
        return result;
    }

+    /*
+        Decide emptiness of a ground regex r via antimirov-mode NFA
+        reachability.
+
+        The symbolic derivative engine runs in antimirov mode, so the
+        derivative of an intersection distributes into a *set* of individual
+        product states inter(A_i, B_j) (each a small, ground regex) rather
+        than one giant union-of-intersections term.  get_derivative_targets
+        enumerates these NFA successor states.
+
+        We short-circuit to l_false (non-empty) as soon as a reachable state
+        is nullable (accepts the empty word) or classical (a regex built only
+        from to_re/all/union/concat/star/plus/opt/loop, hence non-empty).  An
+        intersection itself is never classical, but once one operand reduces
+        to Σ* the intersection collapses (via the derivative's subset
+        simplification) to the other, classical, operand.
+
+        If the worklist is exhausted with no such state, r is empty (l_true).
+        Returns l_undef if a step bound is hit, so callers can fall back to
+        the general procedure.
+    */
+    lbool seq_regex::re_is_empty(expr* r) {
+        if (re().is_empty(r))
+            return l_true;
+        expr_ref_vector pinned(m);
+        obj_hashtable<expr> visited;
+        ptr_vector<expr> work;
+        work.push_back(r);
+        visited.insert(r);
+        pinned.push_back(r);
+        unsigned const bound = 100000;
+        unsigned steps = 0;
+        while (!work.empty()) {
+            if (++steps > bound)
+                return l_undef;
+            expr* s = work.back();
+            work.pop_back();
+            auto info = re().get_info(s);
+            if (!info.is_known())
+                return l_undef;
+            // ε ∈ L(s) or s is a non-empty classical regex ⇒ L(r) non-empty.
+            if (info.nullable == l_true || info.classical)
+                return l_false;
+            // Dead state: prune (min_length == UINT_MAX means no word is
+            // accepted from here).
+            if (info.min_length == UINT_MAX)
+                continue;
+            expr_ref_vector targets(m);
+            get_derivative_targets(s, targets);
+            for (expr* t : targets) {
+                if (visited.contains(t))
+                    continue;
+                visited.insert(t);
+                pinned.push_back(t);
+                work.push_back(t);
+            }
+        }
+        return l_true;
+    }
+
+
    /*
        Return a list of all target regexes in the derivative of a regex r,
        ignoring the conditions along each path.
@ -707,53 +786,26 @@ namespace smt {

    /*
        Return a list of all (cond, leaf) pairs in a given derivative
-        expression r.
+        expression r, where elem is the character symbol the derivative was
+        taken with respect to.

-        Note: this  implementation is inefficient: it simply collects all expressions under an if and 
-        iterates over all combinations.
+        The transition regexes produced by the symbolic derivative engine are
+        ITE-trees over character predicates ci on elem (equalities such as
+        elem = 'A', and ranges such as 'a' <= elem <= 'z').  These predicates
+        are typically mutually exclusive, so the number of feasible truth
+        assignments to {c1,..,ck} ("minterms") is small.

-        This method is still used by:
+        The enumeration is delegated to seq::derive (via seq_rw().get_cofactors)
+        so it reuses the very same path/interval context that the derivative
+        engine uses while hoisting ITEs: each feasible path through the ITE-tree
+        yields one (path_condition, leaf) cofactor, infeasible character-range
+        combinations are pruned, and the leaf is simplified with the path-aware
+        smart constructors.
+
+        This is used by:
            propagate_is_empty
            propagate_is_non_empty
    */
-    void seq_regex::get_cofactors(expr* r, expr_ref_pair_vector& result) {
-        obj_hashtable<expr> ifs;
-        expr* cond = nullptr, * r1 = nullptr, * r2 = nullptr;
-        for (expr* e : subterms::ground(expr_ref(r, m))) 
-            if (m.is_ite(e, cond, r1, r2))
-                ifs.insert(cond);
-        
-        expr_ref_vector rs(m);
-        vector<expr_ref_vector> conds;
-        conds.push_back(expr_ref_vector(m));
-        rs.push_back(r);
-        for (expr* c : ifs) {
-            unsigned sz = conds.size();
-            expr_safe_replace rep1(m);
-            expr_safe_replace rep2(m);
-            rep1.insert(c, m.mk_true());
-            rep2.insert(c, m.mk_false());
-            expr_ref r2(m);
-            for (unsigned i = 0; i < sz; ++i) {
-                expr_ref_vector cs = conds[i];
-                cs.push_back(mk_not(m, c));
-                conds.push_back(cs);
-                conds[i].push_back(c);
-                expr_ref r1(rs.get(i), m);
-                rep1(r1, r2);
-                rs[i] = r2;
-                rep2(r1, r2);
-                rs.push_back(r2);
-            }
-        }
-        for (unsigned i = 0; i < conds.size(); ++i) {
-            expr_ref conj = mk_and(conds[i]);
-            expr_ref r(rs.get(i), m);
-            ctx.get_rewriter()(r);
-            if (!m.is_false(conj) && !re().is_empty(r))
-                result.push_back(conj, r);
-        }
-    }

    /*
      is_empty(r, u) => ~is_nullable(r)
@ -781,7 +833,7 @@ namespace smt {
        d = mk_derivative_wrapper(hd, r);
        literal_vector lits;
        expr_ref_pair_vector cofactors(m);
-        get_cofactors(d, cofactors);        
+        seq_rw().get_cofactors(hd, d, cofactors);        
        for (auto const& p : cofactors) {
            if (is_member(p.second, u))
                continue;
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@ -164,7 +164,12 @@ namespace smt {
        // returned by derivative_wrapper
        expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
        void get_derivative_targets(expr* r, expr_ref_vector& targets);
-        void get_cofactors(expr* r, expr_ref_pair_vector& result);
+
+        // Decide emptiness of a ground regex by antimirov-mode NFA
+        // reachability: explore derivative target states, short-circuiting to
+        // "non-empty" on the first reachable nullable or classical state.
+        // Returns l_true (empty), l_false (non-empty), l_undef (gave up).
+        lbool re_is_empty(expr* r);

        /* 
           Pretty print the regex of the state id to the out stream, 
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@ -115,15 +115,18 @@ add_executable(test-z3
  polynomial_factorization.cpp
  polynorm.cpp
  prime_generator.cpp
+  seq_regex_bisim.cpp
  proof_checker.cpp
  qe_arith.cpp
  mbp_qel.cpp
  quant_elim.cpp
  quant_solve.cpp
  random.cpp
+  range_predicate.cpp
  rational.cpp
  rcf.cpp
  region.cpp
+  regex_range_collapse.cpp
  sat_local_search.cpp
  sat_lookahead.cpp
  sat_user_scope.cpp
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@ -113,6 +113,8 @@
    X(api_bug) \
    X(api_special_relations) \
    X(arith_rewriter) \
+    X(range_predicate) \
+    X(regex_range_collapse) \
    X(seq_rewriter) \
    X(check_assumptions) \
    X(smt_context) \
@ -195,6 +197,7 @@
    X(finite_set) \
    X(finite_set_rewriter) \
    X(fpa) \
+    X(seq_regex_bisim) \
    X(term_enumeration) \
    X(lcube)

--- a/src/test/range_predicate.cpp
+++ b/src/test/range_predicate.cpp
@ -0,0 +1,260 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    test/range_predicate.cpp
+
+Abstract:
+
+    Unit tests for the range-algebra value type seq::range_predicate.
+
+    The tests exercise:
+      * factory constructors and canonical-form invariants,
+      * extensional equality and total ordering,
+      * Boolean operations (|, &, ~, -, ^) on hand-picked instances,
+      * exhaustive verification of de-Morgan and lattice laws on a
+        small character domain, by enumerating every subset.
+
+Author:
+
+    Margus Veanes (veanes) 2026
+
+--*/
+
+#include "ast/rewriter/seq_range_predicate.h"
+#include "util/debug.h"
+#include <cstdint>
+#include <iostream>
+#include <sstream>
+
+using seq::range_predicate;
+
+namespace {
+
+    // Build a range_predicate from a bitmask over [0, max_char] for testing.
+    range_predicate from_mask(uint64_t mask, unsigned max_char) {
+        range_predicate r = range_predicate::empty(max_char);
+        for (unsigned c = 0; c <= max_char; ++c)
+            if ((mask >> c) & 1u)
+                r = r | range_predicate::singleton(c, max_char);
+        return r;
+    }
+
+    // Convert a range_predicate back to a bitmask for cross-checking.
+    uint64_t to_mask(range_predicate const& r) {
+        uint64_t mask = 0;
+        for (unsigned c = 0; c <= r.max_char(); ++c)
+            if (r.contains(c))
+                mask |= (uint64_t(1) << c);
+        return mask;
+    }
+
+    void test_factories() {
+        auto e = range_predicate::empty(255);
+        ENSURE(e.is_empty());
+        ENSURE(!e.is_top());
+        ENSURE(e.num_ranges() == 0);
+        ENSURE(e.cardinality() == 0);
+
+        auto t = range_predicate::top(255);
+        ENSURE(!t.is_empty());
+        ENSURE(t.is_top());
+        ENSURE(t.num_ranges() == 1);
+        ENSURE(t.cardinality() == 256);
+        ENSURE(t.contains(0));
+        ENSURE(t.contains(255));
+
+        auto s = range_predicate::singleton(42, 255);
+        ENSURE(s.num_ranges() == 1);
+        ENSURE(s.cardinality() == 1);
+        ENSURE(s.contains(42));
+        ENSURE(!s.contains(41));
+        unsigned c = 0;
+        ENSURE(s.is_singleton(c));
+        ENSURE(c == 42);
+
+        auto r = range_predicate::range(10, 20, 255);
+        ENSURE(r.num_ranges() == 1);
+        ENSURE(r.cardinality() == 11);
+        ENSURE(r.contains(10));
+        ENSURE(r.contains(20));
+        ENSURE(!r.contains(9));
+        ENSURE(!r.contains(21));
+
+        // Reversed bounds produce empty.
+        auto bad = range_predicate::range(20, 10, 255);
+        ENSURE(bad.is_empty());
+
+        // Clipping at max_char.
+        auto clipped = range_predicate::range(200, 1000, 255);
+        ENSURE(clipped.num_ranges() == 1);
+        ENSURE(clipped[0] == std::make_pair(200u, 255u));
+    }
+
+    void test_equality_and_order() {
+        auto a = range_predicate::range(1, 5, 31);
+        auto b = range_predicate::range(1, 5, 31);
+        auto c = range_predicate::range(1, 6, 31);
+        ENSURE(a == b);
+        ENSURE(a != c);
+        ENSURE(a.hash() == b.hash());
+        ENSURE(a < c || c < a);
+        ENSURE(!(a < a));
+
+        auto empty = range_predicate::empty(31);
+        ENSURE(empty < a);
+
+        // Canonical merging of adjacent ranges.
+        auto d = range_predicate::range(0, 4, 31) | range_predicate::range(5, 10, 31);
+        auto e = range_predicate::range(0, 10, 31);
+        ENSURE(d == e);
+    }
+
+    void test_union_intersection_hand() {
+        unsigned const M = 31;
+        auto a = range_predicate::range(0, 4, M) | range_predicate::range(10, 14, M);
+        auto b = range_predicate::range(3, 11, M);
+
+        auto u = a | b; // [0,14]
+        ENSURE(u.num_ranges() == 1);
+        ENSURE(u[0] == std::make_pair(0u, 14u));
+
+        auto i = a & b; // [3,4] U [10,11]
+        ENSURE(i.num_ranges() == 2);
+        ENSURE(i[0] == std::make_pair(3u, 4u));
+        ENSURE(i[1] == std::make_pair(10u, 11u));
+
+        auto d = a - b; // [0,2] U [12,14]
+        ENSURE(d.num_ranges() == 2);
+        ENSURE(d[0] == std::make_pair(0u, 2u));
+        ENSURE(d[1] == std::make_pair(12u, 14u));
+
+        auto x = a ^ b; // [0,2] U [5,9] U [12,14]
+        ENSURE(x.num_ranges() == 3);
+        ENSURE(x[0] == std::make_pair(0u, 2u));
+        ENSURE(x[1] == std::make_pair(5u, 9u));
+        ENSURE(x[2] == std::make_pair(12u, 14u));
+    }
+
+    void test_complement_hand() {
+        unsigned const M = 10;
+        auto e = range_predicate::empty(M);
+        ENSURE((~e).is_top());
+        auto t = range_predicate::top(M);
+        ENSURE((~t).is_empty());
+
+        // ~([2,3] U [7,8]) = [0,1] U [4,6] U [9,10]
+        auto a = range_predicate::range(2, 3, M) | range_predicate::range(7, 8, M);
+        auto na = ~a;
+        ENSURE(na.num_ranges() == 3);
+        ENSURE(na[0] == std::make_pair(0u, 1u));
+        ENSURE(na[1] == std::make_pair(4u, 6u));
+        ENSURE(na[2] == std::make_pair(9u, 10u));
+
+        // ~([0,4]) = [5,10]
+        auto b = range_predicate::range(0, 4, M);
+        auto nb = ~b;
+        ENSURE(nb.num_ranges() == 1);
+        ENSURE(nb[0] == std::make_pair(5u, 10u));
+
+        // ~([5,10]) = [0,4]
+        auto cnb = ~nb;
+        ENSURE(cnb == b);
+    }
+
+    // Exhaustively verify the lattice / de-Morgan laws on a small domain
+    // by enumerating every possible subset (bitmask).
+    void test_exhaustive_laws() {
+        unsigned const M = 5; // 6 characters -> 64 subsets
+        unsigned const N = 1u << (M + 1);
+        for (unsigned i = 0; i < N; ++i) {
+            range_predicate A = from_mask(i, M);
+            ENSURE(to_mask(A) == i);
+            // ~ ~ A == A
+            ENSURE(~~A == A);
+            // A | ~A == top
+            ENSURE((A | ~A).is_top());
+            // A & ~A == empty
+            ENSURE((A & ~A).is_empty());
+            // cardinality matches popcount
+            unsigned pop = 0;
+            for (unsigned k = 0; k <= M; ++k) if ((i >> k) & 1u) ++pop;
+            ENSURE(A.cardinality() == pop);
+        }
+        for (unsigned i = 0; i < N; ++i) {
+            range_predicate A = from_mask(i, M);
+            for (unsigned j = 0; j < N; ++j) {
+                range_predicate B = from_mask(j, M);
+                // Bitmask reference semantics.
+                ENSURE(to_mask(A | B) == (i | j));
+                ENSURE(to_mask(A & B) == (i & j));
+                ENSURE(to_mask(A - B) == (i & ~j & ((1u << (M + 1)) - 1u)));
+                ENSURE(to_mask(A ^ B) == (i ^ j));
+                // de-Morgan
+                ENSURE(~(A | B) == (~A & ~B));
+                ENSURE(~(A & B) == (~A | ~B));
+                // Commutativity
+                ENSURE((A | B) == (B | A));
+                ENSURE((A & B) == (B & A));
+                // (A - B) == A & ~B
+                ENSURE((A - B) == (A & ~B));
+                // (A ^ B) == (A | B) - (A & B)
+                ENSURE((A ^ B) == ((A | B) - (A & B)));
+                // Extensional equality is reflexive on equal masks.
+                if (i == j) {
+                    ENSURE(A == B);
+                    ENSURE(A.hash() == B.hash());
+                }
+            }
+        }
+    }
+
+    void test_total_order_strict() {
+        unsigned const M = 5;
+        unsigned const N = 1u << (M + 1);
+        // Strict total order: for any distinct A, B exactly one of A<B, B<A holds.
+        for (unsigned i = 0; i < N; ++i) {
+            range_predicate A = from_mask(i, M);
+            ENSURE(!(A < A));
+            for (unsigned j = i + 1; j < N; ++j) {
+                range_predicate B = from_mask(j, M);
+                bool lt = A < B;
+                bool gt = B < A;
+                ENSURE(lt != gt);
+                ENSURE(lt || gt);
+            }
+        }
+    }
+
+    void test_display() {
+        std::ostringstream oss;
+        oss << range_predicate::empty(31);
+        ENSURE(oss.str() == "[]");
+
+        oss.str("");
+        oss << range_predicate::range(3, 7, 31);
+        ENSURE(oss.str() == "[3-7]");
+
+        oss.str("");
+        oss << range_predicate::singleton(9, 31);
+        ENSURE(oss.str() == "[9]");
+
+        oss.str("");
+        auto p = range_predicate::range(0, 2, 31) | range_predicate::singleton(5, 31);
+        oss << p;
+        ENSURE(oss.str() == "[0-2,5]");
+    }
+
+}
+
+void tst_range_predicate() {
+    test_factories();
+    test_equality_and_order();
+    test_union_intersection_hand();
+    test_complement_hand();
+    test_exhaustive_laws();
+    test_total_order_strict();
+    test_display();
+    std::cout << "range_predicate unit tests passed\n";
+}
--- a/src/test/regex_range_collapse.cpp
+++ b/src/test/regex_range_collapse.cpp
@ -0,0 +1,244 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    regex_range_collapse.cpp - unit tests
+
+--*/
+
+#include "ast/rewriter/seq_range_collapse.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/ast_pp.h"
+#include "ast/arith_decl_plugin.h"
+#include "util/util.h"
+
+#include <iostream>
+
+namespace {
+
+    using seq::range_predicate;
+    using seq::regex_to_range_predicate;
+    using seq::range_predicate_to_regex;
+
+    static void check(bool ok, char const* what) {
+        if (!ok) {
+            std::cerr << "regex_range_collapse FAILED: " << what << "\n";
+            ENSURE(false);
+        }
+    }
+
+    static expr_ref mk_singleton_str(seq_util& u, unsigned c) {
+        return expr_ref(u.str.mk_string(zstring(c)), u.get_manager());
+    }
+
+    static bool extract_range_chars(seq_util& u, expr* e, unsigned& lo, unsigned& hi) {
+        expr* lo_e = nullptr; expr* hi_e = nullptr;
+        if (!u.re.is_range(e, lo_e, hi_e))
+            return false;
+        // Accept either string-constant or (seq.unit (Char N)) bound form.
+        if (u.re.is_range(e, lo, hi))
+            return true;
+        expr* lc = nullptr; expr* hc = nullptr;
+        if (u.str.is_unit(lo_e, lc) && u.is_const_char(lc, lo) &&
+            u.str.is_unit(hi_e, hc) && u.is_const_char(hc, hi))
+            return true;
+        return false;
+    }
+
+    static void run() {
+        ast_manager m;
+        reg_decl_plugins(m);
+        seq_util u(m);
+        unsigned const M = u.max_char();
+
+        sort* str_sort = u.str.mk_string_sort();
+        sort* re_sort  = u.re.mk_re(str_sort);
+
+        // primitives
+        {
+            range_predicate p(M);
+            check(regex_to_range_predicate(u, u.re.mk_empty(re_sort), p) && p.is_empty(),
+                  "re.empty -> empty");
+            check(regex_to_range_predicate(u, u.re.mk_full_char(re_sort), p) && p.is_top(),
+                  "re.full_char -> top");
+        }
+        // re.range "a" "z"
+        {
+            range_predicate p(M);
+            expr_ref a = mk_singleton_str(u, 'a');
+            expr_ref z = mk_singleton_str(u, 'z');
+            expr_ref r(u.re.mk_range(a, z), m);
+            check(regex_to_range_predicate(u, r, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'z',
+                  "re.range a z -> [a,z]");
+        }
+        // Disjoint union: (a..z) | (0..9)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
+            expr_ref un(u.re.mk_union(r1, r2), m);
+            check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 2,
+                  "(a-z)|(0-9) -> 2 ranges");
+            // canonical order: lower lo first
+            check(p[0].first == '0' && p[0].second == '9' && p[1].first == 'a' && p[1].second == 'z',
+                  "(a-z)|(0-9) ranges in canonical order");
+        }
+        // Overlapping union: (a..c) | (b..f) -> (a..f)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'b'), mk_singleton_str(u, 'f')), m);
+            expr_ref un(u.re.mk_union(r1, r2), m);
+            check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'f',
+                  "(a-c)|(b-f) -> (a-f)");
+        }
+        // Adjacent union: (a..c) | (d..f) -> (a..f) (canonical predicate merges adjacent)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'd'), mk_singleton_str(u, 'f')), m);
+            expr_ref un(u.re.mk_union(r1, r2), m);
+            check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'f',
+                  "(a-c)|(d-f) -> (a-f) via adjacency");
+        }
+        // Disjoint intersection: (a..z) & (0..9) -> empty
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
+            expr_ref ix(u.re.mk_inter(r1, r2), m);
+            check(regex_to_range_predicate(u, ix, p) && p.is_empty(),
+                  "(a-z)&(0-9) -> empty");
+        }
+        // Overlapping intersection: (a..f) & (c..z) -> (c..f)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
+            expr_ref ix(u.re.mk_inter(r1, r2), m);
+            check(regex_to_range_predicate(u, ix, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'c' && p[0].second == 'f',
+                  "(a-f)&(c-z) -> (c-f)");
+        }
+        // Complement: re.complement is intentionally NOT a char-class op
+        // (it operates over Σ*), so it must NOT be translated.
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref cmp(u.re.mk_complement(r1), m);
+            check(!regex_to_range_predicate(u, cmp, p),
+                  "re.comp of range is NOT translatable (sequence-level complement)");
+        }
+        // Diff: (a..f) \ (c..z) -> (a..b)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
+            expr_ref df(u.re.mk_diff(r1, r2), m);
+            check(regex_to_range_predicate(u, df, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'b',
+                  "(a-f) \\ (c-z) -> (a-b)");
+        }
+        // Negative: re.* of a range is NOT a char class
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref star(u.re.mk_star(r1), m);
+            check(!regex_to_range_predicate(u, star, p),
+                  "re.* of range not translatable");
+        }
+
+        // Negative: a regex whose element type is NOT a sequence of
+        // characters (here (Seq Int)) must be rejected outright, even for
+        // shapes that structurally resemble char-class operators.
+        {
+            range_predicate p(M);
+            arith_util a(m);
+            sort* int_seq = u.str.mk_seq(a.mk_int());
+            sort* int_re  = u.re.mk_re(int_seq);
+            check(!regex_to_range_predicate(u, u.re.mk_empty(int_re), p),
+                  "re.empty over (Seq Int) is NOT a char class");
+            check(!regex_to_range_predicate(u, u.re.mk_full_char(int_re), p),
+                  "re.full_char over (Seq Int) is NOT a char class");
+        }
+
+        // ---- materialization round-trip ----
+
+        // empty -> re.empty
+        {
+            range_predicate p = range_predicate::empty(M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            check(u.re.is_empty(e), "empty -> re.empty");
+        }
+        // top -> re.full_char
+        {
+            range_predicate p = range_predicate::top(M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            check(u.re.is_full_char(e), "top -> re.full_char");
+        }
+        // single range -> re.range
+        {
+            range_predicate p = range_predicate::range('a', 'z', M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            unsigned lo = 0, hi = 0;
+            check(extract_range_chars(u, e, lo, hi) && lo == 'a' && hi == 'z',
+                  "[a-z] -> re.range a z");
+        }
+        // singleton -> re.range c c
+        {
+            range_predicate p = range_predicate::singleton('A', M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            unsigned lo = 0, hi = 0;
+            check(extract_range_chars(u, e, lo, hi) && lo == 'A' && hi == 'A',
+                  "{A} -> re.range A A");
+        }
+        // 2 ranges -> re.union(range_0, range_1) in canonical order
+        {
+            range_predicate p = range_predicate::range('0', '9', M)
+                              | range_predicate::range('a', 'z', M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            expr* a = nullptr; expr* b = nullptr;
+            check(u.re.is_union(e, a, b), "2-range -> union");
+            unsigned lo0 = 0, hi0 = 0, lo1 = 0, hi1 = 0;
+            check(extract_range_chars(u, a, lo0, hi0) && lo0 == '0' && hi0 == '9',
+                  "union arg0 = (0-9) (canonical: lower lo first)");
+            check(extract_range_chars(u, b, lo1, hi1) && lo1 == 'a' && hi1 == 'z',
+                  "union arg1 = (a-z)");
+        }
+        // 3 ranges -> right-associated union
+        {
+            range_predicate p = range_predicate::range(0, 5, M)
+                              | range_predicate::range(10, 15, M)
+                              | range_predicate::range(20, 25, M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            expr* a = nullptr; expr* rest = nullptr;
+            check(u.re.is_union(e, a, rest), "3-range -> union(...)");
+            unsigned lo = 0, hi = 0;
+            check(extract_range_chars(u, a, lo, hi) && lo == 0 && hi == 5, "first arg = (0-5)");
+            expr* b = nullptr; expr* c = nullptr;
+            check(u.re.is_union(rest, b, c), "rest is union(...,...)");
+            check(extract_range_chars(u, b, lo, hi) && lo == 10 && hi == 15, "second range");
+            check(extract_range_chars(u, c, lo, hi) && lo == 20 && hi == 25, "third range");
+        }
+        // Round-trip identity for an arbitrary range-set
+        {
+            range_predicate p_in = range_predicate::range('a', 'c', M)
+                                 | range_predicate::range('m', 'p', M)
+                                 | range_predicate::range('x', 'z', M);
+            expr_ref e = range_predicate_to_regex(u, p_in, str_sort);
+            range_predicate p_out(M);
+            check(regex_to_range_predicate(u, e, p_out), "round-trip translatable");
+            check(p_in == p_out, "round-trip equal");
+        }
+
+        std::cerr << "regex_range_collapse tests passed\n";
+    }
+}
+
+void tst_regex_range_collapse() {
+    run();
+}
--- a/src/test/seq_regex_bisim.cpp
+++ b/src/test/seq_regex_bisim.cpp
@ -0,0 +1,127 @@
+// Regression test for the seq::derive::intersect_intervals bug.
+//
+// Background: derive uses a path-tracking interval set to compute symbolic
+// derivatives.  The intersect_intervals routine used to react to a single
+// disjoint interval by dropping the entire kept suffix and skipping the rest
+// of the list, which silently killed valid branches in derivatives such as
+// D(a|b).  That made the bisimulation procedure conclude bogus equalities
+// like a* == (a|b)*.
+//
+// This file also covers the seq::derive top-level-cache poisoning bug.
+// `m_top_cache` is keyed only by the regex; the routine used to populate it
+// while `m_ele` was set to a *concrete* character, baking that character
+// into the cached "symbolic" derivative.  Subsequent calls with the same
+// regex but a different ele then returned a stale concrete answer instead
+// of the true symbolic derivative.  The simplest victim is
+//   (str.in_re "aP" (re.++ (re.* "a") "P"))
+// which used to return false because the derivative wrt 'a' was cached and
+// re-used as the derivative wrt 'P'.
+#include "ast/ast.h"
+#include "ast/ast_pp.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/seq_decl_plugin.h"
+#include "ast/rewriter/seq_rewriter.h"
+#include "ast/rewriter/seq_regex_bisim.h"
+#include "ast/rewriter/th_rewriter.h"
+#include <iostream>
+
+static void test_a_star_neq_ab_star() {
+    ast_manager m;
+    reg_decl_plugins(m);
+    seq_util u(m);
+    seq_rewriter rw(m);
+
+    sort_ref str_sort(u.str.mk_string_sort(), m);
+
+    zstring sa("a"), sb("b");
+    expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
+    expr_ref re_b(u.re.mk_to_re(u.str.mk_string(sb)), m);
+    expr_ref a_star(u.re.mk_star(re_a), m);
+    expr_ref ab(u.re.mk_union(re_a, re_b), m);
+    expr_ref ab_star(u.re.mk_star(ab), m);
+
+    expr_ref d_ab = rw.mk_brz_derivative(ab);
+    std::cout << "D(a|b) = " << mk_pp(d_ab, m) << "\n";
+
+    // Both the 'a' branch and the 'b' branch of D(a|b) must reach epsilon.
+    // Collect the regex leaves of the symbolic derivative and require at
+    // least two distinct accepting leaves (one for 'a' and one for 'b').
+    expr_ref_vector leaves(m);
+    auto collect = [&](expr* e, auto&& self) -> void {
+        expr* c, *t, *f;
+        if (m.is_ite(e, c, t, f) || u.re.is_union(e, t, f) || u.re.is_antimirov_union(e, t, f)) {
+            self(t, self);
+            self(f, self);
+            return;
+        }
+        if (u.re.is_empty(e)) return;
+        leaves.push_back(e);
+    };
+    collect(d_ab, collect);
+    unsigned nullable_leaves = 0;
+    for (expr* l : leaves) {
+        expr_ref n = rw.is_nullable(l);
+        if (m.is_true(n)) ++nullable_leaves;
+    }
+    std::cout << "D(a|b) leaves=" << leaves.size()
+              << " nullable=" << nullable_leaves << "\n";
+    ENSURE(nullable_leaves >= 2);
+
+    // Bisim must report the two languages are not equivalent.
+    seq::regex_bisim bisim(rw);
+    lbool eq = bisim.are_equivalent(a_star, ab_star);
+    std::cout << "bisim(a*, (a|b)*) = "
+              << (eq == l_true ? "true" : eq == l_false ? "false" : "undef") << "\n";
+    ENSURE(eq == l_false);
+}
+
+// Regression for the derive top-level-cache poisoning bug.
+// Take r = (re.* "a") ++ "P" and check str.in_re "aP" r.  Before the fix
+// the first per-char derivative call (wrt 'a') populated m_top_cache with
+// 'a' baked into the symbolic ITE-tree, so the next call (wrt 'P') returned
+// that stale cached value instead of computing D_P(r) = epsilon, making
+// str.in_re wrongly return false.
+static void test_derive_cache_per_ele() {
+    ast_manager m;
+    reg_decl_plugins(m);
+    seq_util u(m);
+    seq_rewriter rw(m);
+
+    sort_ref str_sort(u.str.mk_string_sort(), m);
+
+    zstring sa("a"), sP("P"), s_aP("aP");
+    expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
+    expr_ref re_P(u.re.mk_to_re(u.str.mk_string(sP)), m);
+    expr_ref a_star(u.re.mk_star(re_a), m);
+    expr_ref r(u.re.mk_concat(a_star, re_P), m);
+    expr_ref aP(u.str.mk_string(s_aP), m);
+
+    // Compute D_'a'(a*P) and D_'P'(a*P) directly via mk_derivative.
+    // Before the fix, m_top_cache was populated while m_ele = ele (the
+    // concrete char), so the second call hit the stale cached answer from
+    // the first.  After the fix the cache is keyed by a symbolic var, so
+    // each concrete-ele substitution produces the right answer.
+    expr_ref ch_a(u.mk_char('a'), m);
+    expr_ref ch_P(u.mk_char('P'), m);
+    expr_ref d_a = rw.mk_derivative(ch_a, r);
+    expr_ref d_P = rw.mk_derivative(ch_P, r);
+    std::cout << "D_a(a*P) = " << mk_pp(d_a, m) << "\n";
+    std::cout << "D_P(a*P) = " << mk_pp(d_P, m) << "\n";
+
+    // D_P(a*P) must be nullable (it accepts the empty suffix), while
+    // D_a(a*P) must not be (it still needs a trailing 'P').
+    expr_ref n_a = rw.is_nullable(d_a);
+    expr_ref n_P = rw.is_nullable(d_P);
+    th_rewriter trw(m);
+    trw(n_a);
+    trw(n_P);
+    std::cout << "nullable(D_a) = " << mk_pp(n_a, m) << "\n";
+    std::cout << "nullable(D_P) = " << mk_pp(n_P, m) << "\n";
+    ENSURE(m.is_false(n_a));
+    ENSURE(m.is_true(n_P));
+}
+
+void tst_seq_regex_bisim() {
+    test_a_star_neq_ab_star();
+    test_derive_cache_per_ele();
+}