Derive with ranges (#9963)

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Margus Veanes <margus@microsoft.com> Co-authored-by: Margus Veanes <veanes@users.noreply.github.com>
2026-07-05 14:56:11 +00:00 · 2026-06-25 18:47:25 -07:00 · 2026-06-25 18:47:25 -07:00 · 22c2635786
commit 22c2635786
parent 0596c7c634
24 changed files with 3462 additions and 1000 deletions
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@ -115,15 +115,18 @@ add_executable(test-z3
  polynomial_factorization.cpp
  polynorm.cpp
  prime_generator.cpp
+  seq_regex_bisim.cpp
  proof_checker.cpp
  qe_arith.cpp
  mbp_qel.cpp
  quant_elim.cpp
  quant_solve.cpp
  random.cpp
+  range_predicate.cpp
  rational.cpp
  rcf.cpp
  region.cpp
+  regex_range_collapse.cpp
  sat_local_search.cpp
  sat_lookahead.cpp
  sat_user_scope.cpp
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@ -113,6 +113,8 @@
    X(api_bug) \
    X(api_special_relations) \
    X(arith_rewriter) \
+    X(range_predicate) \
+    X(regex_range_collapse) \
    X(seq_rewriter) \
    X(check_assumptions) \
    X(smt_context) \
@ -195,6 +197,7 @@
    X(finite_set) \
    X(finite_set_rewriter) \
    X(fpa) \
+    X(seq_regex_bisim) \
    X(term_enumeration) \
    X(lcube)

--- a/src/test/range_predicate.cpp
+++ b/src/test/range_predicate.cpp
@ -0,0 +1,260 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    test/range_predicate.cpp
+
+Abstract:
+
+    Unit tests for the range-algebra value type seq::range_predicate.
+
+    The tests exercise:
+      * factory constructors and canonical-form invariants,
+      * extensional equality and total ordering,
+      * Boolean operations (|, &, ~, -, ^) on hand-picked instances,
+      * exhaustive verification of de-Morgan and lattice laws on a
+        small character domain, by enumerating every subset.
+
+Author:
+
+    Margus Veanes (veanes) 2026
+
+--*/
+
+#include "ast/rewriter/seq_range_predicate.h"
+#include "util/debug.h"
+#include <cstdint>
+#include <iostream>
+#include <sstream>
+
+using seq::range_predicate;
+
+namespace {
+
+    // Build a range_predicate from a bitmask over [0, max_char] for testing.
+    // Bit c of `mask` set means character c is a member of the result.
+    // The mask is 64 bits wide, so the domain has to fit in it: shifting a
+    // uint64_t by 64 or more is undefined, hence the max_char < 64 guard.
+    range_predicate from_mask(uint64_t mask, unsigned max_char) {
+        ENSURE(max_char < 64);
+        range_predicate r = range_predicate::empty(max_char);
+        for (unsigned c = 0; c <= max_char; ++c)
+            if ((mask >> c) & 1u)
+                r = r | range_predicate::singleton(c, max_char);
+        return r;
+    }
+
+    // Convert a range_predicate back to a bitmask for cross-checking.
+    // Bit c of the result is set iff r.contains(c).  The result is 64 bits
+    // wide, so r.max_char() must be below 64: shifting uint64_t(1) by 64 or
+    // more is undefined.
+    uint64_t to_mask(range_predicate const& r) {
+        ENSURE(r.max_char() < 64);
+        uint64_t mask = 0;
+        for (unsigned c = 0; c <= r.max_char(); ++c)
+            if (r.contains(c))
+                mask |= (uint64_t(1) << c);
+        return mask;
+    }
+
+    // Factory constructors (empty, top, singleton, range) and the observers
+    // on their results, including reversed bounds and clipping at max_char.
+    void test_factories() {
+        unsigned const M = 255;
+
+        // empty(M): no ranges and no members.
+        auto none = range_predicate::empty(M);
+        ENSURE(none.is_empty());
+        ENSURE(!none.is_top());
+        ENSURE(none.num_ranges() == 0);
+        ENSURE(none.cardinality() == 0);
+
+        // top(M): one range covering all 256 characters.
+        auto all = range_predicate::top(M);
+        ENSURE(!all.is_empty());
+        ENSURE(all.is_top());
+        ENSURE(all.num_ranges() == 1);
+        ENSURE(all.cardinality() == 256);
+        ENSURE(all.contains(0));
+        ENSURE(all.contains(255));
+
+        // singleton(42, M): exactly one member, reported back by is_singleton.
+        auto one = range_predicate::singleton(42, M);
+        ENSURE(one.num_ranges() == 1);
+        ENSURE(one.cardinality() == 1);
+        ENSURE(one.contains(42));
+        ENSURE(!one.contains(41));
+        unsigned member = 0;
+        ENSURE(one.is_singleton(member));
+        ENSURE(member == 42);
+
+        // range(10, 20, M): both endpoints are included, neighbours are not.
+        auto span = range_predicate::range(10, 20, M);
+        ENSURE(span.num_ranges() == 1);
+        ENSURE(span.cardinality() == 11);
+        ENSURE(span.contains(10));
+        ENSURE(span.contains(20));
+        ENSURE(!span.contains(9));
+        ENSURE(!span.contains(21));
+
+        // lo > hi yields the empty predicate.
+        auto reversed = range_predicate::range(20, 10, M);
+        ENSURE(reversed.is_empty());
+
+        // An upper bound beyond max_char is clipped to max_char.
+        auto clipped = range_predicate::range(200, 1000, M);
+        ENSURE(clipped.num_ranges() == 1);
+        ENSURE(clipped[0] == std::make_pair(200u, 255u));
+    }
+
+    // Equality, hashing and ordering on a few hand-picked predicates, plus
+    // the merging of adjacent ranges into one.
+    void test_equality_and_order() {
+        unsigned const M = 31;
+        auto lhs   = range_predicate::range(1, 5, M);
+        auto same  = range_predicate::range(1, 5, M);
+        auto wider = range_predicate::range(1, 6, M);
+        ENSURE(lhs == same);
+        ENSURE(lhs != wider);
+        ENSURE(lhs.hash() == same.hash());
+        ENSURE(lhs < wider || wider < lhs);
+        ENSURE(!(lhs < lhs));
+
+        // The empty predicate sorts before a non-empty one.
+        auto none = range_predicate::empty(M);
+        ENSURE(none < lhs);
+
+        // [0,4] | [5,10] touch, so the union equals the single range [0,10].
+        auto glued = range_predicate::range(0, 4, M) | range_predicate::range(5, 10, M);
+        auto whole = range_predicate::range(0, 10, M);
+        ENSURE(glued == whole);
+    }
+
+    // Hand-computed results of |, &, - and ^ on
+    //   a = [0,4] U [10,14]   and   b = [3,11]   over the domain [0,31].
+    void test_union_intersection_hand() {
+        unsigned const M = 31;
+        auto a = range_predicate::range(0, 4, M) | range_predicate::range(10, 14, M);
+        auto b = range_predicate::range(3, 11, M);
+
+        // True when the given range entry is exactly [lo, hi].
+        auto is_span = [](auto const& entry, unsigned lo, unsigned hi) {
+            return entry == std::make_pair(lo, hi);
+        };
+
+        // a | b = [0,14]
+        auto joined = a | b;
+        ENSURE(joined.num_ranges() == 1);
+        ENSURE(is_span(joined[0], 0u, 14u));
+
+        // a & b = [3,4] U [10,11]
+        auto common = a & b;
+        ENSURE(common.num_ranges() == 2);
+        ENSURE(is_span(common[0], 3u, 4u));
+        ENSURE(is_span(common[1], 10u, 11u));
+
+        // a - b = [0,2] U [12,14]
+        auto minus = a - b;
+        ENSURE(minus.num_ranges() == 2);
+        ENSURE(is_span(minus[0], 0u, 2u));
+        ENSURE(is_span(minus[1], 12u, 14u));
+
+        // a ^ b = [0,2] U [5,9] U [12,14]
+        auto either = a ^ b;
+        ENSURE(either.num_ranges() == 3);
+        ENSURE(is_span(either[0], 0u, 2u));
+        ENSURE(is_span(either[1], 5u, 9u));
+        ENSURE(is_span(either[2], 12u, 14u));
+    }
+
+    // Hand-computed complements over the domain [0,10].
+    void test_complement_hand() {
+        unsigned const M = 10;
+
+        // Complement swaps empty and top.
+        auto none = range_predicate::empty(M);
+        ENSURE((~none).is_top());
+        auto all = range_predicate::top(M);
+        ENSURE((~all).is_empty());
+
+        // ~([2,3] U [7,8]) = [0,1] U [4,6] U [9,10]
+        auto holes = range_predicate::range(2, 3, M) | range_predicate::range(7, 8, M);
+        auto around = ~holes;
+        ENSURE(around.num_ranges() == 3);
+        ENSURE(around[0] == std::make_pair(0u, 1u));
+        ENSURE(around[1] == std::make_pair(4u, 6u));
+        ENSURE(around[2] == std::make_pair(9u, 10u));
+
+        // ~([0,4]) = [5,10]
+        auto prefix = range_predicate::range(0, 4, M);
+        auto suffix = ~prefix;
+        ENSURE(suffix.num_ranges() == 1);
+        ENSURE(suffix[0] == std::make_pair(5u, 10u));
+
+        // ~([5,10]) = [0,4], i.e. complementing twice gives the prefix back.
+        auto back = ~suffix;
+        ENSURE(back == prefix);
+    }
+
+    // Checks the lattice / de-Morgan laws on the 6-character domain [0,5] by
+    // walking through every subset, each encoded as a bitmask.  Plain integer
+    // bit operations on the masks serve as the reference semantics.
+    void test_exhaustive_laws() {
+        unsigned const M = 5; // 6 characters -> 64 subsets
+        unsigned const N = 1u << (M + 1);
+        unsigned const domain_bits = N - 1u; // all bits of the domain set
+
+        // Laws involving a single subset.
+        for (unsigned bits = 0; bits < N; ++bits) {
+            range_predicate S = from_mask(bits, M);
+            ENSURE(to_mask(S) == bits);
+            // double complement is the identity
+            ENSURE(~~S == S);
+            // S together with its complement is everything
+            ENSURE((S | ~S).is_top());
+            // S and its complement share nothing
+            ENSURE((S & ~S).is_empty());
+            // cardinality equals the number of set bits
+            unsigned members = 0;
+            for (unsigned rest = bits; rest != 0; rest >>= 1)
+                members += rest & 1u;
+            ENSURE(S.cardinality() == members);
+        }
+
+        // Laws involving a pair of subsets.
+        for (unsigned x = 0; x < N; ++x) {
+            range_predicate P = from_mask(x, M);
+            for (unsigned y = 0; y < N; ++y) {
+                range_predicate Q = from_mask(y, M);
+                // Each operator agrees with the integer operation on masks.
+                ENSURE(to_mask(P | Q) == (x | y));
+                ENSURE(to_mask(P & Q) == (x & y));
+                ENSURE(to_mask(P - Q) == (x & ~y & domain_bits));
+                ENSURE(to_mask(P ^ Q) == (x ^ y));
+                // de-Morgan, both directions
+                ENSURE(~(P | Q) == (~P & ~Q));
+                ENSURE(~(P & Q) == (~P | ~Q));
+                // | and & commute
+                ENSURE((P | Q) == (Q | P));
+                ENSURE((P & Q) == (Q & P));
+                // difference is intersection with the complement
+                ENSURE((P - Q) == (P & ~Q));
+                // symmetric difference is union minus intersection
+                ENSURE((P ^ Q) == ((P | Q) - (P & Q)));
+                // Equal masks give equal predicates with equal hashes.
+                if (x != y)
+                    continue;
+                ENSURE(P == Q);
+                ENSURE(P.hash() == Q.hash());
+            }
+        }
+    }
+
+    // operator< must behave as a strict total order on all 64 subsets of
+    // [0,5]: irreflexive, and for two different subsets exactly one of
+    // A < B and B < A holds.
+    void test_total_order_strict() {
+        unsigned const M = 5;
+        unsigned const N = 1u << (M + 1);
+        for (unsigned x = 0; x < N; ++x) {
+            range_predicate P = from_mask(x, M);
+            ENSURE(!(P < P));
+            // Only pairs with y > x: the property is symmetric in the pair.
+            for (unsigned y = x + 1; y < N; ++y) {
+                range_predicate Q = from_mask(y, M);
+                bool const forward  = P < Q;
+                bool const backward = Q < P;
+                ENSURE(forward != backward);
+                ENSURE(forward || backward);
+            }
+        }
+    }
+
+    // Checks the textual form produced by operator<< for an empty predicate,
+    // a range, a singleton and a two-range predicate.
+    void test_display() {
+        // Renders one predicate into a string of its own.
+        auto show = [](range_predicate const& p) {
+            std::ostringstream out;
+            out << p;
+            return out.str();
+        };
+
+        ENSURE(show(range_predicate::empty(31)) == "[]");
+        ENSURE(show(range_predicate::range(3, 7, 31)) == "[3-7]");
+        ENSURE(show(range_predicate::singleton(9, 31)) == "[9]");
+
+        auto mixed = range_predicate::range(0, 2, 31) | range_predicate::singleton(5, 31);
+        ENSURE(show(mixed) == "[0-2,5]");
+    }
+
+}
+
+// Entry point of the range_predicate test (listed as X(range_predicate) in
+// main.cpp): runs every test group in order, then prints a confirmation.
+void tst_range_predicate() {
+    using test_fn = void (*)();
+    test_fn const groups[] = {
+        test_factories,
+        test_equality_and_order,
+        test_union_intersection_hand,
+        test_complement_hand,
+        test_exhaustive_laws,
+        test_total_order_strict,
+        test_display,
+    };
+    for (test_fn group : groups)
+        group();
+    std::cout << "range_predicate unit tests passed\n";
+}
--- a/src/test/regex_range_collapse.cpp
+++ b/src/test/regex_range_collapse.cpp
@ -0,0 +1,244 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    regex_range_collapse.cpp - unit tests
+
+--*/
+
+#include "ast/rewriter/seq_range_collapse.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/ast_pp.h"
+#include "ast/arith_decl_plugin.h"
+#include "util/util.h"
+
+#include <iostream>
+
+namespace {
+
+    using seq::range_predicate;
+    using seq::regex_to_range_predicate;
+    using seq::range_predicate_to_regex;
+
+    // Reports a failed expectation: when `ok` is false, names the failing
+    // case `what` on stderr and then trips ENSURE.  Does nothing otherwise.
+    static void check(bool ok, char const* what) {
+        if (ok)
+            return;
+        std::cerr << "regex_range_collapse FAILED: " << what << "\n";
+        ENSURE(false);
+    }
+
+    // Builds the string constant consisting of the single character `c`,
+    // wrapped in an expr_ref owned by u's manager.
+    static expr_ref mk_singleton_str(seq_util& u, unsigned c) {
+        zstring const one_char(c);
+        expr* lit = u.str.mk_string(one_char);
+        return expr_ref(lit, u.get_manager());
+    }
+
+    // Reads the character bounds of a re.range term into lo / hi.
+    // Returns false when `e` is not a re.range, or when its bounds are in
+    // neither of the two accepted forms (string constant, or seq.unit of a
+    // constant character).
+    static bool extract_range_chars(seq_util& u, expr* e, unsigned& lo, unsigned& hi) {
+        expr* lo_arg = nullptr;
+        expr* hi_arg = nullptr;
+        if (!u.re.is_range(e, lo_arg, hi_arg))
+            return false;
+        // First try the overload that decodes the bounds directly.
+        if (u.re.is_range(e, lo, hi))
+            return true;
+        // Otherwise accept the (seq.unit (Char N)) form, lower bound first.
+        expr* lo_ch = nullptr;
+        expr* hi_ch = nullptr;
+        bool const lo_ok = u.str.is_unit(lo_arg, lo_ch) && u.is_const_char(lo_ch, lo);
+        if (!lo_ok)
+            return false;
+        return u.str.is_unit(hi_arg, hi_ch) && u.is_const_char(hi_ch, hi);
+    }
+    // Test driver: sets up a local ast_manager and seq_util, then runs every translation and materialization case below in order, reporting failures through check().
+    static void run() {
+        ast_manager m;
+        reg_decl_plugins(m);
+        seq_util u(m);
+        unsigned const M = u.max_char();
+
+        sort* str_sort = u.str.mk_string_sort();
+        sort* re_sort  = u.re.mk_re(str_sort);
+
+        // primitives: re.empty and re.full_char translate to the empty and the top predicate
+        {
+            range_predicate p(M);
+            check(regex_to_range_predicate(u, u.re.mk_empty(re_sort), p) && p.is_empty(),
+                  "re.empty -> empty");
+            check(regex_to_range_predicate(u, u.re.mk_full_char(re_sort), p) && p.is_top(),
+                  "re.full_char -> top");
+        }
+        // re.range "a" "z" translates to the single range [a,z]
+        {
+            range_predicate p(M);
+            expr_ref a = mk_singleton_str(u, 'a');
+            expr_ref z = mk_singleton_str(u, 'z');
+            expr_ref r(u.re.mk_range(a, z), m);
+            check(regex_to_range_predicate(u, r, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'z',
+                  "re.range a z -> [a,z]");
+        }
+        // Disjoint union: (a..z) | (0..9)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
+            expr_ref un(u.re.mk_union(r1, r2), m);
+            check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 2,
+                  "(a-z)|(0-9) -> 2 ranges");
+            // canonical order: lower lo first, so (0-9) precedes (a-z) although it was the second union argument
+            check(p[0].first == '0' && p[0].second == '9' && p[1].first == 'a' && p[1].second == 'z',
+                  "(a-z)|(0-9) ranges in canonical order");
+        }
+        // Overlapping union: (a..c) | (b..f) -> (a..f)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'b'), mk_singleton_str(u, 'f')), m);
+            expr_ref un(u.re.mk_union(r1, r2), m);
+            check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'f',
+                  "(a-c)|(b-f) -> (a-f)");
+        }
+        // Adjacent union: (a..c) | (d..f) -> (a..f) (canonical predicate merges adjacent)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'd'), mk_singleton_str(u, 'f')), m);
+            expr_ref un(u.re.mk_union(r1, r2), m);
+            check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'f',
+                  "(a-c)|(d-f) -> (a-f) via adjacency");
+        }
+        // Disjoint intersection: (a..z) & (0..9) -> empty
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
+            expr_ref ix(u.re.mk_inter(r1, r2), m);
+            check(regex_to_range_predicate(u, ix, p) && p.is_empty(),
+                  "(a-z)&(0-9) -> empty");
+        }
+        // Overlapping intersection: (a..f) & (c..z) -> (c..f)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
+            expr_ref ix(u.re.mk_inter(r1, r2), m);
+            check(regex_to_range_predicate(u, ix, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'c' && p[0].second == 'f',
+                  "(a-f)&(c-z) -> (c-f)");
+        }
+        // Complement: re.complement is intentionally NOT a char-class op
+        // (it operates over Σ*), so it must NOT be translated.
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref cmp(u.re.mk_complement(r1), m);
+            check(!regex_to_range_predicate(u, cmp, p),
+                  "re.comp of range is NOT translatable (sequence-level complement)");
+        }
+        // Diff: (a..f) \ (c..z) -> (a..b)
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
+            expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
+            expr_ref df(u.re.mk_diff(r1, r2), m);
+            check(regex_to_range_predicate(u, df, p) && p.num_ranges() == 1 &&
+                  p[0].first == 'a' && p[0].second == 'b',
+                  "(a-f) \\ (c-z) -> (a-b)");
+        }
+        // Negative: re.* of a range is NOT a char class, so translation must report failure
+        {
+            range_predicate p(M);
+            expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
+            expr_ref star(u.re.mk_star(r1), m);
+            check(!regex_to_range_predicate(u, star, p),
+                  "re.* of range not translatable");
+        }
+
+        // Negative: a regex whose element type is NOT a sequence of
+        // characters (here (Seq Int)) must be rejected outright, even for
+        // shapes that structurally resemble char-class operators.
+        {
+            range_predicate p(M);
+            arith_util a(m);
+            sort* int_seq = u.str.mk_seq(a.mk_int());
+            sort* int_re  = u.re.mk_re(int_seq);
+            check(!regex_to_range_predicate(u, u.re.mk_empty(int_re), p),
+                  "re.empty over (Seq Int) is NOT a char class");
+            check(!regex_to_range_predicate(u, u.re.mk_full_char(int_re), p),
+                  "re.full_char over (Seq Int) is NOT a char class");
+        }
+
+        // ---- materialization round-trip: range_predicate -> regex via range_predicate_to_regex, inspected structurally ----
+
+        // empty -> re.empty
+        {
+            range_predicate p = range_predicate::empty(M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            check(u.re.is_empty(e), "empty -> re.empty");
+        }
+        // top -> re.full_char
+        {
+            range_predicate p = range_predicate::top(M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            check(u.re.is_full_char(e), "top -> re.full_char");
+        }
+        // single range -> re.range
+        {
+            range_predicate p = range_predicate::range('a', 'z', M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            unsigned lo = 0, hi = 0;
+            check(extract_range_chars(u, e, lo, hi) && lo == 'a' && hi == 'z',
+                  "[a-z] -> re.range a z");
+        }
+        // singleton -> re.range c c
+        {
+            range_predicate p = range_predicate::singleton('A', M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            unsigned lo = 0, hi = 0;
+            check(extract_range_chars(u, e, lo, hi) && lo == 'A' && hi == 'A',
+                  "{A} -> re.range A A");
+        }
+        // 2 ranges -> re.union(range_0, range_1) in canonical order
+        {
+            range_predicate p = range_predicate::range('0', '9', M)
+                              | range_predicate::range('a', 'z', M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            expr* a = nullptr; expr* b = nullptr;
+            check(u.re.is_union(e, a, b), "2-range -> union");
+            unsigned lo0 = 0, hi0 = 0, lo1 = 0, hi1 = 0;
+            check(extract_range_chars(u, a, lo0, hi0) && lo0 == '0' && hi0 == '9',
+                  "union arg0 = (0-9) (canonical: lower lo first)");
+            check(extract_range_chars(u, b, lo1, hi1) && lo1 == 'a' && hi1 == 'z',
+                  "union arg1 = (a-z)");
+        }
+        // 3 ranges -> right-associated union: union(r0, union(r1, r2))
+        {
+            range_predicate p = range_predicate::range(0, 5, M)
+                              | range_predicate::range(10, 15, M)
+                              | range_predicate::range(20, 25, M);
+            expr_ref e = range_predicate_to_regex(u, p, str_sort);
+            expr* a = nullptr; expr* rest = nullptr;
+            check(u.re.is_union(e, a, rest), "3-range -> union(...)");
+            unsigned lo = 0, hi = 0;
+            check(extract_range_chars(u, a, lo, hi) && lo == 0 && hi == 5, "first arg = (0-5)");
+            expr* b = nullptr; expr* c = nullptr;
+            check(u.re.is_union(rest, b, c), "rest is union(...,...)");
+            check(extract_range_chars(u, b, lo, hi) && lo == 10 && hi == 15, "second range");
+            check(extract_range_chars(u, c, lo, hi) && lo == 20 && hi == 25, "third range");
+        }
+        // Round-trip identity for an arbitrary range-set: predicate -> regex -> predicate must give the input back
+        {
+            range_predicate p_in = range_predicate::range('a', 'c', M)
+                                 | range_predicate::range('m', 'p', M)
+                                 | range_predicate::range('x', 'z', M);
+            expr_ref e = range_predicate_to_regex(u, p_in, str_sort);
+            range_predicate p_out(M);
+            check(regex_to_range_predicate(u, e, p_out), "round-trip translatable");
+            check(p_in == p_out, "round-trip equal");
+        }
+
+        std::cerr << "regex_range_collapse tests passed\n";
+    }
+}
+
+// Entry point of the regex_range_collapse test (listed as
+// X(regex_range_collapse) in main.cpp); all cases live in run().
+void tst_regex_range_collapse() {
+    run();
+}
--- a/src/test/seq_regex_bisim.cpp
+++ b/src/test/seq_regex_bisim.cpp
@ -0,0 +1,127 @@
+// Regression test for the seq::derive::intersect_intervals bug.
+//
+// Background: derive uses a path-tracking interval set to compute symbolic
+// derivatives.  The intersect_intervals routine used to react to a single
+// disjoint interval by dropping the entire kept suffix and skipping the rest
+// of the list, which silently killed valid branches in derivatives such as
+// D(a|b).  That made the bisimulation procedure conclude bogus equalities
+// like a* == (a|b)*.
+//
+// This file also covers the seq::derive top-level-cache poisoning bug.
+// `m_top_cache` is keyed only by the regex; the routine used to populate it
+// while `m_ele` was set to a *concrete* character, baking that character
+// into the cached "symbolic" derivative.  Subsequent calls with the same
+// regex but a different ele then returned a stale concrete answer instead
+// of the true symbolic derivative.  The simplest victim is
+//   (str.in_re "aP" (re.++ (re.* "a") "P"))
+// which used to return false because the derivative wrt 'a' was cached and
+// re-used as the derivative wrt 'P'.
+#include "ast/ast.h"
+#include "ast/ast_pp.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/seq_decl_plugin.h"
+#include "ast/rewriter/seq_rewriter.h"
+#include "ast/rewriter/seq_regex_bisim.h"
+#include "ast/rewriter/th_rewriter.h"
+#include <iostream>
+
+// Regression for the intersect_intervals bug: the symbolic derivative D(a|b)
+// must keep both the 'a' and the 'b' branch, and the bisimulation check must
+// tell a* and (a|b)* apart.
+static void test_a_star_neq_ab_star() {
+    ast_manager m;
+    reg_decl_plugins(m);
+    seq_util u(m);
+    seq_rewriter rw(m);
+
+    zstring sa("a"), sb("b");
+    expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
+    expr_ref re_b(u.re.mk_to_re(u.str.mk_string(sb)), m);
+    expr_ref a_star(u.re.mk_star(re_a), m);
+    expr_ref ab(u.re.mk_union(re_a, re_b), m);
+    expr_ref ab_star(u.re.mk_star(ab), m);
+
+    expr_ref d_ab = rw.mk_brz_derivative(ab);
+    std::cout << "D(a|b) = " << mk_pp(d_ab, m) << "\n";
+
+    // Both the 'a' branch and the 'b' branch of D(a|b) must reach epsilon.
+    // Collect the non-empty regex leaves of the symbolic derivative, one
+    // entry per branch (the same expression may therefore occur more than
+    // once), and require at least two nullable leaves: one for 'a' and one
+    // for 'b'.
+    expr_ref_vector leaves(m);
+    auto collect = [&](expr* e, auto&& self) -> void {
+        // Out-parameters of the matchers below; c receives the ite condition
+        // and is not used further.
+        expr* c = nullptr, *t = nullptr, *f = nullptr;
+        if (m.is_ite(e, c, t, f) || u.re.is_union(e, t, f) || u.re.is_antimirov_union(e, t, f)) {
+            self(t, self);
+            self(f, self);
+            return;
+        }
+        // Empty-language leaves are dead branches and do not count.
+        if (u.re.is_empty(e)) return;
+        leaves.push_back(e);
+    };
+    collect(d_ab, collect);
+    unsigned nullable_leaves = 0;
+    for (expr* l : leaves) {
+        expr_ref n = rw.is_nullable(l);
+        if (m.is_true(n)) ++nullable_leaves;
+    }
+    std::cout << "D(a|b) leaves=" << leaves.size()
+              << " nullable=" << nullable_leaves << "\n";
+    ENSURE(nullable_leaves >= 2);
+
+    // Bisim must report the two languages are not equivalent.
+    seq::regex_bisim bisim(rw);
+    lbool eq = bisim.are_equivalent(a_star, ab_star);
+    std::cout << "bisim(a*, (a|b)*) = "
+              << (eq == l_true ? "true" : eq == l_false ? "false" : "undef") << "\n";
+    ENSURE(eq == l_false);
+}
+
+// Regression for the derive top-level-cache poisoning bug.
+// Take r = (re.* "a") ++ "P".  Before the fix the first per-char derivative
+// call (wrt 'a') populated m_top_cache with 'a' baked into the symbolic
+// ITE-tree, so the next call (wrt 'P') returned that stale cached value
+// instead of computing D_P(r) = epsilon, making str.in_re "aP" r wrongly
+// return false.  The test exercises the two derivative calls directly
+// rather than going through str.in_re.
+static void test_derive_cache_per_ele() {
+    ast_manager m;
+    reg_decl_plugins(m);
+    seq_util u(m);
+    seq_rewriter rw(m);
+
+    zstring sa("a"), sP("P");
+    expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
+    expr_ref re_P(u.re.mk_to_re(u.str.mk_string(sP)), m);
+    expr_ref a_star(u.re.mk_star(re_a), m);
+    expr_ref r(u.re.mk_concat(a_star, re_P), m);
+
+    // Compute D_'a'(a*P) and D_'P'(a*P) directly via mk_derivative.
+    // Before the fix, m_top_cache was populated while m_ele = ele (the
+    // concrete char), so the second call hit the stale cached answer from
+    // the first.  After the fix the cache is keyed by a symbolic var, so
+    // each concrete-ele substitution produces the right answer.
+    // The order matters: 'a' first, then 'P', on the same rewriter.
+    expr_ref ch_a(u.mk_char('a'), m);
+    expr_ref ch_P(u.mk_char('P'), m);
+    expr_ref d_a = rw.mk_derivative(ch_a, r);
+    expr_ref d_P = rw.mk_derivative(ch_P, r);
+    std::cout << "D_a(a*P) = " << mk_pp(d_a, m) << "\n";
+    std::cout << "D_P(a*P) = " << mk_pp(d_P, m) << "\n";
+
+    // D_P(a*P) must be nullable (it accepts the empty suffix), while
+    // D_a(a*P) must not be (it still needs a trailing 'P').
+    expr_ref n_a = rw.is_nullable(d_a);
+    expr_ref n_P = rw.is_nullable(d_P);
+    // Simplify both nullability terms before testing them against the
+    // Boolean constants.
+    th_rewriter trw(m);
+    trw(n_a);
+    trw(n_P);
+    std::cout << "nullable(D_a) = " << mk_pp(n_a, m) << "\n";
+    std::cout << "nullable(D_P) = " << mk_pp(n_P, m) << "\n";
+    ENSURE(m.is_false(n_a));
+    ENSURE(m.is_true(n_P));
+}
+
+// Entry point of the seq_regex_bisim test (listed as X(seq_regex_bisim) in
+// main.cpp): runs the two regressions, each on its own ast_manager.
+void tst_seq_regex_bisim() {
+    test_a_star_neq_ab_star();
+    test_derive_cache_per_ele();
+}