From bcdbc80ab8c22d61201dd5ab0b038f3aa73a601e Mon Sep 17 00:00:00 2001
From: Margus Veanes <223556219+Copilot@users.noreply.github.com>
Date: Sun, 14 Jun 2026 18:06:27 -0700
Subject: [PATCH] seq::derive: revert Stage 2 path-tracking; keep
 range_predicate value type for Stage 3

Stage 2 replaced log-structured m_intervals path state with stacked
range_predicate values. Profiling showed this caused a ~2.4x allocation
blowup on the regex-equivalence corpus: each push() heap-allocated a
fresh svector for the narrowed set, vs. the original code's
allocation-free append-and-rewind in a single shared svector. On the
hot file flat_vs_loop_015_n8.smt2 the cost grew from 54K allocs (0.03s)
to 100M allocs (1.49s).

Revert the path-tracking to its derive-branch implementation. Keep the
seq::range_predicate value type and seq::range_predicate_translator as
standalone modules with unit tests; they remain available for Stage 3
(OneStep predicate, smart-constructor side conditions, AC normalization)
where their value-type semantics pay off as cache keys.

Add a small O(1) range_predicate::swap helper for future use.

Validated on inputs/regex-equivalence (1523 files, -T:5,
model_validate=true, 8 workers):
  derive branch:        633s wall, 485 sat / 951 unsat / 87 to
  reverted dwr (this):  634s wall, 486 sat / 952 unsat / 85 to
  0 soundness disagreements; 4 trivial nondeterministic diffs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/ast/rewriter/range_predicate.h |   6 ++
 src/ast/rewriter/seq_derive.cpp    | 160 ++++++++++++++++++++++-------
 src/ast/rewriter/seq_derive.h      |  20 ++--
 3 files changed, 137 insertions(+), 49 deletions(-)

diff --git a/src/ast/rewriter/range_predicate.h b/src/ast/rewriter/range_predicate.h
index 93c8e3219..f2b4a4f30 100644
--- a/src/ast/rewriter/range_predicate.h
+++ b/src/ast/rewriter/range_predicate.h
@@ -113,6 +113,12 @@ namespace seq {
 
         unsigned hash() const;
 
+        // O(1) swap of internals (the underlying svector swap is pointer-swap).
+        void swap(range_predicate& o) noexcept {
+            m_ranges.swap(o.m_ranges);
+            std::swap(m_max_char, o.m_max_char);
+        }
+
         // ---------------- Boolean operations ----------------
 
         range_predicate operator|(range_predicate const& o) const;   // union
diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp
index 26093ee18..48db95e3b 100644
--- a/src/ast/rewriter/seq_derive.cpp
+++ b/src/ast/rewriter/seq_derive.cpp
@@ -89,7 +89,9 @@ namespace seq {
             m_depth = 0;
             // Initialize path state for inline pruning
             m_path.reset();
-            m_path_pred = range_predicate::top(u().max_char());
+            m_intervals.reset();
+            m_intervals.push_back({0u, u().max_char()});
+            m_intervals_start = 0;
             m_path_expr = m.mk_true();
             result = derive_rec(r);
             m_top_cache.insert(r, result);
@@ -913,27 +915,31 @@ namespace seq {
         // Check if (c, sign) is already determined by the path
         lbool cv = eval_path_cond(c);
         if (cv == l_true && !sign) return l_true;   // c implied true, push(c,false) is redundant
-        if (cv == l_false && sign) return l_true;   // c implied false, push(c,true) is redundant
+        if (cv == l_false && sign) return l_true;   // c implied false, push(c,true) is redundant  
         if (cv == l_true && sign) return l_false;   // c implied true, push(c,true) contradicts
         if (cv == l_false && !sign) return l_false; // c implied false, push(c,false) contradicts
 
         // Save current state
         unsigned saved_path_sz = m_path.size();
-        range_predicate saved_path_pred = m_path_pred;
+        unsigned saved_intervals_sz = m_intervals.size();
+        unsigned saved_intervals_start = m_intervals_start;
         expr* saved_path_expr = m_path_expr;
 
         // Push atoms onto path and check for contradiction or implication
         lbool result = push_path_atoms(c, sign);
         if (result != l_undef) {
             m_path.shrink(saved_path_sz);
+            m_intervals.shrink(saved_intervals_sz);
+            m_intervals_start = saved_intervals_start;
             return result;
         }
 
-        // Update path predicate (feasible character set)
+        // Update intervals
         result = push_intervals_impl(c, sign);
         if (result != l_undef) {
             m_path.shrink(saved_path_sz);
-            m_path_pred = std::move(saved_path_pred);
+            m_intervals.shrink(saved_intervals_sz);
+            m_intervals_start = saved_intervals_start;
             return result;
         }
 
@@ -943,15 +949,16 @@ namespace seq {
         m_trail.push_back(m_path_expr);
 
         // Commit: save state for pop()
-        m_path_stack.push_back({ saved_path_sz, std::move(saved_path_pred), saved_path_expr });
+        m_path_stack.push_back({ saved_path_sz, saved_intervals_sz, saved_intervals_start, saved_path_expr });
         return l_undef;
     }
 
     void derive::pop() {
         SASSERT(!m_path_stack.empty());
-        auto& saved = m_path_stack.back();
+        auto const& saved = m_path_stack.back();
         m_path.shrink(saved.path_sz);
-        m_path_pred = std::move(saved.saved_path_pred);
+        m_intervals.shrink(saved.intervals_sz);
+        m_intervals_start = saved.intervals_start;
         m_path_expr = saved.path_expr;
         m_path_stack.pop_back();
     }
@@ -1076,35 +1083,50 @@ namespace seq {
         return l_undef;
     }
 
-    // Update m_path_pred with the feasible-character constraint induced by
-    // (c, sign). Returns l_true if already implied (no change), l_false if
-    // the resulting set becomes empty (contradiction), and l_undef otherwise.
+    // Update m_intervals based on the condition. Returns l_true if implied, l_false if inconsistent, l_undef if pushed.
+    // Operates on the active suffix m_intervals[m_intervals_start..end].
+    // On modification, appends new intervals and updates m_intervals_start.
     lbool derive::push_intervals_impl(expr* c, bool sign) {
-        range_predicate p(u().max_char());
-        if (m_pred_xlate.translate(m_ele, c, p)) {
-            range_predicate constraint = sign ? ~p : std::move(p);
-            if (m_path_pred.subset_of(constraint))
-                return l_true;
-            range_predicate new_pred = m_path_pred & constraint;
-            if (new_pred.is_empty())
-                return l_false;
-            m_path_pred = std::move(new_pred);
-            return l_undef;
-        }
-        // Translation failed: descend into a top-level and (under sign=false)
-        // or top-level or (under sign=true) to extract translatable
-        // sub-conditions. Non-translatable arguments are ignored and only
-        // weaken the implied/undef return.
-        if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
+        unsigned lo = 0, hi = 0;
+        bool negated = false;
+        if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) {
+            bool effective_neg = (negated != sign);
+            if (!effective_neg) {
+                if (lo <= hi) {
+                    // Check if current intervals already imply [lo,hi]
+                    bool already_subset = true;
+                    for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
+                        if (m_intervals[i].first < lo || m_intervals[i].second > hi) { already_subset = false; break; }
+                    }
+                    if (already_subset) return l_true;
+                    intersect_intervals(lo, hi);
+                } else {
+                    // lo > hi means empty range — contradiction
+                    return l_false;
+                }
+            } else {
+                if (lo <= hi) {
+                    // Check if current intervals already exclude [lo,hi]
+                    bool already_excluded = true;
+                    for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
+                        if (m_intervals[i].first <= hi && m_intervals[i].second >= lo) { already_excluded = false; break; }
+                    }
+                    if (already_excluded) return l_true;
+                    exclude_interval(lo, hi);
+                }
+            }
+        } else if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
             bool all_implied = true;
             for (expr* arg : *to_app(c)) {
                 lbool r = push_intervals_impl(arg, sign);
                 if (r == l_false) return l_false;
                 if (r == l_undef) all_implied = false;
             }
-            return all_implied ? l_true : (m_path_pred.is_empty() ? l_false : l_undef);
+            unsigned n = m_intervals.size() - m_intervals_start;
+            return all_implied ? l_true : (n == 0 ? l_false : l_undef);
         }
-        return m_path_pred.is_empty() ? l_false : l_undef;
+        unsigned n = m_intervals.size() - m_intervals_start;
+        return n == 0 ? l_false : l_undef;
     }
 
     // Evaluate a condition against the current path and intervals.
@@ -1197,18 +1219,80 @@ namespace seq {
     }
 
     lbool derive::eval_range_cond(expr* c) {
-        if (m_path_pred.is_empty())
+        unsigned n = m_intervals.size() - m_intervals_start;
+        if (n == 0)
             return l_false;
-        range_predicate p(u().max_char());
-        if (!m_pred_xlate.translate(m_ele, c, p))
+        unsigned lo = 0, hi = 0;
+        bool negated = false;
+        if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated))
             return l_undef;
-        // c is implied true iff every feasible char satisfies it.
-        if (m_path_pred.subset_of(p))
-            return l_true;
-        // c is implied false iff no feasible char satisfies it.
-        if (!m_path_pred.intersects(p))
-            return l_false;
+        if (lo > hi) {
+            return negated ? l_true : l_false;
+        }
+        // Check if [lo, hi] overlaps with intervals and/or contains all intervals
+        bool any_overlap = false;
+        bool all_contained = true;
+        for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
+            auto [r_lo, r_hi] = m_intervals[i];
+            if (std::max(r_lo, lo) <= std::min(r_hi, hi))
+                any_overlap = true;
+            if (r_lo < lo || r_hi > hi)
+                all_contained = false;
+        }
+        if (!negated) {
+            if (!any_overlap) return l_false;
+            if (all_contained) return l_true;
+        } else {
+            if (all_contained) return l_false;
+            if (!any_overlap) return l_true;
+        }
         return l_undef;
     }
 
+    // Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi]
+    void derive::intersect_intervals(unsigned lo, unsigned hi) {
+        // Copy active suffix to end, update start, then filter
+        unsigned old_start = m_intervals_start;
+        unsigned old_sz = m_intervals.size();
+        for (unsigned i = old_start; i < old_sz; ++i)
+            m_intervals.push_back(m_intervals[i]);
+        m_intervals_start = old_sz;
+        // Filter in-place within new suffix
+        unsigned j = m_intervals_start;
+        for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
+            auto [lo1, hi1] = m_intervals[i];
+            if (hi < lo1 || lo > hi1) {
+                j = old_sz;
+                break;
+            }
+            if (hi1 >= lo)
+                m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
+        }
+        m_intervals.shrink(j);
+    }
+
+    // Exclude [lo, hi] from the active suffix m_intervals[m_intervals_start..end]
+    void derive::exclude_interval(unsigned lo, unsigned hi) {
+        unsigned max_char = u().max_char();
+        if (lo == 0 && hi >= max_char) { m_intervals_start = m_intervals.size(); return; }
+        if (lo == 0) { intersect_intervals(hi + 1, max_char); return; }
+        if (hi >= max_char) { intersect_intervals(0, lo - 1); return; }
+        // Each interval [ilo, ihi] minus [lo, hi] → up to 2 pieces
+        // Append new results past the end, then move start
+        unsigned old_start = m_intervals_start;
+        unsigned old_sz = m_intervals.size();
+        for (unsigned i = old_start; i < old_sz; ++i) {
+            auto [ilo, ihi] = m_intervals[i];
+            if (ihi < lo || ilo > hi) {
+                m_intervals.push_back(m_intervals[i]);
+            } else {
+                if (ilo < lo)
+                    m_intervals.push_back({ilo, lo - 1});
+                if (ihi > hi)
+                    m_intervals.push_back({hi + 1, ihi});
+            }
+        }
+        m_intervals_start = old_sz;
+    }
+
 }
diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h
index 2865c0c89..dadd7f67b 100644
--- a/src/ast/rewriter/seq_derive.h
+++ b/src/ast/rewriter/seq_derive.h
@@ -28,8 +28,6 @@ Authors:
 #include "ast/arith_decl_plugin.h"
 #include "ast/array_decl_plugin.h"
 #include "ast/rewriter/bool_rewriter.h"
-#include "ast/rewriter/range_predicate.h"
-#include "ast/rewriter/range_predicate_translator.h"
 #include "util/obj_pair_hashtable.h"
 #include "util/obj_triple_hashtable.h"
 #include <functional>
@@ -83,18 +81,16 @@ namespace seq {
         expr_ref m_ele;
 
         // Path state for inline pruning during mk_inter/mk_union/mk_complement
+        using intervals_t = svector<std::pair<unsigned, unsigned>>;
 
         // Path: vector of signed atoms
         svector<std::pair<expr*, bool>> m_path;
-        // Feasible characters for the current path, expressed as a sorted
-        // disjoint range set over [0, max_char].
-        range_predicate m_path_pred;
-        // Translator from AST char-predicates to range_predicate values.
-        range_predicate_translator m_pred_xlate { m, m_util };
-        // Stack of saved states for push/pop. range_predicate has a non-trivial
-        // destructor (owns a svector), so use vector<> (CallDestructors=true).
-        struct path_save { unsigned path_sz; range_predicate saved_path_pred; expr* path_expr; };
-        vector<path_save> m_path_stack;
+        // Intervals: feasible character ranges under current path (append-only)
+        intervals_t m_intervals;
+        unsigned m_intervals_start { 0 };
+        // Stack of saved states for push/pop
+        struct path_save { unsigned path_sz; unsigned intervals_sz; unsigned intervals_start; expr* path_expr; };
+        svector<path_save> m_path_stack;
         // Boolean expression encoding of current path (for cache keys)
         expr_ref m_path_expr;
 
@@ -163,6 +159,8 @@ namespace seq {
         // Condition evaluation helpers
         lbool eval_cond(expr* cond);
         lbool eval_range_cond(expr* c);
+        void intersect_intervals(unsigned lo, unsigned hi);
+        void exclude_interval(unsigned lo, unsigned hi);
 
         sort* re_sort(expr* r) { return r->get_sort(); }
         sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; }