seq::derive — Stage 2: range_predicate-based path state

Replace seq::derive's m_intervals + m_intervals_start append-only char-range stack and the imperative intersect_intervals / exclude_interval helpers with a single canonical range_predicate, m_path_pred, that tracks the feasible character set under the current path.

* Add range_predicate_translator: a pure AST -> range_predicate translator for the boolean-over-char_le fragment (true/false, eq with const, char_le with const, and not/and/or at any nesting depth). It returns false on the first sub-term outside the fragment so the caller can fall back to other reasoning.
* push_intervals_impl: translate the candidate predicate to a range_predicate and reduce path tracking to set arithmetic (intersection + subset/empty checks). The legacy top-level and/or descent is preserved for mixed char / non-char conditions.
* eval_range_cond: implication becomes subset_of and contradiction becomes !intersects, both linear in the number of ranges with no AST allocation.
* range_predicate gains subset_of / intersects / disjoint_from to support allocation-free path queries.
* path_save now stores the saved range_predicate by value; the stack switches from svector (CallDestructors=false) to vector because range_predicate owns an inner svector.

Tests: 91/91 pass with /a, including the new range_predicate_translator unit test exercising true/false, eq, char_le, and/or/not, and De Morgan agreement.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-19 15:16:29 +00:00 · 2026-06-14 15:11:20 -07:00 · 2026-06-14 15:11:20 -07:00 · 4bef7d513c
commit 4bef7d513c
parent 904090eec0
10 changed files with 405 additions and 128 deletions
--- a/src/ast/rewriter/seq_derive.cpp
+++ b/src/ast/rewriter/seq_derive.cpp
@@ -89,9 +89,7 @@ namespace seq {
            m_depth = 0;
            // Initialize path state for inline pruning
            m_path.reset();
-            m_intervals.reset();
-            m_intervals.push_back({0u, u().max_char()});
-            m_intervals_start = 0;
+            m_path_pred = range_predicate::top(u().max_char());
            m_path_expr = m.mk_true();
            result = derive_rec(r);
            m_top_cache.insert(r, result);
@@ -915,31 +913,27 @@ namespace seq {
        // Check if (c, sign) is already determined by the path
        lbool cv = eval_path_cond(c);
        if (cv == l_true && !sign) return l_true;   // c implied true, push(c,false) is redundant
-        if (cv == l_false && sign) return l_true;   // c implied false, push(c,true) is redundant  
+        if (cv == l_false && sign) return l_true;   // c implied false, push(c,true) is redundant
        if (cv == l_true && sign) return l_false;   // c implied true, push(c,true) contradicts
        if (cv == l_false && !sign) return l_false; // c implied false, push(c,false) contradicts

        // Save current state
        unsigned saved_path_sz = m_path.size();
-        unsigned saved_intervals_sz = m_intervals.size();
-        unsigned saved_intervals_start = m_intervals_start;
+        range_predicate saved_path_pred = m_path_pred;
        expr* saved_path_expr = m_path_expr;

        // Push atoms onto path and check for contradiction or implication
        lbool result = push_path_atoms(c, sign);
        if (result != l_undef) {
            m_path.shrink(saved_path_sz);
-            m_intervals.shrink(saved_intervals_sz);
-            m_intervals_start = saved_intervals_start;
            return result;
        }

-        // Update intervals
+        // Update path predicate (feasible character set)
        result = push_intervals_impl(c, sign);
        if (result != l_undef) {
            m_path.shrink(saved_path_sz);
-            m_intervals.shrink(saved_intervals_sz);
-            m_intervals_start = saved_intervals_start;
+            m_path_pred = std::move(saved_path_pred);
            return result;
        }

@@ -949,16 +943,15 @@ namespace seq {
        m_trail.push_back(m_path_expr);

        // Commit: save state for pop()
-        m_path_stack.push_back({ saved_path_sz, saved_intervals_sz, saved_intervals_start, saved_path_expr });
+        m_path_stack.push_back({ saved_path_sz, std::move(saved_path_pred), saved_path_expr });
        return l_undef;
    }

    void derive::pop() {
        SASSERT(!m_path_stack.empty());
-        auto const& saved = m_path_stack.back();
+        auto& saved = m_path_stack.back();
        m_path.shrink(saved.path_sz);
-        m_intervals.shrink(saved.intervals_sz);
-        m_intervals_start = saved.intervals_start;
+        m_path_pred = std::move(saved.saved_path_pred);
        m_path_expr = saved.path_expr;
        m_path_stack.pop_back();
    }
@@ -1083,50 +1076,35 @@ namespace seq {
        return l_undef;
    }

-    // Update m_intervals based on the condition. Returns l_true if implied, l_false if inconsistent, l_undef if pushed.
-    // Operates on the active suffix m_intervals[m_intervals_start..end].
-    // On modification, appends new intervals and updates m_intervals_start.
+    // Update m_path_pred with the feasible-character constraint induced by
+    // (c, sign). Returns l_true if already implied (no change), l_false if
+    // the resulting set becomes empty (contradiction), and l_undef otherwise.
    lbool derive::push_intervals_impl(expr* c, bool sign) {
-        unsigned lo = 0, hi = 0;
-        bool negated = false;
-        if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) {
-            bool effective_neg = (negated != sign);
-            if (!effective_neg) {
-                if (lo <= hi) {
-                    // Check if current intervals already imply [lo,hi]
-                    bool already_subset = true;
-                    for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
-                        if (m_intervals[i].first < lo || m_intervals[i].second > hi) { already_subset = false; break; }
-                    }
-                    if (already_subset) return l_true;
-                    intersect_intervals(lo, hi);
-                } else {
-                    // lo > hi means empty range — contradiction
-                    return l_false;
-                }
-            } else {
-                if (lo <= hi) {
-                    // Check if current intervals already exclude [lo,hi]
-                    bool already_excluded = true;
-                    for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
-                        if (m_intervals[i].first <= hi && m_intervals[i].second >= lo) { already_excluded = false; break; }
-                    }
-                    if (already_excluded) return l_true;
-                    exclude_interval(lo, hi);
-                }
-            }
-        } else if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
+        range_predicate p(u().max_char());
+        if (m_pred_xlate.translate(m_ele, c, p)) {
+            range_predicate constraint = sign ? ~p : std::move(p);
+            if (m_path_pred.subset_of(constraint))
+                return l_true;
+            range_predicate new_pred = m_path_pred & constraint;
+            if (new_pred.is_empty())
+                return l_false;
+            m_path_pred = std::move(new_pred);
+            return l_undef;
+        }
+        // Translation failed: descend into a top-level and (under sign=false)
+        // or top-level or (under sign=true) to extract translatable
+        // sub-conditions. Non-translatable arguments are ignored and only
+        // weaken the implied/undef return.
+        if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
            bool all_implied = true;
            for (expr* arg : *to_app(c)) {
                lbool r = push_intervals_impl(arg, sign);
                if (r == l_false) return l_false;
                if (r == l_undef) all_implied = false;
            }
-            unsigned n = m_intervals.size() - m_intervals_start;
-            return all_implied ? l_true : (n == 0 ? l_false : l_undef);
+            return all_implied ? l_true : (m_path_pred.is_empty() ? l_false : l_undef);
        }
-        unsigned n = m_intervals.size() - m_intervals_start;
-        return n == 0 ? l_false : l_undef;
+        return m_path_pred.is_empty() ? l_false : l_undef;
    }

    // Evaluate a condition against the current path and intervals.
@@ -1219,77 +1197,18 @@ namespace seq {
    }

    lbool derive::eval_range_cond(expr* c) {
-        unsigned n = m_intervals.size() - m_intervals_start;
-        if (n == 0)
+        if (m_path_pred.is_empty())
            return l_false;
-        unsigned lo = 0, hi = 0;
-        bool negated = false;
-        if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated))
+        range_predicate p(u().max_char());
+        if (!m_pred_xlate.translate(m_ele, c, p))
            return l_undef;
-        if (lo > hi) {
-            return negated ? l_true : l_false;
-        }
-        // Check if [lo, hi] overlaps with intervals and/or contains all intervals
-        bool any_overlap = false;
-        bool all_contained = true;
-        for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
-            auto [r_lo, r_hi] = m_intervals[i];
-            if (std::max(r_lo, lo) <= std::min(r_hi, hi))
-                any_overlap = true;
-            if (r_lo < lo || r_hi > hi)
-                all_contained = false;
-        }
-        if (!negated) {
-            if (!any_overlap) return l_false;
-            if (all_contained) return l_true;
-        } else {
-            if (all_contained) return l_false;
-            if (!any_overlap) return l_true;
-        }
+        // c is implied true iff every feasible char satisfies it.
+        if (m_path_pred.subset_of(p))
+            return l_true;
+        // c is implied false iff no feasible char satisfies it.
+        if (!m_path_pred.intersects(p))
+            return l_false;
        return l_undef;
    }

-    // Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi]
-    void derive::intersect_intervals(unsigned lo, unsigned hi) {
-        // Copy active suffix to end, update start, then filter
-        unsigned old_sz = m_intervals.size();
-        for (unsigned i = m_intervals_start; i < old_sz; ++i)
-            m_intervals.push_back(m_intervals[i]);
-        m_intervals_start = old_sz;
-        // Filter in-place within new suffix: drop intervals disjoint from [lo,hi],
-        // keep the intersection for overlapping ones.
-        unsigned j = m_intervals_start;
-        for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
-            auto [lo1, hi1] = m_intervals[i];
-            if (hi < lo1 || lo > hi1)
-                continue;  // disjoint with this interval — drop it
-            m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
-        }
-        m_intervals.shrink(j);
-    }
-
-    // Exclude [lo, hi] from the active suffix m_intervals[m_intervals_start..end]
-    void derive::exclude_interval(unsigned lo, unsigned hi) {
-        unsigned max_char = u().max_char();
-        if (lo == 0 && hi >= max_char) { m_intervals_start = m_intervals.size(); return; }
-        if (lo == 0) { intersect_intervals(hi + 1, max_char); return; }
-        if (hi >= max_char) { intersect_intervals(0, lo - 1); return; }
-        // Each interval [ilo, ihi] minus [lo, hi] → up to 2 pieces
-        // Append new results past the end, then move start
-        unsigned old_start = m_intervals_start;
-        unsigned old_sz = m_intervals.size();
-        for (unsigned i = old_start; i < old_sz; ++i) {
-            auto [ilo, ihi] = m_intervals[i];
-            if (ihi < lo || ilo > hi) {
-                m_intervals.push_back(m_intervals[i]);
-            } else {
-                if (ilo < lo)
-                    m_intervals.push_back({ilo, lo - 1});
-                if (ihi > hi)
-                    m_intervals.push_back({hi + 1, ihi});
-            }
-        }
-        m_intervals_start = old_sz;
-    }
-
 }