From bcdbc80ab8c22d61201dd5ab0b038f3aa73a601e Mon Sep 17 00:00:00 2001 From: Margus Veanes <223556219+Copilot@users.noreply.github.com> Date: Sun, 14 Jun 2026 18:06:27 -0700 Subject: [PATCH] seq::derive: revert Stage 2 path-tracking; keep range_predicate value type for Stage 3 Stage 2 replaced log-structured m_intervals path state with stacked range_predicate values. Profiling showed this caused a ~2.4x allocation blowup on the regex-equivalence corpus: each push() heap-allocated a fresh svector for the narrowed set, vs. the original code's allocation-free append-and-rewind in a single shared svector. On the hot file flat_vs_loop_015_n8.smt2 the cost grew from 54K allocs (0.03s) to 100M allocs (1.49s). Revert the path-tracking to its derive-branch implementation. Keep the seq::range_predicate value type and seq::range_predicate_translator as standalone modules with unit tests; they remain available for Stage 3 (OneStep predicate, smart-constructor side conditions, AC normalization) where their value-type semantics pay off as cache keys. Add a small O(1) range_predicate::swap helper for future use. Validated on inputs/regex-equivalence (1523 files, -T:5, model_validate=true, 8 workers): derive branch: 633s wall, 485 sat / 951 unsat / 87 to reverted dwr (this): 634s wall, 486 sat / 952 unsat / 85 to 0 soundness disagreements; 4 trivial nondeterministic diffs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/range_predicate.h | 6 ++ src/ast/rewriter/seq_derive.cpp | 160 ++++++++++++++++++++++------- src/ast/rewriter/seq_derive.h | 20 ++-- 3 files changed, 137 insertions(+), 49 deletions(-) diff --git a/src/ast/rewriter/range_predicate.h b/src/ast/rewriter/range_predicate.h index 93c8e3219..f2b4a4f30 100644 --- a/src/ast/rewriter/range_predicate.h +++ b/src/ast/rewriter/range_predicate.h @@ -113,6 +113,12 @@ namespace seq { unsigned hash() const; + // O(1) swap of internals (the underlying svector swap is pointer-swap). + void swap(range_predicate& o) noexcept { + m_ranges.swap(o.m_ranges); + std::swap(m_max_char, o.m_max_char); + } + // ---------------- Boolean operations ---------------- range_predicate operator|(range_predicate const& o) const; // union diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index 26093ee18..48db95e3b 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -89,7 +89,9 @@ namespace seq { m_depth = 0; // Initialize path state for inline pruning m_path.reset(); - m_path_pred = range_predicate::top(u().max_char()); + m_intervals.reset(); + m_intervals.push_back({0u, u().max_char()}); + m_intervals_start = 0; m_path_expr = m.mk_true(); result = derive_rec(r); m_top_cache.insert(r, result); @@ -913,27 +915,31 @@ namespace seq { // Check if (c, sign) is already determined by the path lbool cv = eval_path_cond(c); if (cv == l_true && !sign) return l_true; // c implied true, push(c,false) is redundant - if (cv == l_false && sign) return l_true; // c implied false, push(c,true) is redundant + if (cv == l_false && sign) return l_true; // c implied false, push(c,true) is redundant if (cv == l_true && sign) return l_false; // c implied true, push(c,true) contradicts if (cv == l_false && !sign) return l_false; // c implied false, push(c,false) contradicts // Save current state unsigned saved_path_sz = m_path.size(); - range_predicate saved_path_pred = m_path_pred; + unsigned saved_intervals_sz = m_intervals.size(); + unsigned saved_intervals_start = m_intervals_start; expr* saved_path_expr = m_path_expr; // Push atoms onto path and check for contradiction or implication lbool result = push_path_atoms(c, sign); if (result != l_undef) { m_path.shrink(saved_path_sz); + m_intervals.shrink(saved_intervals_sz); + m_intervals_start = saved_intervals_start; return result; } - // Update path predicate (feasible character set) + // Update intervals result = push_intervals_impl(c, sign); if (result != l_undef) { m_path.shrink(saved_path_sz); - m_path_pred = std::move(saved_path_pred); + m_intervals.shrink(saved_intervals_sz); + m_intervals_start = saved_intervals_start; return result; } @@ -943,15 +949,16 @@ namespace seq { m_trail.push_back(m_path_expr); // Commit: save state for pop() - m_path_stack.push_back({ saved_path_sz, std::move(saved_path_pred), saved_path_expr }); + m_path_stack.push_back({ saved_path_sz, saved_intervals_sz, saved_intervals_start, saved_path_expr }); return l_undef; } void derive::pop() { SASSERT(!m_path_stack.empty()); - auto& saved = m_path_stack.back(); + auto const& saved = m_path_stack.back(); m_path.shrink(saved.path_sz); - m_path_pred = std::move(saved.saved_path_pred); + m_intervals.shrink(saved.intervals_sz); + m_intervals_start = saved.intervals_start; m_path_expr = saved.path_expr; m_path_stack.pop_back(); } @@ -1076,35 +1083,50 @@ namespace seq { return l_undef; } - // Update m_path_pred with the feasible-character constraint induced by - // (c, sign). Returns l_true if already implied (no change), l_false if - // the resulting set becomes empty (contradiction), and l_undef otherwise. + // Update m_intervals based on the condition. Returns l_true if implied, l_false if inconsistent, l_undef if pushed. + // Operates on the active suffix m_intervals[m_intervals_start..end]. + // On modification, appends new intervals and updates m_intervals_start. lbool derive::push_intervals_impl(expr* c, bool sign) { - range_predicate p(u().max_char()); - if (m_pred_xlate.translate(m_ele, c, p)) { - range_predicate constraint = sign ? ~p : std::move(p); - if (m_path_pred.subset_of(constraint)) - return l_true; - range_predicate new_pred = m_path_pred & constraint; - if (new_pred.is_empty()) - return l_false; - m_path_pred = std::move(new_pred); - return l_undef; - } - // Translation failed: descend into a top-level and (under sign=false) - // or top-level or (under sign=true) to extract translatable - // sub-conditions. Non-translatable arguments are ignored and only - // weaken the implied/undef return. - if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) { + unsigned lo = 0, hi = 0; + bool negated = false; + if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) { + bool effective_neg = (negated != sign); + if (!effective_neg) { + if (lo <= hi) { + // Check if current intervals already imply [lo,hi] + bool already_subset = true; + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + if (m_intervals[i].first < lo || m_intervals[i].second > hi) { already_subset = false; break; } + } + if (already_subset) return l_true; + intersect_intervals(lo, hi); + } else { + // lo > hi means empty range — contradiction + return l_false; + } + } else { + if (lo <= hi) { + // Check if current intervals already exclude [lo,hi] + bool already_excluded = true; + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + if (m_intervals[i].first <= hi && m_intervals[i].second >= lo) { already_excluded = false; break; } + } + if (already_excluded) return l_true; + exclude_interval(lo, hi); + } + } + } else if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) { bool all_implied = true; for (expr* arg : *to_app(c)) { lbool r = push_intervals_impl(arg, sign); if (r == l_false) return l_false; if (r == l_undef) all_implied = false; } - return all_implied ? l_true : (m_path_pred.is_empty() ? l_false : l_undef); + unsigned n = m_intervals.size() - m_intervals_start; + return all_implied ? l_true : (n == 0 ? l_false : l_undef); } - return m_path_pred.is_empty() ? l_false : l_undef; + unsigned n = m_intervals.size() - m_intervals_start; + return n == 0 ? l_false : l_undef; } // Evaluate a condition against the current path and intervals. @@ -1197,18 +1219,80 @@ namespace seq { } lbool derive::eval_range_cond(expr* c) { - if (m_path_pred.is_empty()) + unsigned n = m_intervals.size() - m_intervals_start; + if (n == 0) return l_false; - range_predicate p(u().max_char()); - if (!m_pred_xlate.translate(m_ele, c, p)) + unsigned lo = 0, hi = 0; + bool negated = false; + if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated)) return l_undef; - // c is implied true iff every feasible char satisfies it. - if (m_path_pred.subset_of(p)) - return l_true; - // c is implied false iff no feasible char satisfies it. - if (!m_path_pred.intersects(p)) - return l_false; + if (lo > hi) { + return negated ? l_true : l_false; + } + // Check if [lo, hi] overlaps with intervals and/or contains all intervals + bool any_overlap = false; + bool all_contained = true; + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + auto [r_lo, r_hi] = m_intervals[i]; + if (std::max(r_lo, lo) <= std::min(r_hi, hi)) + any_overlap = true; + if (r_lo < lo || r_hi > hi) + all_contained = false; + } + if (!negated) { + if (!any_overlap) return l_false; + if (all_contained) return l_true; + } else { + if (all_contained) return l_false; + if (!any_overlap) return l_true; + } return l_undef; } + // Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi] + void derive::intersect_intervals(unsigned lo, unsigned hi) { + // Copy active suffix to end, update start, then filter + unsigned old_start = m_intervals_start; + unsigned old_sz = m_intervals.size(); + for (unsigned i = old_start; i < old_sz; ++i) + m_intervals.push_back(m_intervals[i]); + m_intervals_start = old_sz; + // Filter in-place within new suffix + unsigned j = m_intervals_start; + for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { + auto [lo1, hi1] = m_intervals[i]; + if (hi < lo1 || lo > hi1) { + j = old_sz; + break; + } + if (hi1 >= lo) + m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)}; + } + m_intervals.shrink(j); + } + + // Exclude [lo, hi] from the active suffix m_intervals[m_intervals_start..end] + void derive::exclude_interval(unsigned lo, unsigned hi) { + unsigned max_char = u().max_char(); + if (lo == 0 && hi >= max_char) { m_intervals_start = m_intervals.size(); return; } + if (lo == 0) { intersect_intervals(hi + 1, max_char); return; } + if (hi >= max_char) { intersect_intervals(0, lo - 1); return; } + // Each interval [ilo, ihi] minus [lo, hi] → up to 2 pieces + // Append new results past the end, then move start + unsigned old_start = m_intervals_start; + unsigned old_sz = m_intervals.size(); + for (unsigned i = old_start; i < old_sz; ++i) { + auto [ilo, ihi] = m_intervals[i]; + if (ihi < lo || ilo > hi) { + m_intervals.push_back(m_intervals[i]); + } else { + if (ilo < lo) + m_intervals.push_back({ilo, lo - 1}); + if (ihi > hi) + m_intervals.push_back({hi + 1, ihi}); + } + } + m_intervals_start = old_sz; + } + } diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 2865c0c89..dadd7f67b 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -28,8 +28,6 @@ Authors: #include "ast/arith_decl_plugin.h" #include "ast/array_decl_plugin.h" #include "ast/rewriter/bool_rewriter.h" -#include "ast/rewriter/range_predicate.h" -#include "ast/rewriter/range_predicate_translator.h" #include "util/obj_pair_hashtable.h" #include "util/obj_triple_hashtable.h" #include @@ -83,18 +81,16 @@ namespace seq { expr_ref m_ele; // Path state for inline pruning during mk_inter/mk_union/mk_complement + using intervals_t = svector>; // Path: vector of signed atoms svector> m_path; - // Feasible characters for the current path, expressed as a sorted - // disjoint range set over [0, max_char]. - range_predicate m_path_pred; - // Translator from AST char-predicates to range_predicate values. - range_predicate_translator m_pred_xlate { m, m_util }; - // Stack of saved states for push/pop. range_predicate has a non-trivial - // destructor (owns a svector), so use vector<> (CallDestructors=true). - struct path_save { unsigned path_sz; range_predicate saved_path_pred; expr* path_expr; }; - vector m_path_stack; + // Intervals: feasible character ranges under current path (append-only) + intervals_t m_intervals; + unsigned m_intervals_start { 0 }; + // Stack of saved states for push/pop + struct path_save { unsigned path_sz; unsigned intervals_sz; unsigned intervals_start; expr* path_expr; }; + svector m_path_stack; // Boolean expression encoding of current path (for cache keys) expr_ref m_path_expr; @@ -163,6 +159,8 @@ namespace seq { // Condition evaluation helpers lbool eval_cond(expr* cond); lbool eval_range_cond(expr* c); + void intersect_intervals(unsigned lo, unsigned hi); + void exclude_interval(unsigned lo, unsigned hi); sort* re_sort(expr* r) { return r->get_sort(); } sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; }