From 8fe2f3c58af8c6e9e86298e0426d52833f478ba0 Mon Sep 17 00:00:00 2001 From: Lev Nachmanson <5377127+levnach@users.noreply.github.com> Date: Mon, 29 Jun 2026 16:36:51 -0700 Subject: [PATCH] nlsat: fix levelwise (lws) SIGSEGV instead of disabling it (#10001) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Alternative to #9991. Instead of disabling `nlsat.lws` by default, this **fixes the underlying bug** so levelwise single-cell projection stays enabled. ## Root cause The crash was reproduced on the QF_NIA benchmark from #9991 (`20170427-VeryMax/ITS/From_AProVE_2014__Round3.jar-obl-8__p11898_terminationG_0.smt2`, ~40% SIGSEGV at `-T:20`). A core-dump backtrace points at: ``` mpbq_manager::le (mpbq.cpp:362) algebraic_numbers::manager::imp::compare (algebraic_numbers.cpp:1913) c = 0xea24052d29f2d500 <- wild pointer algebraic_numbers::manager::imp::compare (algebraic_numbers.cpp:2128) nlsat::levelwise::impl::root_function_lt (levelwise.cpp:949) ... std::__unguarded_linear_insert ... <- OOB read std::sort nlsat::levelwise::impl::sort_root_function_partitions ``` The comparator (`root_function_lt` → `anum_manager::compare`, and `anum_manager::lt`) **refines the isolating intervals of the algebraic numbers it compares** and may **hit the resource limit (throwing)** mid-comparison. Both make the order it induces non-deterministic / not a strict weak ordering across a single `std::sort` — undefined behavior. libstdc++'s *unguarded* insertion pass then walks past `begin()` and dereferences a wild anum cell → SIGSEGV. This only fires when a timeout interrupts levelwise, explaining the non-determinism (`signal-11`). ## Fix Replace the two affected `std::sort` calls (`sort_root_function_partitions` and `add_adjacent_root_resultants`) with a **bounds-checked insertion sort over an index permutation**. A fully guarded insertion sort can never read out of bounds regardless of comparator consistency, and unwinds cleanly if `compare` throws on cancellation. The partitions sorted here are small, so the O(n²) cost is negligible. `nlsat.lws` stays `true`. ## Verification On the Linux repro box (Ubuntu 24.04, g++ 13), RelWithDebInfo: - **Before:** ~40% SIGSEGV (e.g. 5/16 runs at `-T:20`). - **After:** **0/30** SIGSEGV; results are `unsat`/`timeout`. - Sanity batch over 25 QF_NIA/VeryMax/ITS files: no crashes, expected sat/unsat/timeout mix. - `model_validate=true` full solve still returns `unsat`. --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/math/polynomial/algebraic_numbers.cpp | 78 +++++++++++++++++++++- src/nlsat/levelwise.cpp | 79 ++++++++++++++++++++--- 2 files changed, 144 insertions(+), 13 deletions(-) diff --git a/src/math/polynomial/algebraic_numbers.cpp b/src/math/polynomial/algebraic_numbers.cpp index 07b49e5ce4..cbf2980dad 100644 --- a/src/math/polynomial/algebraic_numbers.cpp +++ b/src/math/polynomial/algebraic_numbers.cpp @@ -593,10 +593,82 @@ namespace algebraic_numbers { } } + // Bounds-safe, mutation-aware merge sort of an index permutation. + // + // We deliberately avoid std::sort: the comparator (lt -> compare) is NOT pure + // -- it MUTATES the algebraic numbers it compares by refining their isolating + // intervals (possibly collapsing a root to a rational), and can hit the + // resource limit and throw. That refinement is monotone toward the one true + // real order (a decided sign is permanent), but a comparison can transiently + // strengthen from "uncertain" to "decided". std::sort (introsort) relies on a + // comparator-derived sentinel and re-compares a pivot repeatedly; a + // strengthening invalidates the sentinel and its *unguarded* insertion pass + // walks off the array -> out-of-bounds read -> SIGSEGV (a try/catch could not + // help). Merge sort is safe because it never re-compares a pair and uses no + // comparator-derived sentinel: every loop bound is arithmetic, so an + // inconsistent comparator can only yield a wrong order, never an OOB access or + // a hang. Runs are ordered by decided signs that later refinement cannot + // un-decide, so deeper merges stay correct and inherit cheaper intervals. + // O(n log n) comparisons, O(n) scratch. See also nlsat/levelwise.cpp. + void merge_sort_roots_perm(numeral_vector & r, unsigned_vector & perm) { + unsigned n = perm.size(); + if (n < 2) + return; + unsigned_vector tmp; + tmp.resize(n, 0); + // Strict, total, stable index comparator: decided sign first, then index + // tiebreak (covers the equal/limit case so the order stays deterministic). + auto idx_lt = [&](unsigned x, unsigned y) { + ::sign s = compare(r[x], r[y]); + return s != sign_zero ? s == sign_neg : x < y; + }; + for (unsigned width = 1; width < n; width <<= 1) { + for (unsigned lo = 0; lo < n; lo += (width << 1)) { + unsigned mid = std::min(lo + width, n); + unsigned hi = std::min(lo + (width << 1), n); + unsigned i = lo, j = mid, k = lo; + while (i < mid && j < hi) + tmp[k++] = idx_lt(perm[j], perm[i]) ? perm[j++] : perm[i++]; + while (i < mid) + tmp[k++] = perm[i++]; + while (j < hi) + tmp[k++] = perm[j++]; + } + perm.swap(tmp); + } + } + void sort_roots(numeral_vector & r) { - if (m_limit.inc()) { - // DEBUG_CODE(check_transitivity(r);); - std::sort(r.begin(), r.end(), lt_proc(m_wrapper)); + if (!m_limit.inc()) + return; + // DEBUG_CODE(check_transitivity(r);); + unsigned n = r.size(); + if (n < 2) + return; + unsigned_vector perm; + perm.resize(n, 0); + for (unsigned i = 0; i < n; ++i) + perm[i] = i; + merge_sort_roots_perm(r, perm); + // Apply the permutation in place via swap cycles. anum swap is a cheap + // pointer swap (move nulls the source), so this is O(n) cheap moves. + unsigned_vector pos; // pos[v] = current position of element v + pos.resize(n, 0); + unsigned_vector at; // at[p] = element currently at position p + at.resize(n, 0); + for (unsigned i = 0; i < n; ++i) { + pos[i] = i; + at[i] = i; + } + for (unsigned target = 0; target < n; ++target) { + unsigned want = perm[target]; // element that should end up at target + unsigned cur = pos[want]; // where it currently is + if (cur == target) + continue; + unsigned other = at[target]; // element currently at target + std::swap(r[target], r[cur]); + at[target] = want; at[cur] = other; + pos[want] = target; pos[other] = cur; } } diff --git a/src/nlsat/levelwise.cpp b/src/nlsat/levelwise.cpp index e085d6173c..ee1febb1b3 100644 --- a/src/nlsat/levelwise.cpp +++ b/src/nlsat/levelwise.cpp @@ -956,6 +956,56 @@ namespace nlsat { return m_pm.id(a.ire.p) < m_pm.id(b.ire.p); } + // Sort an index permutation with a bounds-safe, mutation-aware merge sort. + // + // We deliberately avoid std::sort here. The comparator (root_function_lt -> + // anum_manager::compare) is NOT pure: it MUTATES the algebraic numbers it + // compares by refining their isolating intervals (and may collapse a root to a + // rational, or hit the resource limit and throw). That refinement is monotone + // and converges toward the one true real order -- a *decided* sign is permanent + // -- but a comparison can transiently strengthen from "uncertain" to "decided" + // as intervals tighten. std::sort (introsort) relies on a comparator-derived + // sentinel and re-compares a pivot repeatedly; such a strengthening invalidates + // the sentinel mid-loop and its *unguarded* insertion pass then walks off the + // array -> SIGSEGV (an out-of-bounds read, so a try/catch around the sort would + // not help). + // + // Merge sort is safe BECAUSE of how it meets a mutating comparator: + // 1. It never re-compares a pair (each unordered pair is compared at exactly + // one merge level), so "the verdict for this pair changed" cannot occur + // within the sort. + // 2. It uses no comparator-derived sentinel; every loop bound is arithmetic + // (i < mid, j < hi), so an inconsistent comparator can only yield a wrong + // order, never an out-of-bounds access or non-termination. + // 3. Refinement only helps: runs are ordered by decided signs (the true + // order), which later refinement cannot un-decide, so each run stays + // sorted and deeper merges inherit tighter, cheaper intervals. + // It runs in O(n log n) comparisons and O(n) scratch, and unwinds cleanly if + // compare throws on cancellation. + template + void merge_sort_perm(std_vector& perm, Less less) { + unsigned n = static_cast(perm.size()); + if (n < 2) + return; + std_vector tmp(n); + for (unsigned width = 1; width < n; width <<= 1) { + for (unsigned lo = 0; lo < n; lo += (width << 1)) { + unsigned mid = std::min(lo + width, n); + unsigned hi = std::min(lo + (width << 1), n); + unsigned i = lo, j = mid, k = lo; + // Take from the right run only on a strict decrease, so equal/ + // undecided pairs keep their relative order (stable). + while (i < mid && j < hi) + tmp[k++] = less(perm[j], perm[i]) ? perm[j++] : perm[i++]; + while (i < mid) + tmp[k++] = perm[i++]; + while (j < hi) + tmp[k++] = perm[j++]; + } + perm.swap(tmp); + } + } + // Apply a permutation to a range of root_functions using swap cycles, // avoiding the bulk anum allocations that std::sort's move operations cause. void apply_permutation(std_vector& rfs, unsigned offset, std_vector const& perm) { @@ -982,7 +1032,7 @@ namespace nlsat { if (mid_pos > 1) { std_vector perm(mid_pos); std::iota(perm.begin(), perm.end(), 0u); - std::sort(perm.begin(), perm.end(), [&](unsigned a, unsigned b) { + merge_sort_perm(perm, [&](unsigned a, unsigned b) { return root_function_lt(rfs[a], rfs[b], true); }); apply_permutation(rfs, 0, perm); @@ -993,7 +1043,7 @@ namespace nlsat { if (upper_sz > 1) { std_vector perm(upper_sz); std::iota(perm.begin(), perm.end(), 0u); - std::sort(perm.begin(), perm.end(), [&](unsigned a, unsigned b) { + merge_sort_perm(perm, [&](unsigned a, unsigned b) { return root_function_lt(rfs[mid_pos + a], rfs[mid_pos + b], false); }); apply_permutation(rfs, mid_pos, perm); @@ -1192,20 +1242,29 @@ namespace nlsat { if (root_vals.size() < 2) return; - std::sort(root_vals.begin(), root_vals.end(), [&](auto const& a, auto const& b) { - return m_am.lt(a.first, b.first); + // Sort root values by an index permutation with the bounds-safe, + // mutation-aware merge sort (see merge_sort_perm). As in + // sort_root_function_partitions, the comparator (anum_manager::lt -> + // compare) MUTATES the algebraic numbers it compares (it refines their + // isolating intervals and may hit the resource limit and throw), so it is + // not a fixed strict weak ordering over a single sort; std::sort here would + // be undefined behavior and crash via an out-of-bounds read on timeout. + std_vector perm(root_vals.size()); + std::iota(perm.begin(), perm.end(), 0u); + merge_sort_perm(perm, [&](unsigned a, unsigned b) { + return m_am.lt(root_vals[a].first, root_vals[b].first); }); - + TRACE(lws, tout << " Sorted roots:\n"; - for (unsigned j = 0; j < root_vals.size(); ++j) - m_pm.display(m_am.display_decimal(tout << " [" << j << "] val=", root_vals[j].first, 5) << " poly=", root_vals[j].second) << "\n"; + for (unsigned j = 0; j < perm.size(); ++j) + m_pm.display(m_am.display_decimal(tout << " [" << j << "] val=", root_vals[perm[j]].first, 5) << " poly=", root_vals[perm[j]].second) << "\n"; ); std::set> added_pairs; - for (unsigned j = 0; j + 1 < root_vals.size(); ++j) { - poly* p1 = root_vals[j].second; - poly* p2 = root_vals[j + 1].second; + for (unsigned j = 0; j + 1 < perm.size(); ++j) { + poly* p1 = root_vals[perm[j]].second; + poly* p2 = root_vals[perm[j + 1]].second; if (!p1 || !p2 || p1 == p2) continue; if (p1 > p2) std::swap(p1, p2);