From 2490e86d3f5127456d5eba70c80bd00cad0efa1a Mon Sep 17 00:00:00 2001 From: Lev Nachmanson <5377127+levnach@users.noreply.github.com> Date: Tue, 30 Jun 2026 08:40:33 -0700 Subject: [PATCH] nlsat/anum: share mutation-aware merge sort in one helper (#10006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Follow-up to #10001 addressing @NikolajBjorner's review comment: > isn't this nearly identical AI generated code to the other file? There has to be some modular approach to deal with sorting vectors? #10001 introduced two nearly-identical copies of a bounds-safe, mutation-aware index-permutation merge sort: - `algebraic_numbers.cpp::merge_sort_roots_perm` - `nlsat/levelwise.cpp::merge_sort_perm` Both exist because the comparator (`anum_manager::compare`/`lt`) is **not pure**: it mutates the algebraic numbers it compares (refining isolating intervals) and may throw on the resource limit, which makes `std::sort` undefined behavior (the original SIGSEGV). ## Change Extract the algorithm into a single shared helper `util/index_sort_with_mutations.h` (`stable_index_merge_sort`). The long rationale for why `std::sort` is unsafe and merge sort is safe now lives in exactly one place. Both call sites become thin wrappers that build the scratch buffer and forward their local comparator. No behavioral change: same stable O(n log n) merge sort over an index permutation. ## Verification CMake/Ninja Release build: - `test-z3 /seq algebraic_numbers` — PASS - `test-z3 /seq algebraic` — PASS - NRA/NIA smoke solves with `nlsat.lws=true` return expected sat/unsat. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/math/polynomial/algebraic_numbers.cpp | 42 +++--------- src/nlsat/levelwise.cpp | 53 +++------------ src/util/index_sort_with_mutations.h | 79 +++++++++++++++++++++++ 3 files changed, 98 insertions(+), 76 deletions(-) create mode 100644 src/util/index_sort_with_mutations.h diff --git a/src/math/polynomial/algebraic_numbers.cpp b/src/math/polynomial/algebraic_numbers.cpp index cbf2980dad..a890913c2d 100644 --- a/src/math/polynomial/algebraic_numbers.cpp +++ b/src/math/polynomial/algebraic_numbers.cpp @@ -22,6 +22,7 @@ Notes: #include "util/mpbqi.h" #include "util/timeit.h" #include "util/common_msgs.h" +#include "util/index_sort_with_mutations.h" #include "math/polynomial/algebraic_numbers.h" #include "math/polynomial/upolynomial.h" #include "math/polynomial/sexpr2upolynomial.h" @@ -593,49 +594,24 @@ namespace algebraic_numbers { } } - // Bounds-safe, mutation-aware merge sort of an index permutation. - // - // We deliberately avoid std::sort: the comparator (lt -> compare) is NOT pure - // -- it MUTATES the algebraic numbers it compares by refining their isolating - // intervals (possibly collapsing a root to a rational), and can hit the - // resource limit and throw. That refinement is monotone toward the one true - // real order (a decided sign is permanent), but a comparison can transiently - // strengthen from "uncertain" to "decided". std::sort (introsort) relies on a - // comparator-derived sentinel and re-compares a pivot repeatedly; a - // strengthening invalidates the sentinel and its *unguarded* insertion pass - // walks off the array -> out-of-bounds read -> SIGSEGV (a try/catch could not - // help). Merge sort is safe because it never re-compares a pair and uses no - // comparator-derived sentinel: every loop bound is arithmetic, so an - // inconsistent comparator can only yield a wrong order, never an OOB access or - // a hang. Runs are ordered by decided signs that later refinement cannot - // un-decide, so deeper merges stay correct and inherit cheaper intervals. - // O(n log n) comparisons, O(n) scratch. See also nlsat/levelwise.cpp. + // Sort an index permutation with a bounds-safe, mutation-aware merge + // sort. The comparator (compare/lt) is NOT pure: it MUTATES the + // algebraic numbers it compares (refining their isolating intervals) and + // may throw on the resource limit, so std::sort would be undefined + // behavior here. See util/index_sort_with_mutations.h for the rationale. void merge_sort_roots_perm(numeral_vector & r, unsigned_vector & perm) { unsigned n = perm.size(); if (n < 2) return; - unsigned_vector tmp; - tmp.resize(n, 0); + unsigned_vector scratch; + scratch.resize(n, 0); // Strict, total, stable index comparator: decided sign first, then index // tiebreak (covers the equal/limit case so the order stays deterministic). auto idx_lt = [&](unsigned x, unsigned y) { ::sign s = compare(r[x], r[y]); return s != sign_zero ? s == sign_neg : x < y; }; - for (unsigned width = 1; width < n; width <<= 1) { - for (unsigned lo = 0; lo < n; lo += (width << 1)) { - unsigned mid = std::min(lo + width, n); - unsigned hi = std::min(lo + (width << 1), n); - unsigned i = lo, j = mid, k = lo; - while (i < mid && j < hi) - tmp[k++] = idx_lt(perm[j], perm[i]) ? perm[j++] : perm[i++]; - while (i < mid) - tmp[k++] = perm[i++]; - while (j < hi) - tmp[k++] = perm[j++]; - } - perm.swap(tmp); - } + stable_index_merge_sort(perm.data(), scratch.data(), n, idx_lt); } void sort_roots(numeral_vector & r) { diff --git a/src/nlsat/levelwise.cpp b/src/nlsat/levelwise.cpp index ee1febb1b3..da099d8fdc 100644 --- a/src/nlsat/levelwise.cpp +++ b/src/nlsat/levelwise.cpp @@ -5,6 +5,7 @@ #include "math/polynomial/polynomial.h" #include "nlsat_common.h" #include "util/vector.h" +#include "util/index_sort_with_mutations.h" #include "util/trace.h" #include @@ -956,54 +957,20 @@ namespace nlsat { return m_pm.id(a.ire.p) < m_pm.id(b.ire.p); } - // Sort an index permutation with a bounds-safe, mutation-aware merge sort. - // - // We deliberately avoid std::sort here. The comparator (root_function_lt -> - // anum_manager::compare) is NOT pure: it MUTATES the algebraic numbers it - // compares by refining their isolating intervals (and may collapse a root to a - // rational, or hit the resource limit and throw). That refinement is monotone - // and converges toward the one true real order -- a *decided* sign is permanent - // -- but a comparison can transiently strengthen from "uncertain" to "decided" - // as intervals tighten. std::sort (introsort) relies on a comparator-derived - // sentinel and re-compares a pivot repeatedly; such a strengthening invalidates - // the sentinel mid-loop and its *unguarded* insertion pass then walks off the - // array -> SIGSEGV (an out-of-bounds read, so a try/catch around the sort would - // not help). - // - // Merge sort is safe BECAUSE of how it meets a mutating comparator: - // 1. It never re-compares a pair (each unordered pair is compared at exactly - // one merge level), so "the verdict for this pair changed" cannot occur - // within the sort. - // 2. It uses no comparator-derived sentinel; every loop bound is arithmetic - // (i < mid, j < hi), so an inconsistent comparator can only yield a wrong - // order, never an out-of-bounds access or non-termination. - // 3. Refinement only helps: runs are ordered by decided signs (the true - // order), which later refinement cannot un-decide, so each run stays - // sorted and deeper merges inherit tighter, cheaper intervals. - // It runs in O(n log n) comparisons and O(n) scratch, and unwinds cleanly if - // compare throws on cancellation. + // Sort an index permutation with a bounds-safe, mutation-aware merge + // sort. The comparator (root_function_lt / anum_manager::lt -> compare) + // is NOT pure: it MUTATES the algebraic numbers it compares by refining + // their isolating intervals, and may throw on the resource limit, so a + // single std::sort would be undefined behavior and can crash via an + // out-of-bounds read on timeout. See util/index_sort_with_mutations.h + // for the full rationale. template void merge_sort_perm(std_vector& perm, Less less) { unsigned n = static_cast(perm.size()); if (n < 2) return; - std_vector tmp(n); - for (unsigned width = 1; width < n; width <<= 1) { - for (unsigned lo = 0; lo < n; lo += (width << 1)) { - unsigned mid = std::min(lo + width, n); - unsigned hi = std::min(lo + (width << 1), n); - unsigned i = lo, j = mid, k = lo; - // Take from the right run only on a strict decrease, so equal/ - // undecided pairs keep their relative order (stable). - while (i < mid && j < hi) - tmp[k++] = less(perm[j], perm[i]) ? perm[j++] : perm[i++]; - while (i < mid) - tmp[k++] = perm[i++]; - while (j < hi) - tmp[k++] = perm[j++]; - } - perm.swap(tmp); - } + std_vector scratch(n); + stable_index_merge_sort(perm.data(), scratch.data(), n, less); } // Apply a permutation to a range of root_functions using swap cycles, diff --git a/src/util/index_sort_with_mutations.h b/src/util/index_sort_with_mutations.h new file mode 100644 index 0000000000..9852a87f5c --- /dev/null +++ b/src/util/index_sort_with_mutations.h @@ -0,0 +1,79 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + index_sort_with_mutations.h + +Abstract: + + Bounds-safe, mutation-aware stable merge sort of an index permutation. + + This exists to be shared between sites whose ordering comparator is NOT a + fixed strict weak ordering over a single sort -- typically because the + comparator MUTATES the objects it compares (e.g. algebraic numbers, whose + comparison refines their isolating intervals) and may even throw when a + resource limit is hit mid-sort. Against such a comparator std::sort + (introsort) is undefined behavior: it relies on a comparator-derived + sentinel and re-compares a pivot repeatedly, so a comparison that + transiently strengthens from "uncertain" to "decided" invalidates the + sentinel and its *unguarded* insertion pass walks off the array -> an + out-of-bounds read -> SIGSEGV (a try/catch could not help). + + Merge sort is safe BECAUSE of how it meets a mutating comparator: + 1. It never re-compares a pair (each unordered pair is compared at exactly + one merge level), so "the verdict for this pair changed" cannot occur + within the sort. + 2. It uses no comparator-derived sentinel; every loop bound is arithmetic + (i < mid, j < hi), so an inconsistent comparator can only yield a wrong + order, never an out-of-bounds access or non-termination. + 3. If the comparator's refinement is monotone toward a true order, runs + are ordered by decided verdicts that later refinement cannot un-decide, + so each run stays sorted and deeper merges inherit cheaper comparisons. + It runs in O(n log n) comparisons and O(n) scratch, is stable, and unwinds + cleanly if the comparator throws on cancellation. + +Author: + + Lev Nachmanson 2026 + +--*/ +#pragma once + +#include + +/** + \brief Stable merge sort of an index permutation. + + \c perm and \c scratch must each point to an array of \c n unsigned values; + \c perm holds the permutation to sort (typically 0..n-1). \c less(a, b) takes + two element indices and returns true iff element \c a must come strictly + before element \c b. On return \c perm holds the sorted permutation and + \c scratch has been used as working space. Equal/undecided pairs keep their + relative order (stable). +*/ +template +void stable_index_merge_sort(unsigned* perm, unsigned* scratch, unsigned n, Less less) { + if (n < 2) + return; + unsigned* src = perm; + unsigned* dst = scratch; + for (unsigned width = 1; width < n; width <<= 1) { + for (unsigned lo = 0; lo < n; lo += (width << 1)) { + unsigned mid = std::min(lo + width, n); + unsigned hi = std::min(lo + (width << 1), n); + unsigned i = lo, j = mid, k = lo; + // Take from the right run only on a strict decrease, so equal/ + // undecided pairs keep their relative order (stable). + while (i < mid && j < hi) + dst[k++] = less(src[j], src[i]) ? src[j++] : src[i++]; + while (i < mid) + dst[k++] = src[i++]; + while (j < hi) + dst[k++] = src[j++]; + } + std::swap(src, dst); + } + if (src != perm) + std::copy(src, src + n, perm); +}