3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00

Add seq::range_predicate value type with unit tests

Introduce a specialized range-algebra over the unsigned character

domain [0, max_char], with canonical sorted-disjoint-non-adjacent

representation and linear-time union, intersection, complement,

difference, and symmetric difference operations.

This is Stage 1 of the derive-with-ranges plan: the value type only,

with unit tests covering factories, ordering, hashing, hand-picked

instances, and exhaustive de-Morgan / lattice laws over a small

domain (verified by enumerating all 64 subsets).

Integration with seq::derive's path conditions, the OneStep cache,

and the R&psi smart-constructor rewrite are deferred to later stages.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-14 14:51:40 -07:00 committed by Margus Veanes
parent 3d2d793f1d
commit 904090eec0
6 changed files with 682 additions and 0 deletions

View file

@ -35,6 +35,7 @@ z3_add_component(rewriter
pb2bv_rewriter.cpp
push_app_ite.cpp
quant_hoist.cpp
range_predicate.cpp
recfun_rewriter.cpp
rewriter.cpp
seq_axioms.cpp

View file

@ -0,0 +1,292 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
range_predicate.cpp
Abstract:
Implementation of the specialized range-algebra used by symbolic
derivative computation and regex rewriting. See range_predicate.h
for the algebraic specification.
All Boolean operations are implemented as single linear sweeps over
the canonical sorted range vectors and produce canonical output
(sorted, disjoint, non-adjacent).
Authors:
Margus Veanes (veanes) 2026
--*/
#include "ast/rewriter/range_predicate.h"
#include "util/debug.h"
#include <algorithm>
#include <ostream>
namespace seq {
// -----------------------------------------------------------------------
// Factories
// -----------------------------------------------------------------------
range_predicate range_predicate::empty(unsigned max_char) {
return range_predicate(max_char);
}
range_predicate range_predicate::top(unsigned max_char) {
range_predicate r(max_char);
r.m_ranges.push_back({0u, max_char});
SASSERT(r.well_formed());
return r;
}
range_predicate range_predicate::singleton(unsigned c, unsigned max_char) {
SASSERT(c <= max_char);
range_predicate r(max_char);
r.m_ranges.push_back({c, c});
SASSERT(r.well_formed());
return r;
}
range_predicate range_predicate::range(unsigned lo, unsigned hi, unsigned max_char) {
range_predicate r(max_char);
if (lo <= hi && lo <= max_char) {
unsigned clipped_hi = hi <= max_char ? hi : max_char;
r.m_ranges.push_back({lo, clipped_hi});
}
SASSERT(r.well_formed());
return r;
}
// -----------------------------------------------------------------------
// Invariants and observers
// -----------------------------------------------------------------------
bool range_predicate::well_formed() const {
for (unsigned i = 0; i < m_ranges.size(); ++i) {
auto [lo, hi] = m_ranges[i];
if (lo > hi) return false;
if (hi > m_max_char) return false;
if (i > 0) {
unsigned prev_hi = m_ranges[i - 1].second;
// Non-adjacent and sorted: prev_hi + 1 < lo, with care
// around prev_hi == UINT_MAX which we never expect because
// hi <= m_max_char.
if (prev_hi + 1 >= lo) return false;
}
}
return true;
}
bool range_predicate::contains(unsigned c) const {
// Binary search on first element of pairs.
unsigned lo = 0, hi = m_ranges.size();
while (lo < hi) {
unsigned mid = lo + (hi - lo) / 2;
auto [a, b] = m_ranges[mid];
if (c < a) hi = mid;
else if (c > b) lo = mid + 1;
else return true;
}
return false;
}
uint64_t range_predicate::cardinality() const {
uint64_t n = 0;
for (auto [lo, hi] : m_ranges)
n += static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo) + 1u;
return n;
}
// -----------------------------------------------------------------------
// Equality, ordering, hashing
// -----------------------------------------------------------------------
bool range_predicate::equals(range_predicate const& o) const {
if (m_max_char != o.m_max_char) return false;
if (m_ranges.size() != o.m_ranges.size()) return false;
for (unsigned i = 0; i < m_ranges.size(); ++i)
if (m_ranges[i] != o.m_ranges[i]) return false;
return true;
}
bool range_predicate::operator<(range_predicate const& o) const {
if (m_max_char != o.m_max_char)
return m_max_char < o.m_max_char;
unsigned n = std::min(m_ranges.size(), o.m_ranges.size());
for (unsigned i = 0; i < n; ++i) {
auto a = m_ranges[i];
auto b = o.m_ranges[i];
if (a.first != b.first) return a.first < b.first;
if (a.second != b.second) return a.second < b.second;
}
return m_ranges.size() < o.m_ranges.size();
}
unsigned range_predicate::hash() const {
// FNV-1a 32-bit over (max_char, then each (lo, hi)).
uint32_t h = 2166136261u;
auto step = [&](uint32_t x) {
h ^= x;
h *= 16777619u;
};
step(m_max_char);
for (auto [lo, hi] : m_ranges) {
step(lo);
step(hi);
}
return h;
}
// -----------------------------------------------------------------------
// Boolean operations
// -----------------------------------------------------------------------
namespace {
// Append (lo, hi) to result, merging with the previous range if
// adjacent or overlapping. Maintains canonical form.
inline void append_merged(svector<std::pair<unsigned, unsigned>>& result,
unsigned lo, unsigned hi) {
SASSERT(lo <= hi);
if (!result.empty() && result.back().second + 1 >= lo) {
if (result.back().second < hi)
result.back().second = hi;
} else {
result.push_back({lo, hi});
}
}
}
range_predicate range_predicate::operator|(range_predicate const& o) const {
SASSERT(m_max_char == o.m_max_char);
range_predicate r(m_max_char);
unsigned i = 0, j = 0;
const unsigned n = m_ranges.size();
const unsigned m = o.m_ranges.size();
while (i < n && j < m) {
auto a = m_ranges[i];
auto b = o.m_ranges[j];
if (a.first <= b.first) {
append_merged(r.m_ranges, a.first, a.second);
++i;
} else {
append_merged(r.m_ranges, b.first, b.second);
++j;
}
}
while (i < n) {
auto a = m_ranges[i++];
append_merged(r.m_ranges, a.first, a.second);
}
while (j < m) {
auto b = o.m_ranges[j++];
append_merged(r.m_ranges, b.first, b.second);
}
SASSERT(r.well_formed());
return r;
}
range_predicate range_predicate::operator&(range_predicate const& o) const {
SASSERT(m_max_char == o.m_max_char);
range_predicate r(m_max_char);
unsigned i = 0, j = 0;
const unsigned n = m_ranges.size();
const unsigned m = o.m_ranges.size();
while (i < n && j < m) {
auto [a_lo, a_hi] = m_ranges[i];
auto [b_lo, b_hi] = o.m_ranges[j];
unsigned lo = std::max(a_lo, b_lo);
unsigned hi = std::min(a_hi, b_hi);
if (lo <= hi)
r.m_ranges.push_back({lo, hi});
// Advance the range that ends first.
if (a_hi < b_hi) ++i;
else if (b_hi < a_hi) ++j;
else { ++i; ++j; }
}
SASSERT(r.well_formed());
return r;
}
range_predicate range_predicate::operator~() const {
range_predicate r(m_max_char);
unsigned cursor = 0;
for (auto [lo, hi] : m_ranges) {
if (cursor < lo)
r.m_ranges.push_back({cursor, lo - 1});
// Step past hi without overflow: hi <= m_max_char and we
// only step when more characters remain.
if (hi >= m_max_char) {
cursor = m_max_char + 1; // sentinel: no more characters
break;
}
cursor = hi + 1;
}
if (cursor <= m_max_char)
r.m_ranges.push_back({cursor, m_max_char});
SASSERT(r.well_formed());
return r;
}
range_predicate range_predicate::operator-(range_predicate const& o) const {
SASSERT(m_max_char == o.m_max_char);
// A - B by linear sweep: for each range of A, subtract overlapping
// ranges of B. Both inputs are sorted so we advance j monotonically.
range_predicate r(m_max_char);
unsigned j = 0;
const unsigned m = o.m_ranges.size();
for (auto [a_lo, a_hi] : m_ranges) {
unsigned cursor = a_lo;
while (j < m && o.m_ranges[j].second < cursor)
++j;
unsigned k = j;
while (k < m && o.m_ranges[k].first <= a_hi) {
auto [b_lo, b_hi] = o.m_ranges[k];
if (cursor < b_lo)
r.m_ranges.push_back({cursor, std::min(a_hi, b_lo - 1)});
if (b_hi >= a_hi) {
cursor = a_hi + 1;
break;
}
cursor = b_hi + 1;
++k;
}
if (cursor <= a_hi)
r.m_ranges.push_back({cursor, a_hi});
}
SASSERT(r.well_formed());
return r;
}
range_predicate range_predicate::operator^(range_predicate const& o) const {
SASSERT(m_max_char == o.m_max_char);
// (A | B) - (A & B), but implemented directly with one linear sweep
// over the union of breakpoints.
return (*this | o) - (*this & o);
}
// -----------------------------------------------------------------------
// Display
// -----------------------------------------------------------------------
std::ostream& range_predicate::display(std::ostream& out) const {
if (m_ranges.empty()) {
return out << "[]";
}
out << "[";
bool first = true;
for (auto [lo, hi] : m_ranges) {
if (!first) out << ",";
first = false;
if (lo == hi)
out << lo;
else
out << lo << "-" << hi;
}
return out << "]";
}
}

View file

@ -0,0 +1,127 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
range_predicate.h
Abstract:
Specialized range-algebra over an unsigned character domain [0, max_char].
A range_predicate represents a subset of the character domain as a
sorted sequence of non-overlapping, non-adjacent, non-empty ranges:
[(lo_0, hi_0), (lo_1, hi_1), ...] with hi_i + 1 < lo_{i+1}.
The representation is canonical, so two range_predicates over the same
domain are extensionally equivalent iff their internal vectors are
elementwise equal.
All Boolean operations (union, intersection, complement, difference)
are linear in the total number of ranges and produce the canonical
representation.
Intended use:
* path conditions for symbolic derivative computation,
* OneStep predicates capturing length-1 acceptance,
* smart-constructor side conditions for regex rewrites such as
R & psi --> toregex(OneStep(R) & psi).
The type is a pure value: no ast_manager allocation occurs in its
construction or its Boolean operations. Conversion to and from
expr* is the responsibility of a separate translator (see callers
in seq_derive / seq_rewriter).
Authors:
Margus Veanes (veanes) 2026
--*/
#pragma once
#include "util/vector.h"
#include <iosfwd>
#include <utility>
namespace seq {
class range_predicate {
using range_t = std::pair<unsigned, unsigned>;
using ranges_t = svector<range_t>;
// Sorted by first; ranges are disjoint and non-adjacent;
// every range satisfies lo <= hi <= m_max_char.
ranges_t m_ranges;
unsigned m_max_char { 0 };
// Invariant check used in debug builds.
bool well_formed() const;
public:
range_predicate() = default;
explicit range_predicate(unsigned max_char) : m_max_char(max_char) {}
// ---------------- Factory functions ----------------
static range_predicate empty(unsigned max_char);
static range_predicate top(unsigned max_char);
static range_predicate singleton(unsigned c, unsigned max_char);
static range_predicate range(unsigned lo, unsigned hi, unsigned max_char);
// ---------------- Observers ----------------
unsigned max_char() const { return m_max_char; }
unsigned num_ranges() const { return m_ranges.size(); }
range_t operator[](unsigned i) const { return m_ranges[i]; }
ranges_t const& ranges() const { return m_ranges; }
bool is_empty() const { return m_ranges.empty(); }
bool is_top() const {
return m_ranges.size() == 1
&& m_ranges[0].first == 0
&& m_ranges[0].second == m_max_char;
}
bool is_singleton(unsigned& c) const {
if (m_ranges.size() != 1) return false;
if (m_ranges[0].first != m_ranges[0].second) return false;
c = m_ranges[0].first;
return true;
}
bool contains(unsigned c) const;
// Number of characters in the predicate (well-defined for any domain).
uint64_t cardinality() const;
// ---------------- Equality, ordering, hashing ----------------
bool equals(range_predicate const& o) const;
bool operator==(range_predicate const& o) const { return equals(o); }
bool operator!=(range_predicate const& o) const { return !equals(o); }
// Total order: lexicographic on the canonical range sequence,
// with shorter sequences ordered before longer prefixes.
// Predicates over different domains compare by max_char first.
bool operator<(range_predicate const& o) const;
bool less_than(range_predicate const& o) const { return *this < o; }
unsigned hash() const;
// ---------------- Boolean operations ----------------
range_predicate operator|(range_predicate const& o) const; // union
range_predicate operator&(range_predicate const& o) const; // intersection
range_predicate operator-(range_predicate const& o) const; // difference
range_predicate operator^(range_predicate const& o) const; // symmetric diff
range_predicate operator~() const; // complement
// ---------------- Display ----------------
std::ostream& display(std::ostream& out) const;
};
inline std::ostream& operator<<(std::ostream& out, range_predicate const& p) {
return p.display(out);
}
}

View file

@ -120,6 +120,7 @@ add_executable(test-z3
quant_elim.cpp
quant_solve.cpp
random.cpp
range_predicate.cpp
rational.cpp
rcf.cpp
region.cpp

View file

@ -113,6 +113,7 @@
X(api_bug) \
X(api_special_relations) \
X(arith_rewriter) \
X(range_predicate) \
X(check_assumptions) \
X(smt_context) \
X(theory_dl) \

View file

@ -0,0 +1,260 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
test/range_predicate.cpp
Abstract:
Unit tests for the range-algebra value type seq::range_predicate.
The tests exercise:
* factory constructors and canonical-form invariants,
* extensional equality and total ordering,
* Boolean operations (|, &, ~, -, ^) on hand-picked instances,
* exhaustive verification of de-Morgan and lattice laws on a
small character domain, by enumerating every subset.
Author:
Margus Veanes (veanes) 2026
--*/
#include "ast/rewriter/range_predicate.h"
#include "util/debug.h"
#include <cstdint>
#include <iostream>
#include <sstream>
using seq::range_predicate;
namespace {
// Build a range_predicate from a bitmask over [0, max_char] for testing.
range_predicate from_mask(uint64_t mask, unsigned max_char) {
range_predicate r = range_predicate::empty(max_char);
for (unsigned c = 0; c <= max_char; ++c)
if ((mask >> c) & 1u)
r = r | range_predicate::singleton(c, max_char);
return r;
}
// Convert a range_predicate back to a bitmask for cross-checking.
uint64_t to_mask(range_predicate const& r) {
uint64_t mask = 0;
for (unsigned c = 0; c <= r.max_char(); ++c)
if (r.contains(c))
mask |= (uint64_t(1) << c);
return mask;
}
void test_factories() {
auto e = range_predicate::empty(255);
ENSURE(e.is_empty());
ENSURE(!e.is_top());
ENSURE(e.num_ranges() == 0);
ENSURE(e.cardinality() == 0);
auto t = range_predicate::top(255);
ENSURE(!t.is_empty());
ENSURE(t.is_top());
ENSURE(t.num_ranges() == 1);
ENSURE(t.cardinality() == 256);
ENSURE(t.contains(0));
ENSURE(t.contains(255));
auto s = range_predicate::singleton(42, 255);
ENSURE(s.num_ranges() == 1);
ENSURE(s.cardinality() == 1);
ENSURE(s.contains(42));
ENSURE(!s.contains(41));
unsigned c = 0;
ENSURE(s.is_singleton(c));
ENSURE(c == 42);
auto r = range_predicate::range(10, 20, 255);
ENSURE(r.num_ranges() == 1);
ENSURE(r.cardinality() == 11);
ENSURE(r.contains(10));
ENSURE(r.contains(20));
ENSURE(!r.contains(9));
ENSURE(!r.contains(21));
// Reversed bounds produce empty.
auto bad = range_predicate::range(20, 10, 255);
ENSURE(bad.is_empty());
// Clipping at max_char.
auto clipped = range_predicate::range(200, 1000, 255);
ENSURE(clipped.num_ranges() == 1);
ENSURE(clipped[0] == std::make_pair(200u, 255u));
}
void test_equality_and_order() {
auto a = range_predicate::range(1, 5, 31);
auto b = range_predicate::range(1, 5, 31);
auto c = range_predicate::range(1, 6, 31);
ENSURE(a == b);
ENSURE(a != c);
ENSURE(a.hash() == b.hash());
ENSURE(a < c || c < a);
ENSURE(!(a < a));
auto empty = range_predicate::empty(31);
ENSURE(empty < a);
// Canonical merging of adjacent ranges.
auto d = range_predicate::range(0, 4, 31) | range_predicate::range(5, 10, 31);
auto e = range_predicate::range(0, 10, 31);
ENSURE(d == e);
}
void test_union_intersection_hand() {
unsigned const M = 31;
auto a = range_predicate::range(0, 4, M) | range_predicate::range(10, 14, M);
auto b = range_predicate::range(3, 11, M);
auto u = a | b; // [0,14]
ENSURE(u.num_ranges() == 1);
ENSURE(u[0] == std::make_pair(0u, 14u));
auto i = a & b; // [3,4] U [10,11]
ENSURE(i.num_ranges() == 2);
ENSURE(i[0] == std::make_pair(3u, 4u));
ENSURE(i[1] == std::make_pair(10u, 11u));
auto d = a - b; // [0,2] U [12,14]
ENSURE(d.num_ranges() == 2);
ENSURE(d[0] == std::make_pair(0u, 2u));
ENSURE(d[1] == std::make_pair(12u, 14u));
auto x = a ^ b; // [0,2] U [5,9] U [12,14]
ENSURE(x.num_ranges() == 3);
ENSURE(x[0] == std::make_pair(0u, 2u));
ENSURE(x[1] == std::make_pair(5u, 9u));
ENSURE(x[2] == std::make_pair(12u, 14u));
}
void test_complement_hand() {
unsigned const M = 10;
auto e = range_predicate::empty(M);
ENSURE((~e).is_top());
auto t = range_predicate::top(M);
ENSURE((~t).is_empty());
// ~([2,3] U [7,8]) = [0,1] U [4,6] U [9,10]
auto a = range_predicate::range(2, 3, M) | range_predicate::range(7, 8, M);
auto na = ~a;
ENSURE(na.num_ranges() == 3);
ENSURE(na[0] == std::make_pair(0u, 1u));
ENSURE(na[1] == std::make_pair(4u, 6u));
ENSURE(na[2] == std::make_pair(9u, 10u));
// ~([0,4]) = [5,10]
auto b = range_predicate::range(0, 4, M);
auto nb = ~b;
ENSURE(nb.num_ranges() == 1);
ENSURE(nb[0] == std::make_pair(5u, 10u));
// ~([5,10]) = [0,4]
auto cnb = ~nb;
ENSURE(cnb == b);
}
// Exhaustively verify the lattice / de-Morgan laws on a small domain
// by enumerating every possible subset (bitmask).
void test_exhaustive_laws() {
unsigned const M = 5; // 6 characters -> 64 subsets
unsigned const N = 1u << (M + 1);
for (unsigned i = 0; i < N; ++i) {
range_predicate A = from_mask(i, M);
ENSURE(to_mask(A) == i);
// ~ ~ A == A
ENSURE(~~A == A);
// A | ~A == top
ENSURE((A | ~A).is_top());
// A & ~A == empty
ENSURE((A & ~A).is_empty());
// cardinality matches popcount
unsigned pop = 0;
for (unsigned k = 0; k <= M; ++k) if ((i >> k) & 1u) ++pop;
ENSURE(A.cardinality() == pop);
}
for (unsigned i = 0; i < N; ++i) {
range_predicate A = from_mask(i, M);
for (unsigned j = 0; j < N; ++j) {
range_predicate B = from_mask(j, M);
// Bitmask reference semantics.
ENSURE(to_mask(A | B) == (i | j));
ENSURE(to_mask(A & B) == (i & j));
ENSURE(to_mask(A - B) == (i & ~j & ((1u << (M + 1)) - 1u)));
ENSURE(to_mask(A ^ B) == (i ^ j));
// de-Morgan
ENSURE(~(A | B) == (~A & ~B));
ENSURE(~(A & B) == (~A | ~B));
// Commutativity
ENSURE((A | B) == (B | A));
ENSURE((A & B) == (B & A));
// (A - B) == A & ~B
ENSURE((A - B) == (A & ~B));
// (A ^ B) == (A | B) - (A & B)
ENSURE((A ^ B) == ((A | B) - (A & B)));
// Extensional equality is reflexive on equal masks.
if (i == j) {
ENSURE(A == B);
ENSURE(A.hash() == B.hash());
}
}
}
}
void test_total_order_strict() {
unsigned const M = 5;
unsigned const N = 1u << (M + 1);
// Strict total order: for any distinct A, B exactly one of A<B, B<A holds.
for (unsigned i = 0; i < N; ++i) {
range_predicate A = from_mask(i, M);
ENSURE(!(A < A));
for (unsigned j = i + 1; j < N; ++j) {
range_predicate B = from_mask(j, M);
bool lt = A < B;
bool gt = B < A;
ENSURE(lt != gt);
ENSURE(lt || gt);
}
}
}
void test_display() {
std::ostringstream oss;
oss << range_predicate::empty(31);
ENSURE(oss.str() == "[]");
oss.str("");
oss << range_predicate::range(3, 7, 31);
ENSURE(oss.str() == "[3-7]");
oss.str("");
oss << range_predicate::singleton(9, 31);
ENSURE(oss.str() == "[9]");
oss.str("");
auto p = range_predicate::range(0, 2, 31) | range_predicate::singleton(5, 31);
oss << p;
ENSURE(oss.str() == "[0-2,5]");
}
}
void tst_range_predicate() {
test_factories();
test_equality_and_order();
test_union_intersection_hand();
test_complement_hand();
test_exhaustive_laws();
test_total_order_strict();
test_display();
std::cout << "range_predicate unit tests passed\n";
}