3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00

seq::derive — Stage 2: range_predicate-based path state

Replace seq::derive's m_intervals + m_intervals_start append-only
char-range stack and the imperative intersect_intervals /
exclude_interval helpers with a single canonical range_predicate
m_path_pred that tracks the feasible character set under the
current path.

* Add range_predicate_translator: pure AST -> range_predicate
  translator for the boolean-over-char_le fragment
  (true/false, eq with const, char_le with const, not/and/or any
  nesting). Returns false on the first sub-term outside the
  fragment so the caller can fall back to other reasoning.

* push_intervals_impl: translate the candidate predicate to a
  range_predicate and reduce path tracking to set arithmetic
  (intersection + subset/empty checks). The legacy top-level
  and/or descent is preserved for mixed char / non-char
  conditions.

* eval_range_cond: implication becomes subset_of and contradiction
  becomes !intersects, both linear in the number of ranges with no
  AST allocation.

* range_predicate gains subset_of / intersects / disjoint_from to
  support allocation-free path queries.

* path_save now stores the saved range_predicate by value; the
  stack switches from svector (CallDestructors=false) to vector
  because range_predicate owns an inner svector.

Tests: 91/91 pass with /a, including the new
range_predicate_translator unit test exercising true/false, eq,
char_le, and/or/not, and De Morgan agreement.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-14 15:11:20 -07:00
parent 904090eec0
commit 4bef7d513c
10 changed files with 405 additions and 128 deletions

View file

@ -36,6 +36,7 @@ z3_add_component(rewriter
push_app_ite.cpp
quant_hoist.cpp
range_predicate.cpp
range_predicate_translator.cpp
recfun_rewriter.cpp
rewriter.cpp
seq_axioms.cpp

View file

@ -101,6 +101,41 @@ namespace seq {
return n;
}
bool range_predicate::subset_of(range_predicate const& o) const {
SASSERT(m_max_char == o.m_max_char);
// Every range of *this must lie inside some range of o.
// Both range sequences are sorted, so we can walk them linearly.
unsigned j = 0;
const unsigned n = m_ranges.size();
const unsigned m = o.m_ranges.size();
for (unsigned i = 0; i < n; ++i) {
auto [a_lo, a_hi] = m_ranges[i];
while (j < m && o.m_ranges[j].second < a_lo)
++j;
if (j == m)
return false;
auto [b_lo, b_hi] = o.m_ranges[j];
if (a_lo < b_lo || a_hi > b_hi)
return false;
}
return true;
}
bool range_predicate::intersects(range_predicate const& o) const {
SASSERT(m_max_char == o.m_max_char);
unsigned i = 0, j = 0;
const unsigned n = m_ranges.size();
const unsigned m = o.m_ranges.size();
while (i < n && j < m) {
auto [a_lo, a_hi] = m_ranges[i];
auto [b_lo, b_hi] = o.m_ranges[j];
if (a_hi < b_lo) ++i;
else if (b_hi < a_lo) ++j;
else return true;
}
return false;
}
// -----------------------------------------------------------------------
// Equality, ordering, hashing
// -----------------------------------------------------------------------

View file

@ -90,6 +90,12 @@ namespace seq {
}
bool contains(unsigned c) const;
// (*this) is a subset of o.
bool subset_of(range_predicate const& o) const;
// (*this) and o share at least one character.
bool intersects(range_predicate const& o) const;
bool disjoint_from(range_predicate const& o) const { return !intersects(o); }
// Number of characters in the predicate (well-defined for any domain).
uint64_t cardinality() const;

View file

@ -0,0 +1,102 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
range_predicate_translator.cpp
Abstract:
Implementation of seq::range_predicate_translator. See header for the
algebraic specification of the translatable fragment.
Authors:
Margus Veanes (veanes) 2026
--*/
#include "ast/rewriter/range_predicate_translator.h"
namespace seq {
bool range_predicate_translator::translate(expr* ele, expr* cond,
range_predicate& out) const {
unsigned const max_char = m_u.max_char();
if (m.is_true(cond)) {
out = range_predicate::top(max_char);
return true;
}
if (m.is_false(cond)) {
out = range_predicate::empty(max_char);
return true;
}
expr* a = nullptr;
expr* b = nullptr;
unsigned c = 0;
if (m.is_eq(cond, a, b)) {
if (a == ele && m_u.is_const_char(b, c)) {
out = range_predicate::singleton(c, max_char);
return true;
}
if (b == ele && m_u.is_const_char(a, c)) {
out = range_predicate::singleton(c, max_char);
return true;
}
return false;
}
if (m_u.is_char_le(cond, a, b)) {
// ele <= c --> [0, c]
if (a == ele && m_u.is_const_char(b, c)) {
out = range_predicate::range(0, c, max_char);
return true;
}
// c <= ele --> [c, max_char]
if (b == ele && m_u.is_const_char(a, c)) {
out = range_predicate::range(c, max_char, max_char);
return true;
}
return false;
}
expr* arg = nullptr;
if (m.is_not(cond, arg)) {
range_predicate tmp(max_char);
if (!translate(ele, arg, tmp))
return false;
out = ~tmp;
return true;
}
if (m.is_and(cond)) {
range_predicate acc = range_predicate::top(max_char);
for (expr* sub : *to_app(cond)) {
range_predicate p(max_char);
if (!translate(ele, sub, p))
return false;
acc = acc & p;
}
out = acc;
return true;
}
if (m.is_or(cond)) {
range_predicate acc = range_predicate::empty(max_char);
for (expr* sub : *to_app(cond)) {
range_predicate p(max_char);
if (!translate(ele, sub, p))
return false;
acc = acc | p;
}
out = acc;
return true;
}
return false;
}
}

View file

@ -0,0 +1,57 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
range_predicate_translator.h
Abstract:
Translates Boolean conditions over a designated character element into
seq::range_predicate values.
Symbolic derivative computation produces transition regexes whose ITE
conditions are character predicates over a free element variable
(the de Bruijn variable 0). This translator lifts such conditions
into the range-algebra value representation so that path reasoning
becomes pure value-level set arithmetic instead of AST-level boolean
rewriting.
Translated fragment (any nesting of the following):
true, false
eq(ele, char_const) eq(char_const, ele)
char_le(ele, char_const) char_le(char_const, ele)
and(...), or(...), not(...)
Conditions outside this fragment are reported as non-translatable so
the caller can fall back to other reasoning.
Authors:
Margus Veanes (veanes) 2026
--*/
#pragma once
#include "ast/rewriter/range_predicate.h"
#include "ast/seq_decl_plugin.h"
namespace seq {
class range_predicate_translator {
ast_manager& m;
seq_util& m_u;
public:
range_predicate_translator(ast_manager& m, seq_util& u) : m(m), m_u(u) {}
// Translate cond into the range_predicate denoting
// { c in [0, max_char] : cond[ele := c] evaluates to true }.
// Returns true and stores the result in out on success; returns
// false (leaving out untouched) if cond contains a sub-term that
// is not in the translatable fragment described above.
bool translate(expr* ele, expr* cond, range_predicate& out) const;
};
}

View file

@ -89,9 +89,7 @@ namespace seq {
m_depth = 0;
// Initialize path state for inline pruning
m_path.reset();
m_intervals.reset();
m_intervals.push_back({0u, u().max_char()});
m_intervals_start = 0;
m_path_pred = range_predicate::top(u().max_char());
m_path_expr = m.mk_true();
result = derive_rec(r);
m_top_cache.insert(r, result);
@ -915,31 +913,27 @@ namespace seq {
// Check if (c, sign) is already determined by the path
lbool cv = eval_path_cond(c);
if (cv == l_true && !sign) return l_true; // c implied true, push(c,false) is redundant
if (cv == l_false && sign) return l_true; // c implied false, push(c,true) is redundant
if (cv == l_false && sign) return l_true; // c implied false, push(c,true) is redundant
if (cv == l_true && sign) return l_false; // c implied true, push(c,true) contradicts
if (cv == l_false && !sign) return l_false; // c implied false, push(c,false) contradicts
// Save current state
unsigned saved_path_sz = m_path.size();
unsigned saved_intervals_sz = m_intervals.size();
unsigned saved_intervals_start = m_intervals_start;
range_predicate saved_path_pred = m_path_pred;
expr* saved_path_expr = m_path_expr;
// Push atoms onto path and check for contradiction or implication
lbool result = push_path_atoms(c, sign);
if (result != l_undef) {
m_path.shrink(saved_path_sz);
m_intervals.shrink(saved_intervals_sz);
m_intervals_start = saved_intervals_start;
return result;
}
// Update intervals
// Update path predicate (feasible character set)
result = push_intervals_impl(c, sign);
if (result != l_undef) {
m_path.shrink(saved_path_sz);
m_intervals.shrink(saved_intervals_sz);
m_intervals_start = saved_intervals_start;
m_path_pred = std::move(saved_path_pred);
return result;
}
@ -949,16 +943,15 @@ namespace seq {
m_trail.push_back(m_path_expr);
// Commit: save state for pop()
m_path_stack.push_back({ saved_path_sz, saved_intervals_sz, saved_intervals_start, saved_path_expr });
m_path_stack.push_back({ saved_path_sz, std::move(saved_path_pred), saved_path_expr });
return l_undef;
}
void derive::pop() {
SASSERT(!m_path_stack.empty());
auto const& saved = m_path_stack.back();
auto& saved = m_path_stack.back();
m_path.shrink(saved.path_sz);
m_intervals.shrink(saved.intervals_sz);
m_intervals_start = saved.intervals_start;
m_path_pred = std::move(saved.saved_path_pred);
m_path_expr = saved.path_expr;
m_path_stack.pop_back();
}
@ -1083,50 +1076,35 @@ namespace seq {
return l_undef;
}
// Update m_intervals based on the condition. Returns l_true if implied, l_false if inconsistent, l_undef if pushed.
// Operates on the active suffix m_intervals[m_intervals_start..end].
// On modification, appends new intervals and updates m_intervals_start.
// Update m_path_pred with the feasible-character constraint induced by
// (c, sign). Returns l_true if already implied (no change), l_false if
// the resulting set becomes empty (contradiction), and l_undef otherwise.
lbool derive::push_intervals_impl(expr* c, bool sign) {
unsigned lo = 0, hi = 0;
bool negated = false;
if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) {
bool effective_neg = (negated != sign);
if (!effective_neg) {
if (lo <= hi) {
// Check if current intervals already imply [lo,hi]
bool already_subset = true;
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
if (m_intervals[i].first < lo || m_intervals[i].second > hi) { already_subset = false; break; }
}
if (already_subset) return l_true;
intersect_intervals(lo, hi);
} else {
// lo > hi means empty range — contradiction
return l_false;
}
} else {
if (lo <= hi) {
// Check if current intervals already exclude [lo,hi]
bool already_excluded = true;
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
if (m_intervals[i].first <= hi && m_intervals[i].second >= lo) { already_excluded = false; break; }
}
if (already_excluded) return l_true;
exclude_interval(lo, hi);
}
}
} else if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
range_predicate p(u().max_char());
if (m_pred_xlate.translate(m_ele, c, p)) {
range_predicate constraint = sign ? ~p : std::move(p);
if (m_path_pred.subset_of(constraint))
return l_true;
range_predicate new_pred = m_path_pred & constraint;
if (new_pred.is_empty())
return l_false;
m_path_pred = std::move(new_pred);
return l_undef;
}
// Translation failed: descend into a top-level and (under sign=false)
// or top-level or (under sign=true) to extract translatable
// sub-conditions. Non-translatable arguments are ignored and only
// weaken the implied/undef return.
if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
bool all_implied = true;
for (expr* arg : *to_app(c)) {
lbool r = push_intervals_impl(arg, sign);
if (r == l_false) return l_false;
if (r == l_undef) all_implied = false;
}
unsigned n = m_intervals.size() - m_intervals_start;
return all_implied ? l_true : (n == 0 ? l_false : l_undef);
return all_implied ? l_true : (m_path_pred.is_empty() ? l_false : l_undef);
}
unsigned n = m_intervals.size() - m_intervals_start;
return n == 0 ? l_false : l_undef;
return m_path_pred.is_empty() ? l_false : l_undef;
}
// Evaluate a condition against the current path and intervals.
@ -1219,77 +1197,18 @@ namespace seq {
}
lbool derive::eval_range_cond(expr* c) {
unsigned n = m_intervals.size() - m_intervals_start;
if (n == 0)
if (m_path_pred.is_empty())
return l_false;
unsigned lo = 0, hi = 0;
bool negated = false;
if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated))
range_predicate p(u().max_char());
if (!m_pred_xlate.translate(m_ele, c, p))
return l_undef;
if (lo > hi) {
return negated ? l_true : l_false;
}
// Check if [lo, hi] overlaps with intervals and/or contains all intervals
bool any_overlap = false;
bool all_contained = true;
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
auto [r_lo, r_hi] = m_intervals[i];
if (std::max(r_lo, lo) <= std::min(r_hi, hi))
any_overlap = true;
if (r_lo < lo || r_hi > hi)
all_contained = false;
}
if (!negated) {
if (!any_overlap) return l_false;
if (all_contained) return l_true;
} else {
if (all_contained) return l_false;
if (!any_overlap) return l_true;
}
// c is implied true iff every feasible char satisfies it.
if (m_path_pred.subset_of(p))
return l_true;
// c is implied false iff no feasible char satisfies it.
if (!m_path_pred.intersects(p))
return l_false;
return l_undef;
}
// Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi]
void derive::intersect_intervals(unsigned lo, unsigned hi) {
// Copy active suffix to end, update start, then filter
unsigned old_sz = m_intervals.size();
for (unsigned i = m_intervals_start; i < old_sz; ++i)
m_intervals.push_back(m_intervals[i]);
m_intervals_start = old_sz;
// Filter in-place within new suffix: drop intervals disjoint from [lo,hi],
// keep the intersection for overlapping ones.
unsigned j = m_intervals_start;
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
auto [lo1, hi1] = m_intervals[i];
if (hi < lo1 || lo > hi1)
continue; // disjoint with this interval — drop it
m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
}
m_intervals.shrink(j);
}
// Exclude [lo, hi] from the active suffix m_intervals[m_intervals_start..end]
void derive::exclude_interval(unsigned lo, unsigned hi) {
unsigned max_char = u().max_char();
if (lo == 0 && hi >= max_char) { m_intervals_start = m_intervals.size(); return; }
if (lo == 0) { intersect_intervals(hi + 1, max_char); return; }
if (hi >= max_char) { intersect_intervals(0, lo - 1); return; }
// Each interval [ilo, ihi] minus [lo, hi] → up to 2 pieces
// Append new results past the end, then move start
unsigned old_start = m_intervals_start;
unsigned old_sz = m_intervals.size();
for (unsigned i = old_start; i < old_sz; ++i) {
auto [ilo, ihi] = m_intervals[i];
if (ihi < lo || ilo > hi) {
m_intervals.push_back(m_intervals[i]);
} else {
if (ilo < lo)
m_intervals.push_back({ilo, lo - 1});
if (ihi > hi)
m_intervals.push_back({hi + 1, ihi});
}
}
m_intervals_start = old_sz;
}
}

View file

@ -28,6 +28,8 @@ Authors:
#include "ast/arith_decl_plugin.h"
#include "ast/array_decl_plugin.h"
#include "ast/rewriter/bool_rewriter.h"
#include "ast/rewriter/range_predicate.h"
#include "ast/rewriter/range_predicate_translator.h"
#include "util/obj_pair_hashtable.h"
#include "util/obj_triple_hashtable.h"
#include <functional>
@ -81,16 +83,18 @@ namespace seq {
expr_ref m_ele;
// Path state for inline pruning during mk_inter/mk_union/mk_complement
using intervals_t = svector<std::pair<unsigned, unsigned>>;
// Path: vector of signed atoms
svector<std::pair<expr*, bool>> m_path;
// Intervals: feasible character ranges under current path (append-only)
intervals_t m_intervals;
unsigned m_intervals_start { 0 };
// Stack of saved states for push/pop
struct path_save { unsigned path_sz; unsigned intervals_sz; unsigned intervals_start; expr* path_expr; };
svector<path_save> m_path_stack;
// Feasible characters for the current path, expressed as a sorted
// disjoint range set over [0, max_char].
range_predicate m_path_pred;
// Translator from AST char-predicates to range_predicate values.
range_predicate_translator m_pred_xlate { m, m_util };
// Stack of saved states for push/pop. range_predicate has a non-trivial
// destructor (owns a svector), so use vector<> (CallDestructors=true).
struct path_save { unsigned path_sz; range_predicate saved_path_pred; expr* path_expr; };
vector<path_save> m_path_stack;
// Boolean expression encoding of current path (for cache keys)
expr_ref m_path_expr;
@ -159,8 +163,6 @@ namespace seq {
// Condition evaluation helpers
lbool eval_cond(expr* cond);
lbool eval_range_cond(expr* c);
void intersect_intervals(unsigned lo, unsigned hi);
void exclude_interval(unsigned lo, unsigned hi);
sort* re_sort(expr* r) { return r->get_sort(); }
sort* seq_sort(expr* r) { sort* s = nullptr; m_util.is_re(r, s); return s; }

View file

@ -121,6 +121,7 @@ add_executable(test-z3
quant_solve.cpp
random.cpp
range_predicate.cpp
range_predicate_translator.cpp
rational.cpp
rcf.cpp
region.cpp

View file

@ -114,6 +114,7 @@
X(api_special_relations) \
X(arith_rewriter) \
X(range_predicate) \
X(range_predicate_translator) \
X(check_assumptions) \
X(smt_context) \
X(theory_dl) \

View file

@ -0,0 +1,153 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
range_predicate_translator.cpp - unit tests
--*/
#include "ast/rewriter/range_predicate_translator.h"
#include "ast/reg_decl_plugins.h"
#include "ast/ast_pp.h"
#include "util/util.h"
#include <iostream>
namespace {
using seq::range_predicate;
using seq::range_predicate_translator;
static void check(bool ok, char const* what) {
if (!ok) {
std::cerr << "range_predicate_translator FAILED: " << what << "\n";
ENSURE(false);
}
}
static void run() {
ast_manager m;
reg_decl_plugins(m);
seq_util u(m);
range_predicate_translator xlate(m, u);
unsigned const M = u.max_char();
sort* char_sort = u.mk_char_sort();
expr_ref ele(m.mk_var(0, char_sort), m);
expr_ref c5(u.mk_char(5), m);
expr_ref c10(u.mk_char(10), m);
// true / false
{
range_predicate p(M);
check(xlate.translate(ele, m.mk_true(), p) && p.is_top(), "true -> top");
check(xlate.translate(ele, m.mk_false(), p) && p.is_empty(), "false -> empty");
}
// ele = 5
{
range_predicate p(M);
expr_ref cond(m.mk_eq(ele, c5), m);
check(xlate.translate(ele, cond, p), "ele = 5 translates");
check(p == range_predicate::singleton(5, M), "ele = 5 -> {5}");
}
// 5 = ele
{
range_predicate p(M);
expr_ref cond(m.mk_eq(c5, ele), m);
check(xlate.translate(ele, cond, p), "5 = ele translates");
check(p == range_predicate::singleton(5, M), "5 = ele -> {5}");
}
// ele <= 10
{
range_predicate p(M);
expr_ref cond(u.mk_le(ele, c10), m);
check(xlate.translate(ele, cond, p), "ele <= 10 translates");
check(p == range_predicate::range(0, 10, M), "ele <= 10 -> [0,10]");
}
// 5 <= ele
{
range_predicate p(M);
expr_ref cond(u.mk_le(c5, ele), m);
check(xlate.translate(ele, cond, p), "5 <= ele translates");
check(p == range_predicate::range(5, M, M), "5 <= ele -> [5,M]");
}
// 5 <= ele AND ele <= 10 --> [5, 10]
{
range_predicate p(M);
expr_ref cond(m.mk_and(u.mk_le(c5, ele), u.mk_le(ele, c10)), m);
check(xlate.translate(ele, cond, p), "[5..10] translates");
check(p == range_predicate::range(5, 10, M), "and -> intersection");
}
// not(ele <= 4) AND not(10 <= ele) --> [5, 9]
{
range_predicate p(M);
expr_ref c4(u.mk_char(4), m);
expr_ref cond(m.mk_and(m.mk_not(u.mk_le(ele, c4)), m.mk_not(u.mk_le(c10, ele))), m);
check(xlate.translate(ele, cond, p), "neg ranges translate");
check(p == range_predicate::range(5, 9, M), "negations combine");
}
// ele = 5 OR ele = 10 --> {5} u {10}
{
range_predicate p(M);
expr_ref cond(m.mk_or(m.mk_eq(ele, c5), m.mk_eq(ele, c10)), m);
check(xlate.translate(ele, cond, p), "or translates");
range_predicate expected = range_predicate::singleton(5, M)
| range_predicate::singleton(10, M);
check(p == expected, "or -> union");
}
// not(ele = 5) --> [0,4] u [6,M]
{
range_predicate p(M);
expr_ref cond(m.mk_not(m.mk_eq(ele, c5)), m);
check(xlate.translate(ele, cond, p), "not eq translates");
check(p == ~range_predicate::singleton(5, M), "not eq -> complement");
}
// Non-translatable: ele = ele (eq without const)
{
range_predicate p(M);
expr_ref cond(m.mk_eq(ele, ele), m);
check(!xlate.translate(ele, cond, p), "ele = ele rejected");
}
// Non-translatable: 5 <= other where other is not ele
{
range_predicate p(M);
expr_ref other(m.mk_var(1, char_sort), m);
expr_ref cond(u.mk_le(c5, other), m);
check(!xlate.translate(ele, cond, p), "5 <= other rejected (no ele)");
}
// Non-translatable: AND with one bad child
{
range_predicate p(M);
expr_ref other(m.mk_var(1, char_sort), m);
expr_ref bad(m.mk_eq(ele, other), m);
expr_ref cond(m.mk_and(u.mk_le(c5, ele), bad), m);
check(!xlate.translate(ele, cond, p), "and with bad child rejected");
}
// Non-translatable: OR with one bad child
{
range_predicate p(M);
expr_ref other(m.mk_var(1, char_sort), m);
expr_ref bad(u.mk_le(c5, other), m);
expr_ref cond(m.mk_or(m.mk_eq(ele, c5), bad), m);
check(!xlate.translate(ele, cond, p), "or with bad child rejected");
}
// Cross-check translation of (not (and A B)) = (or (not A) (not B))
{
range_predicate p1(M), p2(M);
expr_ref A(u.mk_le(c5, ele), m);
expr_ref B(u.mk_le(ele, c10), m);
expr_ref de_morgan_lhs(m.mk_not(m.mk_and(A, B)), m);
expr_ref de_morgan_rhs(m.mk_or(m.mk_not(A.get()), m.mk_not(B.get())), m);
check(xlate.translate(ele, de_morgan_lhs, p1), "not(and) translates");
check(xlate.translate(ele, de_morgan_rhs, p2), "or(not,not) translates");
check(p1 == p2, "de Morgan agrees");
}
}
}
void tst_range_predicate_translator() {
run();
std::cout << "range_predicate_translator tests passed\n";
}