mirror of
https://github.com/Z3Prover/z3
synced 2026-07-05 14:56:11 +00:00
Derive with ranges (#9965)
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Margus Veanes <margus@microsoft.com> Co-authored-by: Margus Veanes <veanes@users.noreply.github.com>
This commit is contained in:
parent
e76239ceda
commit
15f33f458d
27 changed files with 3597 additions and 1541 deletions
|
|
@ -115,15 +115,18 @@ add_executable(test-z3
|
|||
polynomial_factorization.cpp
|
||||
polynorm.cpp
|
||||
prime_generator.cpp
|
||||
seq_regex_bisim.cpp
|
||||
proof_checker.cpp
|
||||
qe_arith.cpp
|
||||
mbp_qel.cpp
|
||||
quant_elim.cpp
|
||||
quant_solve.cpp
|
||||
random.cpp
|
||||
range_predicate.cpp
|
||||
rational.cpp
|
||||
rcf.cpp
|
||||
region.cpp
|
||||
regex_range_collapse.cpp
|
||||
sat_local_search.cpp
|
||||
sat_lookahead.cpp
|
||||
sat_user_scope.cpp
|
||||
|
|
|
|||
|
|
@ -113,6 +113,8 @@
|
|||
X(api_bug) \
|
||||
X(api_special_relations) \
|
||||
X(arith_rewriter) \
|
||||
X(range_predicate) \
|
||||
X(regex_range_collapse) \
|
||||
X(seq_rewriter) \
|
||||
X(check_assumptions) \
|
||||
X(smt_context) \
|
||||
|
|
@ -195,6 +197,7 @@
|
|||
X(finite_set) \
|
||||
X(finite_set_rewriter) \
|
||||
X(fpa) \
|
||||
X(seq_regex_bisim) \
|
||||
X(term_enumeration) \
|
||||
X(lcube)
|
||||
|
||||
|
|
|
|||
260
src/test/range_predicate.cpp
Normal file
260
src/test/range_predicate.cpp
Normal file
|
|
@ -0,0 +1,260 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
test/range_predicate.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Unit tests for the range-algebra value type seq::range_predicate.
|
||||
|
||||
The tests exercise:
|
||||
* factory constructors and canonical-form invariants,
|
||||
* extensional equality and total ordering,
|
||||
* Boolean operations (|, &, ~, -, ^) on hand-picked instances,
|
||||
* exhaustive verification of de-Morgan and lattice laws on a
|
||||
small character domain, by enumerating every subset.
|
||||
|
||||
Author:
|
||||
|
||||
Margus Veanes (veanes) 2026
|
||||
|
||||
--*/
|
||||
|
||||
#include "ast/rewriter/seq_range_predicate.h"
|
||||
#include "util/debug.h"
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
using seq::range_predicate;
|
||||
|
||||
namespace {
|
||||
|
||||
// Build a range_predicate from a bitmask over [0, max_char] for testing.
|
||||
range_predicate from_mask(uint64_t mask, unsigned max_char) {
|
||||
range_predicate r = range_predicate::empty(max_char);
|
||||
for (unsigned c = 0; c <= max_char; ++c)
|
||||
if ((mask >> c) & 1u)
|
||||
r = r | range_predicate::singleton(c, max_char);
|
||||
return r;
|
||||
}
|
||||
|
||||
// Convert a range_predicate back to a bitmask for cross-checking.
|
||||
uint64_t to_mask(range_predicate const& r) {
|
||||
uint64_t mask = 0;
|
||||
for (unsigned c = 0; c <= r.max_char(); ++c)
|
||||
if (r.contains(c))
|
||||
mask |= (uint64_t(1) << c);
|
||||
return mask;
|
||||
}
|
||||
|
||||
void test_factories() {
|
||||
auto e = range_predicate::empty(255);
|
||||
ENSURE(e.is_empty());
|
||||
ENSURE(!e.is_top());
|
||||
ENSURE(e.num_ranges() == 0);
|
||||
ENSURE(e.cardinality() == 0);
|
||||
|
||||
auto t = range_predicate::top(255);
|
||||
ENSURE(!t.is_empty());
|
||||
ENSURE(t.is_top());
|
||||
ENSURE(t.num_ranges() == 1);
|
||||
ENSURE(t.cardinality() == 256);
|
||||
ENSURE(t.contains(0));
|
||||
ENSURE(t.contains(255));
|
||||
|
||||
auto s = range_predicate::singleton(42, 255);
|
||||
ENSURE(s.num_ranges() == 1);
|
||||
ENSURE(s.cardinality() == 1);
|
||||
ENSURE(s.contains(42));
|
||||
ENSURE(!s.contains(41));
|
||||
unsigned c = 0;
|
||||
ENSURE(s.is_singleton(c));
|
||||
ENSURE(c == 42);
|
||||
|
||||
auto r = range_predicate::range(10, 20, 255);
|
||||
ENSURE(r.num_ranges() == 1);
|
||||
ENSURE(r.cardinality() == 11);
|
||||
ENSURE(r.contains(10));
|
||||
ENSURE(r.contains(20));
|
||||
ENSURE(!r.contains(9));
|
||||
ENSURE(!r.contains(21));
|
||||
|
||||
// Reversed bounds produce empty.
|
||||
auto bad = range_predicate::range(20, 10, 255);
|
||||
ENSURE(bad.is_empty());
|
||||
|
||||
// Clipping at max_char.
|
||||
auto clipped = range_predicate::range(200, 1000, 255);
|
||||
ENSURE(clipped.num_ranges() == 1);
|
||||
ENSURE(clipped[0] == std::make_pair(200u, 255u));
|
||||
}
|
||||
|
||||
void test_equality_and_order() {
|
||||
auto a = range_predicate::range(1, 5, 31);
|
||||
auto b = range_predicate::range(1, 5, 31);
|
||||
auto c = range_predicate::range(1, 6, 31);
|
||||
ENSURE(a == b);
|
||||
ENSURE(a != c);
|
||||
ENSURE(a.hash() == b.hash());
|
||||
ENSURE(a < c || c < a);
|
||||
ENSURE(!(a < a));
|
||||
|
||||
auto empty = range_predicate::empty(31);
|
||||
ENSURE(empty < a);
|
||||
|
||||
// Canonical merging of adjacent ranges.
|
||||
auto d = range_predicate::range(0, 4, 31) | range_predicate::range(5, 10, 31);
|
||||
auto e = range_predicate::range(0, 10, 31);
|
||||
ENSURE(d == e);
|
||||
}
|
||||
|
||||
void test_union_intersection_hand() {
|
||||
unsigned const M = 31;
|
||||
auto a = range_predicate::range(0, 4, M) | range_predicate::range(10, 14, M);
|
||||
auto b = range_predicate::range(3, 11, M);
|
||||
|
||||
auto u = a | b; // [0,14]
|
||||
ENSURE(u.num_ranges() == 1);
|
||||
ENSURE(u[0] == std::make_pair(0u, 14u));
|
||||
|
||||
auto i = a & b; // [3,4] U [10,11]
|
||||
ENSURE(i.num_ranges() == 2);
|
||||
ENSURE(i[0] == std::make_pair(3u, 4u));
|
||||
ENSURE(i[1] == std::make_pair(10u, 11u));
|
||||
|
||||
auto d = a - b; // [0,2] U [12,14]
|
||||
ENSURE(d.num_ranges() == 2);
|
||||
ENSURE(d[0] == std::make_pair(0u, 2u));
|
||||
ENSURE(d[1] == std::make_pair(12u, 14u));
|
||||
|
||||
auto x = a ^ b; // [0,2] U [5,9] U [12,14]
|
||||
ENSURE(x.num_ranges() == 3);
|
||||
ENSURE(x[0] == std::make_pair(0u, 2u));
|
||||
ENSURE(x[1] == std::make_pair(5u, 9u));
|
||||
ENSURE(x[2] == std::make_pair(12u, 14u));
|
||||
}
|
||||
|
||||
void test_complement_hand() {
|
||||
unsigned const M = 10;
|
||||
auto e = range_predicate::empty(M);
|
||||
ENSURE((~e).is_top());
|
||||
auto t = range_predicate::top(M);
|
||||
ENSURE((~t).is_empty());
|
||||
|
||||
// ~([2,3] U [7,8]) = [0,1] U [4,6] U [9,10]
|
||||
auto a = range_predicate::range(2, 3, M) | range_predicate::range(7, 8, M);
|
||||
auto na = ~a;
|
||||
ENSURE(na.num_ranges() == 3);
|
||||
ENSURE(na[0] == std::make_pair(0u, 1u));
|
||||
ENSURE(na[1] == std::make_pair(4u, 6u));
|
||||
ENSURE(na[2] == std::make_pair(9u, 10u));
|
||||
|
||||
// ~([0,4]) = [5,10]
|
||||
auto b = range_predicate::range(0, 4, M);
|
||||
auto nb = ~b;
|
||||
ENSURE(nb.num_ranges() == 1);
|
||||
ENSURE(nb[0] == std::make_pair(5u, 10u));
|
||||
|
||||
// ~([5,10]) = [0,4]
|
||||
auto cnb = ~nb;
|
||||
ENSURE(cnb == b);
|
||||
}
|
||||
|
||||
// Exhaustively verify the lattice / de-Morgan laws on a small domain
|
||||
// by enumerating every possible subset (bitmask).
|
||||
void test_exhaustive_laws() {
|
||||
unsigned const M = 5; // 6 characters -> 64 subsets
|
||||
unsigned const N = 1u << (M + 1);
|
||||
for (unsigned i = 0; i < N; ++i) {
|
||||
range_predicate A = from_mask(i, M);
|
||||
ENSURE(to_mask(A) == i);
|
||||
// ~ ~ A == A
|
||||
ENSURE(~~A == A);
|
||||
// A | ~A == top
|
||||
ENSURE((A | ~A).is_top());
|
||||
// A & ~A == empty
|
||||
ENSURE((A & ~A).is_empty());
|
||||
// cardinality matches popcount
|
||||
unsigned pop = 0;
|
||||
for (unsigned k = 0; k <= M; ++k) if ((i >> k) & 1u) ++pop;
|
||||
ENSURE(A.cardinality() == pop);
|
||||
}
|
||||
for (unsigned i = 0; i < N; ++i) {
|
||||
range_predicate A = from_mask(i, M);
|
||||
for (unsigned j = 0; j < N; ++j) {
|
||||
range_predicate B = from_mask(j, M);
|
||||
// Bitmask reference semantics.
|
||||
ENSURE(to_mask(A | B) == (i | j));
|
||||
ENSURE(to_mask(A & B) == (i & j));
|
||||
ENSURE(to_mask(A - B) == (i & ~j & ((1u << (M + 1)) - 1u)));
|
||||
ENSURE(to_mask(A ^ B) == (i ^ j));
|
||||
// de-Morgan
|
||||
ENSURE(~(A | B) == (~A & ~B));
|
||||
ENSURE(~(A & B) == (~A | ~B));
|
||||
// Commutativity
|
||||
ENSURE((A | B) == (B | A));
|
||||
ENSURE((A & B) == (B & A));
|
||||
// (A - B) == A & ~B
|
||||
ENSURE((A - B) == (A & ~B));
|
||||
// (A ^ B) == (A | B) - (A & B)
|
||||
ENSURE((A ^ B) == ((A | B) - (A & B)));
|
||||
// Extensional equality is reflexive on equal masks.
|
||||
if (i == j) {
|
||||
ENSURE(A == B);
|
||||
ENSURE(A.hash() == B.hash());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void test_total_order_strict() {
|
||||
unsigned const M = 5;
|
||||
unsigned const N = 1u << (M + 1);
|
||||
// Strict total order: for any distinct A, B exactly one of A<B, B<A holds.
|
||||
for (unsigned i = 0; i < N; ++i) {
|
||||
range_predicate A = from_mask(i, M);
|
||||
ENSURE(!(A < A));
|
||||
for (unsigned j = i + 1; j < N; ++j) {
|
||||
range_predicate B = from_mask(j, M);
|
||||
bool lt = A < B;
|
||||
bool gt = B < A;
|
||||
ENSURE(lt != gt);
|
||||
ENSURE(lt || gt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the textual form produced by streaming a range_predicate:
// "[]" for empty, "[lo-hi]" for a range, "[c]" for a singleton, and a
// comma-separated list for several ranges.
void test_display() {
    // Streams one predicate into a fresh buffer and returns the text.
    auto render = [](range_predicate const& p) {
        std::ostringstream out;
        out << p;
        return out.str();
    };

    ENSURE(render(range_predicate::empty(31)) == "[]");
    ENSURE(render(range_predicate::range(3, 7, 31)) == "[3-7]");
    ENSURE(render(range_predicate::singleton(9, 31)) == "[9]");

    auto mixed = range_predicate::range(0, 2, 31) | range_predicate::singleton(5, 31);
    ENSURE(render(mixed) == "[0-2,5]");
}
|
||||
|
||||
}
|
||||
|
||||
// Entry point for the range_predicate unit tests: runs every test case in a
// fixed order and prints a confirmation line once all of them returned.
void tst_range_predicate() {
    using test_case = void (*)();
    test_case const suite[] = {
        test_factories,
        test_equality_and_order,
        test_union_intersection_hand,
        test_complement_hand,
        test_exhaustive_laws,
        test_total_order_strict,
        test_display,
    };
    for (test_case run_case : suite)
        run_case();
    std::cout << "range_predicate unit tests passed\n";
}
|
||||
260
src/test/regex_range_collapse.cpp
Normal file
260
src/test/regex_range_collapse.cpp
Normal file
|
|
@ -0,0 +1,260 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
regex_range_collapse.cpp - unit tests
|
||||
|
||||
--*/
|
||||
|
||||
#include "ast/rewriter/seq_range_collapse.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "util/util.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace {
|
||||
|
||||
using seq::range_predicate;
|
||||
using seq::regex_to_range_predicate;
|
||||
using seq::range_predicate_to_regex;
|
||||
|
||||
static void check(bool ok, char const* what) {
|
||||
if (!ok) {
|
||||
std::cerr << "regex_range_collapse FAILED: " << what << "\n";
|
||||
ENSURE(false);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a string-constant expression from zstring(c), i.e. the string made
// of the single character code `c`, owned by u's manager.
static expr_ref mk_singleton_str(seq_util& u, unsigned c) {
    zstring const one_char(c);
    return expr_ref(u.str.mk_string(one_char), u.get_manager());
}
|
||||
|
||||
// Extracts the inclusive character bounds [lo, hi] denoted by `e`.
// Accepted shapes:
//   * to_re of a one-character string constant (lo == hi),
//   * re.range whose bounds are one-character string constants,
//   * re.range recognised by the numeric is_range overload,
//   * re.range whose bounds are seq.unit of constant characters.
// Returns false for anything else; lo/hi may have been written to even when
// the result is false.
static bool extract_range_chars(seq_util& u, expr* e, unsigned& lo, unsigned& hi) {
    // Single character given as (str.to_re "c").
    expr* literal = nullptr;
    zstring text;
    if (u.re.is_to_re(e, literal) && u.str.is_string(literal, text) && text.length() == 1) {
        lo = hi = text[0];
        return true;
    }

    // Everything below requires a re.range node.
    expr* lo_e = nullptr;
    expr* hi_e = nullptr;
    if (!u.re.is_range(e, lo_e, hi_e))
        return false;

    // Bounds given as one-character string constants.
    zstring lo_str, hi_str;
    if (u.str.is_string(lo_e, lo_str) && u.str.is_string(hi_e, hi_str) &&
        lo_str.length() == 1 && hi_str.length() == 1) {
        lo = lo_str[0];
        hi = hi_str[0];
        return true;
    }

    // Accept either string-constant or (seq.unit (Char N)) bound form.
    if (u.re.is_range(e, lo, hi))
        return true;

    expr* lo_ch = nullptr;
    expr* hi_ch = nullptr;
    return u.str.is_unit(lo_e, lo_ch) && u.is_const_char(lo_ch, lo) &&
           u.str.is_unit(hi_e, hi_ch) && u.is_const_char(hi_ch, hi);
}
|
||||
|
||||
static void run() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
unsigned const M = u.max_char();
|
||||
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
sort* re_sort = u.re.mk_re(str_sort);
|
||||
|
||||
// primitives
|
||||
{
|
||||
range_predicate p(M);
|
||||
check(regex_to_range_predicate(u, u.re.mk_empty(re_sort), p) && p.is_empty(),
|
||||
"re.empty -> empty");
|
||||
check(regex_to_range_predicate(u, u.re.mk_full_char(re_sort), p) && p.is_top(),
|
||||
"re.full_char -> top");
|
||||
}
|
||||
// re.range "a" "z"
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref a = mk_singleton_str(u, 'a');
|
||||
expr_ref z = mk_singleton_str(u, 'z');
|
||||
expr_ref r(u.re.mk_range(a, z), m);
|
||||
check(regex_to_range_predicate(u, r, p) && p.num_ranges() == 1 &&
|
||||
p[0].first == 'a' && p[0].second == 'z',
|
||||
"re.range a z -> [a,z]");
|
||||
}
|
||||
// Disjoint union: (a..z) | (0..9)
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||
expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
|
||||
expr_ref un(u.re.mk_union(r1, r2), m);
|
||||
check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 2,
|
||||
"(a-z)|(0-9) -> 2 ranges");
|
||||
// canonical order: lower lo first
|
||||
check(p[0].first == '0' && p[0].second == '9' && p[1].first == 'a' && p[1].second == 'z',
|
||||
"(a-z)|(0-9) ranges in canonical order");
|
||||
}
|
||||
// Overlapping union: (a..c) | (b..f) -> (a..f)
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
|
||||
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'b'), mk_singleton_str(u, 'f')), m);
|
||||
expr_ref un(u.re.mk_union(r1, r2), m);
|
||||
check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
|
||||
p[0].first == 'a' && p[0].second == 'f',
|
||||
"(a-c)|(b-f) -> (a-f)");
|
||||
}
|
||||
// Adjacent union: (a..c) | (d..f) -> (a..f) (canonical predicate merges adjacent)
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'c')), m);
|
||||
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'd'), mk_singleton_str(u, 'f')), m);
|
||||
expr_ref un(u.re.mk_union(r1, r2), m);
|
||||
check(regex_to_range_predicate(u, un, p) && p.num_ranges() == 1 &&
|
||||
p[0].first == 'a' && p[0].second == 'f',
|
||||
"(a-c)|(d-f) -> (a-f) via adjacency");
|
||||
}
|
||||
// Disjoint intersection: (a..z) & (0..9) -> empty
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||
expr_ref r2(u.re.mk_range(mk_singleton_str(u, '0'), mk_singleton_str(u, '9')), m);
|
||||
expr_ref ix(u.re.mk_inter(r1, r2), m);
|
||||
check(regex_to_range_predicate(u, ix, p) && p.is_empty(),
|
||||
"(a-z)&(0-9) -> empty");
|
||||
}
|
||||
// Overlapping intersection: (a..f) & (c..z) -> (c..f)
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
|
||||
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
|
||||
expr_ref ix(u.re.mk_inter(r1, r2), m);
|
||||
check(regex_to_range_predicate(u, ix, p) && p.num_ranges() == 1 &&
|
||||
p[0].first == 'c' && p[0].second == 'f',
|
||||
"(a-f)&(c-z) -> (c-f)");
|
||||
}
|
||||
// Complement: re.complement is intentionally NOT a char-class op
|
||||
// (it operates over Σ*), so it must NOT be translated.
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||
expr_ref cmp(u.re.mk_complement(r1), m);
|
||||
check(!regex_to_range_predicate(u, cmp, p),
|
||||
"re.comp of range is NOT translatable (sequence-level complement)");
|
||||
}
|
||||
// Diff: (a..f) \ (c..z) -> (a..b)
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'f')), m);
|
||||
expr_ref r2(u.re.mk_range(mk_singleton_str(u, 'c'), mk_singleton_str(u, 'z')), m);
|
||||
expr_ref df(u.re.mk_diff(r1, r2), m);
|
||||
check(regex_to_range_predicate(u, df, p) && p.num_ranges() == 1 &&
|
||||
p[0].first == 'a' && p[0].second == 'b',
|
||||
"(a-f) \\ (c-z) -> (a-b)");
|
||||
}
|
||||
// Negative: re.* of a range is NOT a char class
|
||||
{
|
||||
range_predicate p(M);
|
||||
expr_ref r1(u.re.mk_range(mk_singleton_str(u, 'a'), mk_singleton_str(u, 'z')), m);
|
||||
expr_ref star(u.re.mk_star(r1), m);
|
||||
check(!regex_to_range_predicate(u, star, p),
|
||||
"re.* of range not translatable");
|
||||
}
|
||||
|
||||
// Negative: a regex whose element type is NOT a sequence of
|
||||
// characters (here (Seq Int)) must be rejected outright, even for
|
||||
// shapes that structurally resemble char-class operators.
|
||||
{
|
||||
range_predicate p(M);
|
||||
arith_util a(m);
|
||||
sort* int_seq = u.str.mk_seq(a.mk_int());
|
||||
sort* int_re = u.re.mk_re(int_seq);
|
||||
check(!regex_to_range_predicate(u, u.re.mk_empty(int_re), p),
|
||||
"re.empty over (Seq Int) is NOT a char class");
|
||||
check(!regex_to_range_predicate(u, u.re.mk_full_char(int_re), p),
|
||||
"re.full_char over (Seq Int) is NOT a char class");
|
||||
}
|
||||
|
||||
// ---- materialization round-trip ----
|
||||
|
||||
// empty -> re.empty
|
||||
{
|
||||
range_predicate p = range_predicate::empty(M);
|
||||
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||
check(u.re.is_empty(e), "empty -> re.empty");
|
||||
}
|
||||
// top -> re.full_char
|
||||
{
|
||||
range_predicate p = range_predicate::top(M);
|
||||
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||
check(u.re.is_full_char(e), "top -> re.full_char");
|
||||
}
|
||||
// single range -> re.range
|
||||
{
|
||||
range_predicate p = range_predicate::range('a', 'z', M);
|
||||
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||
unsigned lo = 0, hi = 0;
|
||||
check(extract_range_chars(u, e, lo, hi) && lo == 'a' && hi == 'z',
|
||||
"[a-z] -> re.range a z");
|
||||
}
|
||||
// singleton -> re.range c c
|
||||
{
|
||||
range_predicate p = range_predicate::singleton('A', M);
|
||||
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||
unsigned lo = 0, hi = 0;
|
||||
check(extract_range_chars(u, e, lo, hi) && lo == 'A' && hi == 'A',
|
||||
"{A} -> re.range A A");
|
||||
}
|
||||
// 2 ranges -> re.union(range_0, range_1) in canonical order
|
||||
{
|
||||
range_predicate p = range_predicate::range('0', '9', M)
|
||||
| range_predicate::range('a', 'z', M);
|
||||
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||
expr* a = nullptr; expr* b = nullptr;
|
||||
check(u.re.is_union(e, a, b), "2-range -> union");
|
||||
unsigned lo0 = 0, hi0 = 0, lo1 = 0, hi1 = 0;
|
||||
check(extract_range_chars(u, a, lo0, hi0) && lo0 == '0' && hi0 == '9',
|
||||
"union arg0 = (0-9) (canonical: lower lo first)");
|
||||
check(extract_range_chars(u, b, lo1, hi1) && lo1 == 'a' && hi1 == 'z',
|
||||
"union arg1 = (a-z)");
|
||||
}
|
||||
// 3 ranges -> right-associated union
|
||||
{
|
||||
range_predicate p = range_predicate::range(0, 5, M)
|
||||
| range_predicate::range(10, 15, M)
|
||||
| range_predicate::range(20, 25, M);
|
||||
expr_ref e = range_predicate_to_regex(u, p, str_sort);
|
||||
expr* a = nullptr; expr* rest = nullptr;
|
||||
check(u.re.is_union(e, a, rest), "3-range -> union(...)");
|
||||
unsigned lo = 0, hi = 0;
|
||||
check(extract_range_chars(u, a, lo, hi) && lo == 0 && hi == 5, "first arg = (0-5)");
|
||||
expr* b = nullptr; expr* c = nullptr;
|
||||
check(u.re.is_union(rest, b, c), "rest is union(...,...)");
|
||||
check(extract_range_chars(u, b, lo, hi) && lo == 10 && hi == 15, "second range");
|
||||
check(extract_range_chars(u, c, lo, hi) && lo == 20 && hi == 25, "third range");
|
||||
}
|
||||
// Round-trip identity for an arbitrary range-set
|
||||
{
|
||||
range_predicate p_in = range_predicate::range('a', 'c', M)
|
||||
| range_predicate::range('m', 'p', M)
|
||||
| range_predicate::range('x', 'z', M);
|
||||
expr_ref e = range_predicate_to_regex(u, p_in, str_sort);
|
||||
range_predicate p_out(M);
|
||||
check(regex_to_range_predicate(u, e, p_out), "round-trip translatable");
|
||||
check(p_in == p_out, "round-trip equal");
|
||||
}
|
||||
|
||||
std::cerr << "regex_range_collapse tests passed\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Externally visible entry point for the regex range-collapse unit tests;
// forwards to the file-local run().
void tst_regex_range_collapse() {
    run();
}
|
||||
127
src/test/seq_regex_bisim.cpp
Normal file
127
src/test/seq_regex_bisim.cpp
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
// Regression test for the seq::derive::intersect_intervals bug.
|
||||
//
|
||||
// Background: derive uses a path-tracking interval set to compute symbolic
|
||||
// derivatives. The intersect_intervals routine used to react to a single
|
||||
// disjoint interval by dropping the entire kept suffix and skipping the rest
|
||||
// of the list, which silently killed valid branches in derivatives such as
|
||||
// D(a|b). That made the bisimulation procedure conclude bogus equalities
|
||||
// like a* == (a|b)*.
|
||||
//
|
||||
// This file also covers the seq::derive top-level-cache poisoning bug.
|
||||
// `m_top_cache` is keyed only by the regex; the routine used to populate it
|
||||
// while `m_ele` was set to a *concrete* character, baking that character
|
||||
// into the cached "symbolic" derivative. Subsequent calls with the same
|
||||
// regex but a different ele then returned a stale concrete answer instead
|
||||
// of the true symbolic derivative. The simplest victim is
|
||||
// (str.in_re "aP" (re.++ (re.* "a") "P"))
|
||||
// which used to return false because the derivative wrt 'a' was cached and
|
||||
// re-used as the derivative wrt 'P'.
|
||||
#include "ast/ast.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "ast/rewriter/seq_regex_bisim.h"
|
||||
#include "ast/rewriter/th_rewriter.h"
|
||||
#include <iostream>
|
||||
|
||||
static void test_a_star_neq_ab_star() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
seq_rewriter rw(m);
|
||||
|
||||
sort_ref str_sort(u.str.mk_string_sort(), m);
|
||||
|
||||
zstring sa("a"), sb("b");
|
||||
expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
|
||||
expr_ref re_b(u.re.mk_to_re(u.str.mk_string(sb)), m);
|
||||
expr_ref a_star(u.re.mk_star(re_a), m);
|
||||
expr_ref ab(u.re.mk_union(re_a, re_b), m);
|
||||
expr_ref ab_star(u.re.mk_star(ab), m);
|
||||
|
||||
expr_ref d_ab = rw.mk_brz_derivative(ab);
|
||||
std::cout << "D(a|b) = " << mk_pp(d_ab, m) << "\n";
|
||||
|
||||
// Both the 'a' branch and the 'b' branch of D(a|b) must reach epsilon.
|
||||
// Collect the regex leaves of the symbolic derivative and require at
|
||||
// least two distinct accepting leaves (one for 'a' and one for 'b').
|
||||
expr_ref_vector leaves(m);
|
||||
auto collect = [&](expr* e, auto&& self) -> void {
|
||||
expr* c, *t, *f;
|
||||
if (m.is_ite(e, c, t, f) || u.re.is_union(e, t, f)) {
|
||||
self(t, self);
|
||||
self(f, self);
|
||||
return;
|
||||
}
|
||||
if (u.re.is_empty(e)) return;
|
||||
leaves.push_back(e);
|
||||
};
|
||||
collect(d_ab, collect);
|
||||
unsigned nullable_leaves = 0;
|
||||
for (expr* l : leaves) {
|
||||
expr_ref n = rw.is_nullable(l);
|
||||
if (m.is_true(n)) ++nullable_leaves;
|
||||
}
|
||||
std::cout << "D(a|b) leaves=" << leaves.size()
|
||||
<< " nullable=" << nullable_leaves << "\n";
|
||||
ENSURE(nullable_leaves >= 2);
|
||||
|
||||
// Bisim must report the two languages are not equivalent.
|
||||
seq::regex_bisim bisim(rw);
|
||||
lbool eq = bisim.are_equivalent(a_star, ab_star);
|
||||
std::cout << "bisim(a*, (a|b)*) = "
|
||||
<< (eq == l_true ? "true" : eq == l_false ? "false" : "undef") << "\n";
|
||||
ENSURE(eq == l_false);
|
||||
}
|
||||
|
||||
// Regression for the derive top-level-cache poisoning bug.
|
||||
// Take r = (re.* "a") ++ "P" and check str.in_re "aP" r. Before the fix
|
||||
// the first per-char derivative call (wrt 'a') populated m_top_cache with
|
||||
// 'a' baked into the symbolic ITE-tree, so the next call (wrt 'P') returned
|
||||
// that stale cached value instead of computing D_P(r) = epsilon, making
|
||||
// str.in_re wrongly return false.
|
||||
static void test_derive_cache_per_ele() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
seq_rewriter rw(m);
|
||||
|
||||
sort_ref str_sort(u.str.mk_string_sort(), m);
|
||||
|
||||
zstring sa("a"), sP("P"), s_aP("aP");
|
||||
expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
|
||||
expr_ref re_P(u.re.mk_to_re(u.str.mk_string(sP)), m);
|
||||
expr_ref a_star(u.re.mk_star(re_a), m);
|
||||
expr_ref r(u.re.mk_concat(a_star, re_P), m);
|
||||
expr_ref aP(u.str.mk_string(s_aP), m);
|
||||
|
||||
// Compute D_'a'(a*P) and D_'P'(a*P) directly via mk_derivative.
|
||||
// Before the fix, m_top_cache was populated while m_ele = ele (the
|
||||
// concrete char), so the second call hit the stale cached answer from
|
||||
// the first. After the fix the cache is keyed by a symbolic var, so
|
||||
// each concrete-ele substitution produces the right answer.
|
||||
expr_ref ch_a(u.mk_char('a'), m);
|
||||
expr_ref ch_P(u.mk_char('P'), m);
|
||||
expr_ref d_a = rw.mk_derivative(ch_a, r);
|
||||
expr_ref d_P = rw.mk_derivative(ch_P, r);
|
||||
std::cout << "D_a(a*P) = " << mk_pp(d_a, m) << "\n";
|
||||
std::cout << "D_P(a*P) = " << mk_pp(d_P, m) << "\n";
|
||||
|
||||
// D_P(a*P) must be nullable (it accepts the empty suffix), while
|
||||
// D_a(a*P) must not be (it still needs a trailing 'P').
|
||||
expr_ref n_a = rw.is_nullable(d_a);
|
||||
expr_ref n_P = rw.is_nullable(d_P);
|
||||
th_rewriter trw(m);
|
||||
trw(n_a);
|
||||
trw(n_P);
|
||||
std::cout << "nullable(D_a) = " << mk_pp(n_a, m) << "\n";
|
||||
std::cout << "nullable(D_P) = " << mk_pp(n_P, m) << "\n";
|
||||
ENSURE(m.is_false(n_a));
|
||||
ENSURE(m.is_true(n_P));
|
||||
}
|
||||
|
||||
// Entry point for the regex derivative / bisimulation regression tests;
// runs the two test cases of this file in order.
void tst_seq_regex_bisim() {
    using test_case = void (*)();
    test_case const suite[] = {
        test_a_star_neq_ab_star,
        test_derive_cache_per_ele,
    };
    for (test_case run_case : suite)
        run_case();
}
|
||||
|
|
@ -124,27 +124,6 @@ void tst_seq_rewriter() {
|
|||
ENSURE(!su.re.is_range(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 9. Range complement (general): no longer a complement node
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref e(su.re.mk_complement(range('b', 'y')), m);
|
||||
rw(e);
|
||||
std::cout << "range comp general: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(!su.re.is_complement(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 10. Range complement (lo = 0): single range e union [hi+1, max].*
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
expr_ref lo_str(su.str.mk_string(zstring(0u)), m);
|
||||
expr_ref hi_str(su.str.mk_string(zstring((unsigned)'f')), m);
|
||||
expr_ref e(su.re.mk_complement(su.re.mk_range(lo_str, hi_str)), m);
|
||||
rw(e);
|
||||
std::cout << "range comp lo=min: " << mk_pp(e, m) << "\n";
|
||||
ENSURE(!su.re.is_complement(e));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 11. Downstream: (re.* (re.range "z" "a")) → str.to_re ""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue