mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 07:06:28 +00:00
test: unit tests for OP_RE_CHARCLASS
Add src/test/re_charclass.cpp covering: * mk_charclass + is_charclass round-trip, * hash-cons identity (same predicate -> same app*), * canonical collapses: empty -> re.none, top -> re.full_char, * as_charclass for the supported input shapes (CHARCLASS, re.range with concrete bounds, re.to_re of a single-char string, re.full_char, re.empty over (RegEx (Seq Char))) plus the negative cases (multi-char re.to_re, epsilon, concat), * smart-constructor short-circuit for union/inter/diff through seq_rewriter::mk_app_core (including the empty-intersection collapse to re.none and the full_char-union collapse), * parametric mk_func_decl validation (odd arity, lo>hi, unsorted, overlapping, adjacent), * SMT-LIB pretty-printer round-trip via the parametric + [char]0x60 + '(_ re.charclass lo0 hi0 ...)' + [char]0x60 + form. Test runs in <0.1s and is wired into FOR_EACH_ALL_TEST. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
7f2f66d47c
commit
06a395fc49
3 changed files with 352 additions and 0 deletions
|
|
@ -122,6 +122,7 @@ add_executable(test-z3
|
|||
random.cpp
|
||||
range_predicate.cpp
|
||||
rational.cpp
|
||||
re_charclass.cpp
|
||||
rcf.cpp
|
||||
region.cpp
|
||||
sat_local_search.cpp
|
||||
|
|
|
|||
|
|
@ -114,6 +114,7 @@
|
|||
X(api_special_relations) \
|
||||
X(arith_rewriter) \
|
||||
X(range_predicate) \
|
||||
X(re_charclass) \
|
||||
X(check_assumptions) \
|
||||
X(smt_context) \
|
||||
X(theory_dl) \
|
||||
|
|
|
|||
350
src/test/re_charclass.cpp
Normal file
350
src/test/re_charclass.cpp
Normal file
|
|
@ -0,0 +1,350 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
test/re_charclass.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Unit tests for the OP_RE_CHARCLASS AST node:
|
||||
* mk_charclass + is_charclass round-trip,
|
||||
* hash-cons identity (same predicate -> same app*),
|
||||
* canonical collapses (empty -> re.none, top -> re.allchar),
|
||||
* as_charclass for re.to_re of a single-char string / unit,
|
||||
re.range with concrete bounds, re.full_char, re.empty,
|
||||
* smart-constructor short-circuit for union / intersection / diff
|
||||
through seq_rewriter::mk_app_core,
|
||||
* parametric mk_func_decl validation (odd params, lo>hi, sort
|
||||
order, disjointness, non-adjacency),
|
||||
* SMT-LIB pretty-printer round-trip via the parametric
|
||||
`(_ re.charclass lo0 hi0 ...)` form.
|
||||
|
||||
Author:
|
||||
|
||||
Margus Veanes (veanes) 2026
|
||||
|
||||
--*/
|
||||
|
||||
#include "ast/ast.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/range_predicate.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/z3_exception.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
using seq::range_predicate;
|
||||
|
||||
namespace {
|
||||
|
||||
static unsigned max_char(seq_util const& u) { return u.max_char(); }
|
||||
|
||||
static range_predicate rp_range(unsigned lo, unsigned hi, unsigned mx) {
|
||||
return range_predicate::range(lo, hi, mx);
|
||||
}
|
||||
|
||||
// Disjoint union of intervals; arguments must already be canonical.
|
||||
static range_predicate rp_intervals(std::initializer_list<std::pair<unsigned,unsigned>> ranges,
|
||||
unsigned mx) {
|
||||
range_predicate r = range_predicate::empty(mx);
|
||||
for (auto const& kv : ranges)
|
||||
r = r | range_predicate::range(kv.first, kv.second, mx);
|
||||
return r;
|
||||
}
|
||||
|
||||
void test_round_trip() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
|
||||
// multi-range charclass: [0-9] U [A-Z] U [a-z]
|
||||
range_predicate p = rp_intervals({{'0','9'},{'A','Z'},{'a','z'}}, mx);
|
||||
app_ref cc(u.re.mk_charclass(p, str_sort), m);
|
||||
|
||||
ENSURE(u.re.is_charclass(cc));
|
||||
range_predicate q;
|
||||
ENSURE(u.re.is_charclass(cc, q));
|
||||
ENSURE(p == q);
|
||||
|
||||
// single-range
|
||||
range_predicate p1 = rp_range('a','z', mx);
|
||||
app_ref cc1(u.re.mk_charclass(p1, str_sort), m);
|
||||
ENSURE(u.re.is_charclass(cc1));
|
||||
range_predicate q1;
|
||||
ENSURE(u.re.is_charclass(cc1, q1));
|
||||
ENSURE(p1 == q1);
|
||||
}
|
||||
|
||||
void test_hash_cons_identity() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
|
||||
range_predicate p = rp_intervals({{'a','f'},{'0','9'}}, mx);
|
||||
|
||||
app_ref cc_a(u.re.mk_charclass(p, str_sort), m);
|
||||
app_ref cc_b(u.re.mk_charclass(p, str_sort), m);
|
||||
ENSURE(cc_a.get() == cc_b.get());
|
||||
ENSURE(cc_a->get_id() == cc_b->get_id());
|
||||
|
||||
// A different predicate must hash-cons to a different node.
|
||||
range_predicate p2 = rp_intervals({{'a','f'},{'0','8'}}, mx);
|
||||
app_ref cc_c(u.re.mk_charclass(p2, str_sort), m);
|
||||
ENSURE(cc_a.get() != cc_c.get());
|
||||
}
|
||||
|
||||
void test_empty_and_top_collapse() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
|
||||
// empty -> re.none
|
||||
expr_ref empty_cc(u.re.mk_charclass(range_predicate::empty(mx), str_sort), m);
|
||||
ENSURE(u.re.is_empty(empty_cc));
|
||||
ENSURE(!u.re.is_charclass(empty_cc));
|
||||
|
||||
// top -> re.allchar (re.full_char)
|
||||
expr_ref full_cc(u.re.mk_charclass(range_predicate::top(mx), str_sort), m);
|
||||
ENSURE(u.re.is_full_char(full_cc));
|
||||
ENSURE(!u.re.is_charclass(full_cc));
|
||||
}
|
||||
|
||||
void test_as_charclass_positive() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
range_predicate p;
|
||||
|
||||
// CHARCLASS itself
|
||||
app_ref cc(u.re.mk_charclass(rp_range('a','z',mx), str_sort), m);
|
||||
p = range_predicate::empty(mx);
|
||||
ENSURE(u.re.as_charclass(cc, p));
|
||||
ENSURE(p == rp_range('a','z',mx));
|
||||
|
||||
// (re.range "a" "z")
|
||||
zstring sa("a"), sz("z");
|
||||
expr_ref a(u.str.mk_string(sa), m);
|
||||
expr_ref z(u.str.mk_string(sz), m);
|
||||
expr_ref range_az(u.re.mk_range(a, z), m);
|
||||
p = range_predicate::empty(mx);
|
||||
ENSURE(u.re.as_charclass(range_az, p));
|
||||
ENSURE(p == rp_range('a','z',mx));
|
||||
|
||||
// (re.to_re "X") with a single-char string
|
||||
zstring sx("X");
|
||||
expr_ref to_re_x(u.re.mk_to_re(u.str.mk_string(sx)), m);
|
||||
p = range_predicate::empty(mx);
|
||||
ENSURE(u.re.as_charclass(to_re_x, p));
|
||||
ENSURE(p == rp_range('X','X',mx));
|
||||
|
||||
// re.full_char
|
||||
expr_ref full(u.re.mk_full_char(u.re.mk_re(str_sort)), m);
|
||||
p = range_predicate::empty(mx);
|
||||
ENSURE(u.re.as_charclass(full, p));
|
||||
ENSURE(p == range_predicate::top(mx));
|
||||
|
||||
// re.empty over (RegEx (Seq Char))
|
||||
expr_ref none(u.re.mk_empty(u.re.mk_re(str_sort)), m);
|
||||
p = range_predicate::top(mx);
|
||||
ENSURE(u.re.as_charclass(none, p));
|
||||
ENSURE(p.is_empty());
|
||||
}
|
||||
|
||||
void test_as_charclass_negative() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
range_predicate p;
|
||||
|
||||
// (re.to_re "abc") is concat, not a single-char regex.
|
||||
zstring sabc("abc");
|
||||
expr_ref to_re_abc(u.re.mk_to_re(u.str.mk_string(sabc)), m);
|
||||
ENSURE(!u.re.as_charclass(to_re_abc, p));
|
||||
|
||||
// (re.to_re "") is epsilon, not a charclass.
|
||||
zstring se("");
|
||||
expr_ref to_re_empty(u.re.mk_to_re(u.str.mk_string(se)), m);
|
||||
ENSURE(!u.re.as_charclass(to_re_empty, p));
|
||||
|
||||
// (re.++ (re.range a z) (re.range a z)) - not a single charclass.
|
||||
zstring sa("a"), sz("z");
|
||||
expr_ref ar(u.re.mk_range(u.str.mk_string(sa), u.str.mk_string(sz)), m);
|
||||
expr_ref concat(u.re.mk_concat(ar, ar), m);
|
||||
ENSURE(!u.re.as_charclass(concat, p));
|
||||
}
|
||||
|
||||
// Call seq_rewriter::mk_app_core via the func_decl built by re().mk_<op>.
|
||||
static expr_ref rewrite_binary(seq_rewriter& rw, app* template_app, expr* a, expr* b) {
|
||||
ast_manager& m = rw.m();
|
||||
expr_ref result(m);
|
||||
expr* args[] = {a, b};
|
||||
if (BR_FAILED == rw.mk_app_core(template_app->get_decl(), 2, args, result))
|
||||
result = m.mk_app(template_app->get_decl(), 2, args);
|
||||
return result;
|
||||
}
|
||||
|
||||
void test_smart_ctor_union() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
seq_rewriter rw(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
|
||||
// CHARCLASS | CHARCLASS -> merged CHARCLASS
|
||||
app_ref cc1(u.re.mk_charclass(rp_range('a','z',mx), str_sort), m);
|
||||
app_ref cc2(u.re.mk_charclass(rp_range('0','9',mx), str_sort), m);
|
||||
app_ref tmpl(u.re.mk_union(cc1, cc2), m);
|
||||
|
||||
expr_ref out = rewrite_binary(rw, tmpl, cc1, cc2);
|
||||
range_predicate p;
|
||||
ENSURE(u.re.is_charclass(out, p));
|
||||
ENSURE(p == rp_intervals({{'0','9'},{'a','z'}}, mx));
|
||||
|
||||
// CHARCLASS | range(concrete) -> CHARCLASS (uses as_charclass)
|
||||
zstring sA("A"), sZ("Z");
|
||||
expr_ref range_AZ(u.re.mk_range(u.str.mk_string(sA), u.str.mk_string(sZ)), m);
|
||||
app_ref tmpl2(u.re.mk_union(cc1, to_app(range_AZ)), m);
|
||||
expr_ref out2 = rewrite_binary(rw, tmpl2, cc1, range_AZ);
|
||||
ENSURE(u.re.is_charclass(out2, p));
|
||||
ENSURE(p == rp_intervals({{'A','Z'},{'a','z'}}, mx));
|
||||
|
||||
// CHARCLASS | full_char -> full_char (after merge, top collapses)
|
||||
expr_ref full(u.re.mk_full_char(u.re.mk_re(str_sort)), m);
|
||||
app_ref tmpl3(u.re.mk_union(cc1, to_app(full)), m);
|
||||
expr_ref out3 = rewrite_binary(rw, tmpl3, cc1, full);
|
||||
ENSURE(u.re.is_full_char(out3));
|
||||
}
|
||||
|
||||
void test_smart_ctor_inter() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
seq_rewriter rw(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
|
||||
// [a-z] & [m-q] -> [m-q]
|
||||
app_ref cc1(u.re.mk_charclass(rp_range('a','z',mx), str_sort), m);
|
||||
app_ref cc2(u.re.mk_charclass(rp_range('m','q',mx), str_sort), m);
|
||||
app_ref tmpl(u.re.mk_inter(cc1, cc2), m);
|
||||
expr_ref out = rewrite_binary(rw, tmpl, cc1, cc2);
|
||||
range_predicate p;
|
||||
ENSURE(u.re.is_charclass(out, p));
|
||||
ENSURE(p == rp_range('m','q',mx));
|
||||
|
||||
// [a-c] & [x-z] -> empty -> re.none
|
||||
app_ref cc3(u.re.mk_charclass(rp_range('a','c',mx), str_sort), m);
|
||||
app_ref cc4(u.re.mk_charclass(rp_range('x','z',mx), str_sort), m);
|
||||
app_ref tmpl2(u.re.mk_inter(cc3, cc4), m);
|
||||
expr_ref out2 = rewrite_binary(rw, tmpl2, cc3, cc4);
|
||||
ENSURE(u.re.is_empty(out2));
|
||||
}
|
||||
|
||||
void test_smart_ctor_diff() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
seq_rewriter rw(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
|
||||
// [a-z] - [m-q] -> [a-l] U [r-z]
|
||||
app_ref cc1(u.re.mk_charclass(rp_range('a','z',mx), str_sort), m);
|
||||
app_ref cc2(u.re.mk_charclass(rp_range('m','q',mx), str_sort), m);
|
||||
app_ref tmpl(u.re.mk_diff(cc1, cc2), m);
|
||||
expr_ref out = rewrite_binary(rw, tmpl, cc1, cc2);
|
||||
range_predicate p;
|
||||
ENSURE(u.re.is_charclass(out, p));
|
||||
ENSURE(p == rp_intervals({{'a','l'},{'r','z'}}, mx));
|
||||
}
|
||||
|
||||
// Try to build OP_RE_CHARCLASS with bad parameters; the parametric
|
||||
// mk_func_decl should raise an exception.
|
||||
static bool mk_charclass_raises(ast_manager& m, seq_util& u,
|
||||
std::initializer_list<int> raw_params) {
|
||||
vector<parameter> params;
|
||||
for (int v : raw_params) params.push_back(parameter(v));
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
sort* re_sort = u.re.mk_re(str_sort);
|
||||
try {
|
||||
(void)m.mk_func_decl(u.get_family_id(), OP_RE_CHARCLASS,
|
||||
params.size(), params.data(),
|
||||
0u, (sort* const*)nullptr, re_sort);
|
||||
}
|
||||
catch (z3_exception &) { return true; }
|
||||
return false;
|
||||
}
|
||||
|
||||
void test_validation() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
|
||||
// odd number of parameters
|
||||
ENSURE(mk_charclass_raises(m, u, {5}));
|
||||
// lo > hi
|
||||
ENSURE(mk_charclass_raises(m, u, {10, 5}));
|
||||
// unsorted (97-122 then 48-57)
|
||||
ENSURE(mk_charclass_raises(m, u, {97, 122, 48, 57}));
|
||||
// overlapping
|
||||
ENSURE(mk_charclass_raises(m, u, {50, 100, 80, 120}));
|
||||
// adjacent: [48,57] and [58,65] should be merged
|
||||
ENSURE(mk_charclass_raises(m, u, {48, 57, 58, 65}));
|
||||
|
||||
// Sane invocation must succeed.
|
||||
ENSURE(!mk_charclass_raises(m, u, {}));
|
||||
ENSURE(!mk_charclass_raises(m, u, {65, 90}));
|
||||
ENSURE(!mk_charclass_raises(m, u, {48, 57, 65, 90, 97, 122}));
|
||||
}
|
||||
|
||||
// The SMT-LIB pretty-printer must emit the canonical
|
||||
// `(_ re.charclass lo0 hi0 ...)` form for any CHARCLASS node.
|
||||
void test_pp_round_trip() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
sort* str_sort = u.str.mk_string_sort();
|
||||
unsigned mx = max_char(u);
|
||||
|
||||
range_predicate p = rp_intervals({{'0','9'},{'A','Z'},{'a','z'}}, mx);
|
||||
expr_ref cc(u.re.mk_charclass(p, str_sort), m);
|
||||
|
||||
std::ostringstream oss;
|
||||
oss << mk_pp(cc, m);
|
||||
std::string s = oss.str();
|
||||
// Expect: "(_ re.charclass 48 57 65 90 97 122)"
|
||||
ENSURE(s.find("re.charclass") != std::string::npos);
|
||||
ENSURE(s.find("48 57") != std::string::npos);
|
||||
ENSURE(s.find("65 90") != std::string::npos);
|
||||
ENSURE(s.find("97 122") != std::string::npos);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void tst_re_charclass() {
|
||||
test_round_trip();
|
||||
test_hash_cons_identity();
|
||||
test_empty_and_top_collapse();
|
||||
test_as_charclass_positive();
|
||||
test_as_charclass_negative();
|
||||
test_smart_ctor_union();
|
||||
test_smart_ctor_inter();
|
||||
test_smart_ctor_diff();
|
||||
test_validation();
|
||||
test_pp_round_trip();
|
||||
std::cout << "re_charclass unit tests passed\n";
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue