mirror of
https://github.com/Z3Prover/z3
synced 2026-07-05 14:56:11 +00:00
Porting seq_split to master (#9840)
Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
c22a7bac7c
commit
b3143e759b
11 changed files with 1545 additions and 1 deletions
|
|
@ -138,6 +138,7 @@ add_executable(test-z3
|
|||
simplifier.cpp
|
||||
sls_test.cpp
|
||||
sls_seq_plugin.cpp
|
||||
seq_split.cpp
|
||||
small_object_allocator.cpp
|
||||
smt2print_parse.cpp
|
||||
smt_context.cpp
|
||||
|
|
|
|||
|
|
@ -197,6 +197,7 @@
|
|||
X(ho_matcher) \
|
||||
X(finite_set) \
|
||||
X(finite_set_rewriter) \
|
||||
X(seq_split) \
|
||||
X(fpa) \
|
||||
X(seq_regex_bisim) \
|
||||
X(term_enumeration) \
|
||||
|
|
|
|||
450
src/test/seq_split.cpp
Normal file
450
src/test/seq_split.cpp
Normal file
|
|
@ -0,0 +1,450 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
seq_split.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Unit tests for the regex split engine (the split function sigma) in ast/rewriter/seq_split.cpp.
|
||||
|
||||
Author:
|
||||
|
||||
Clemens Eisenhofer 2026-6-22
|
||||
|
||||
--*/
|
||||
|
||||
#include "ast/ast.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "ast/rewriter/seq_split.h"
|
||||
#include <set>
|
||||
#include <utility>
|
||||
|
||||
|
||||
struct plugin_registrar {
|
||||
plugin_registrar(ast_manager& m) { reg_decl_plugins(m); }
|
||||
};
|
||||
|
||||
class seq_split_test {
|
||||
ast_manager m;
|
||||
plugin_registrar m_reg;
|
||||
seq_rewriter m_rw;
|
||||
seq_split m_split;
|
||||
seq_util u;
|
||||
sort_ref m_str; // the sequence (String) sort
|
||||
sort_ref m_re; // the RegEx sort over m_str
|
||||
|
||||
seq_util::rex& re() { return u.re; }
|
||||
|
||||
expr_ref eps() { return expr_ref(re().mk_epsilon(m_str), m); } // mk_epsilon takes the seq sort
|
||||
expr_ref dot() { return expr_ref(re().mk_full_char(m_re), m); } // mk_full_char takes the RegEx sort
|
||||
expr_ref dotstar() { return expr_ref(re().mk_full_seq(m_re), m); } // .*
|
||||
expr_ref empty_re() { return expr_ref(re().mk_empty(m_re), m); } // the bottom regex
|
||||
expr_ref rappend(expr* a, expr* b) { return m_rw.mk_re_append(a, b); } // the engine's regex concat
|
||||
expr_ref word(char const* s) { return expr_ref(re().mk_to_re(u.str.mk_string(zstring(s))), m); }
|
||||
expr_ref rng(char lo, char hi) {
|
||||
return expr_ref(re().mk_range(u.str.mk_string(zstring(std::string(1, lo).c_str())),
|
||||
u.str.mk_string(zstring(std::string(1, hi).c_str()))), m);
|
||||
}
|
||||
|
||||
typedef std::set<std::pair<expr*, expr*>> pair_set;
|
||||
|
||||
pair_set as_set(split_set const& s) {
|
||||
pair_set out;
|
||||
for (auto const& p : s)
|
||||
out.insert({ p.m_d.get(), p.m_n.get() });
|
||||
return out;
|
||||
}
|
||||
|
||||
bool eager(expr* r, split_set& out, unsigned threshold = UINT_MAX,
|
||||
split_mode mode = split_mode::strong, split_oracle const& oracle = {}) {
|
||||
return m_split.compute(r, out, threshold, mode, oracle);
|
||||
}
|
||||
|
||||
bool lazy(expr* r, split_set& out, unsigned threshold = UINT_MAX,
|
||||
split_mode mode = split_mode::strong, split_oracle const& oracle = {}) {
|
||||
expr_ref node = m_split.make(r);
|
||||
ENSURE(node);
|
||||
seq_split::iterator it = m_split.iterate(node, mode, threshold, oracle);
|
||||
expr_ref d(m), n(m);
|
||||
while (it.next(d, n))
|
||||
out.push_back(split_pair(d, n, m));
|
||||
return !it.gave_up();
|
||||
}
|
||||
|
||||
// assert that the eager and lazy engines agree on sigma(r) as a *set* of
|
||||
// splits, and report the common cardinality.
|
||||
unsigned check_agree(expr* r) {
|
||||
split_set se, sl;
|
||||
bool oke = eager(r, se);
|
||||
bool okl = lazy(r, sl);
|
||||
ENSURE(oke == okl);
|
||||
if (!oke)
|
||||
return 0;
|
||||
ENSURE(as_set(se) == as_set(sl));
|
||||
return (unsigned)as_set(se).size();
|
||||
}
|
||||
|
||||
public:
|
||||
seq_split_test() : m_reg(m), m_rw(m), m_split(m_rw), u(m), m_str(m), m_re(m) {
|
||||
m_str = u.str.mk_string_sort();
|
||||
m_re = re().mk_re(m_str);
|
||||
}
|
||||
|
||||
void test_eager_epsilon() {
|
||||
split_set s;
|
||||
ENSURE(eager(eps(), s));
|
||||
ENSURE(as_set(s) == pair_set({ { eps().get(), eps().get() } }));
|
||||
}
|
||||
|
||||
void test_eager_char() {
|
||||
// sigma(.) = { <eps, .>, <., eps> }
|
||||
expr_ref a = dot();
|
||||
split_set s;
|
||||
ENSURE(eager(a, s));
|
||||
pair_set expected({ { eps().get(), a.get() }, { a.get(), eps().get() } });
|
||||
ENSURE(as_set(s) == expected);
|
||||
}
|
||||
|
||||
void test_eager_word() {
|
||||
// sigma("ab") = { <"", "ab">, <"a","b">, <"ab",""> }
|
||||
split_set s;
|
||||
ENSURE(eager(word("ab"), s));
|
||||
pair_set expected({
|
||||
{ word("").get(), word("ab").get() },
|
||||
{ word("a").get(), word("b").get() },
|
||||
{ word("ab").get(), word("").get() },
|
||||
});
|
||||
ENSURE(as_set(s) == expected);
|
||||
}
|
||||
|
||||
void test_eager_union() {
|
||||
// sigma(a | b) = sigma(a) cup sigma(b)
|
||||
expr_ref a = rng('a', 'a'), b = rng('b', 'b');
|
||||
expr_ref u_re(re().mk_union(a, b), m);
|
||||
split_set s;
|
||||
ENSURE(eager(u_re, s));
|
||||
pair_set expected({
|
||||
{ eps().get(), a.get() }, { a.get(), eps().get() },
|
||||
{ eps().get(), b.get() }, { b.get(), eps().get() },
|
||||
});
|
||||
ENSURE(as_set(s) == expected);
|
||||
}
|
||||
|
||||
void test_agree_all() {
|
||||
expr_ref a = rng('a', 'a'), b = rng('b', 'b');
|
||||
expr_ref star(re().mk_star(a), m);
|
||||
expr_ref plus(re().mk_plus(a), m);
|
||||
expr_ref concat(re().mk_concat(a, b), m);
|
||||
expr_ref uni(re().mk_union(a, b), m);
|
||||
expr_ref inter(re().mk_inter(re().mk_star(a), re().mk_star(b)), m);
|
||||
expr_ref compl_(re().mk_complement(re().mk_star(a)), m);
|
||||
expr_ref diff(re().mk_diff(re().mk_star(a), re().mk_star(b)), m);
|
||||
|
||||
ENSURE(check_agree(eps()) == 1);
|
||||
ENSURE(check_agree(a) == 2);
|
||||
ENSURE(check_agree(word("ab")) == 3);
|
||||
ENSURE(check_agree(uni) == 4);
|
||||
ENSURE(check_agree(star) == 3); // { <eps,eps>, <a*, a.a*>, <a*.a, a*> }
|
||||
(void)check_agree(plus);
|
||||
(void)check_agree(concat);
|
||||
(void)check_agree(inter); // strong-mode intersection
|
||||
(void)check_agree(compl_); // strong-mode De Morgan complement
|
||||
(void)check_agree(diff);
|
||||
}
|
||||
|
||||
void test_lazy_early_stop() {
|
||||
// a* has 3 splits; pull just the first one and then stop. (Note .* is the
|
||||
// full_seq special case with a single split, so use a proper char-class body.)
|
||||
expr_ref star(re().mk_star(rng('a', 'a')), m);
|
||||
expr_ref node = m_split.make(star);
|
||||
ENSURE(node);
|
||||
seq_split::iterator it = m_split.iterate(node, split_mode::strong, UINT_MAX, {});
|
||||
expr_ref d(m), n(m);
|
||||
unsigned seen = 0;
|
||||
if (it.next(d, n)) // pull exactly one split, then walk away
|
||||
++seen;
|
||||
ENSURE(!it.gave_up()); // stopping early is not a give-up
|
||||
ENSURE(seen == 1);
|
||||
}
|
||||
|
||||
void test_threshold_giveup() {
|
||||
expr_ref star(re().mk_star(rng('a', 'a')), m); // 3 splits
|
||||
split_set s;
|
||||
ENSURE(!lazy(star, s, /*threshold*/ 1));
|
||||
// the eager wrapper honours the same cap
|
||||
split_set s2;
|
||||
ENSURE(!eager(star, s2, /*threshold*/ 1));
|
||||
}
|
||||
|
||||
void test_weak_vs_strong() {
|
||||
expr_ref inter(re().mk_inter(re().mk_star(rng('a', 'a')), re().mk_star(rng('b', 'b'))), m);
|
||||
expr_ref compl_(re().mk_complement(re().mk_star(dot())), m);
|
||||
|
||||
split_set s;
|
||||
ENSURE(!eager(inter, s, UINT_MAX, split_mode::weak));
|
||||
s.reset();
|
||||
ENSURE(!lazy(inter, s, UINT_MAX, split_mode::weak));
|
||||
s.reset();
|
||||
ENSURE(!eager(compl_, s, UINT_MAX, split_mode::weak));
|
||||
s.reset();
|
||||
ENSURE(!lazy(compl_, s, UINT_MAX, split_mode::weak));
|
||||
|
||||
// strong mode succeeds for both
|
||||
s.reset();
|
||||
ENSURE(eager(inter, s, UINT_MAX, split_mode::strong));
|
||||
s.reset();
|
||||
ENSURE(eager(compl_, s, UINT_MAX, split_mode::strong));
|
||||
}
|
||||
|
||||
void test_make_non_regex() {
|
||||
expr_ref not_a_regex(u.str.mk_string(zstring("a")), m); // String, not RegEx
|
||||
expr_ref node = m_split.make(not_a_regex);
|
||||
ENSURE(!node);
|
||||
}
|
||||
|
||||
void test_oracle_prunes() {
|
||||
// sigma(.) without an oracle = { <eps,.>, <.,eps> }; an oracle that keeps
|
||||
// only splits whose suffix is epsilon must drop one of the two.
|
||||
expr_ref a = dot();
|
||||
expr_ref e = eps();
|
||||
split_oracle keep_eps_suffix = [&](expr*, expr* n) { return n == e.get(); };
|
||||
|
||||
split_set se, sl;
|
||||
ENSURE(eager(a, se, UINT_MAX, split_mode::strong, keep_eps_suffix));
|
||||
ENSURE(lazy(a, sl, UINT_MAX, split_mode::strong, keep_eps_suffix));
|
||||
pair_set expected({ { a.get(), e.get() } });
|
||||
ENSURE(as_set(se) == expected);
|
||||
ENSURE(as_set(sl) == expected);
|
||||
}
|
||||
|
||||
void test_eager_full_seq() {
|
||||
// sigma(.*) = { <.*, .*> }
|
||||
expr_ref ds = dotstar();
|
||||
split_set s;
|
||||
ENSURE(eager(ds, s));
|
||||
ENSURE(as_set(s) == pair_set({ { ds.get(), ds.get() } }));
|
||||
}
|
||||
|
||||
void test_eager_bottom() {
|
||||
// sigma(empty) = {}
|
||||
split_set s;
|
||||
ENSURE(eager(empty_re(), s));
|
||||
ENSURE(s.empty());
|
||||
|
||||
split_set sl;
|
||||
ENSURE(lazy(empty_re(), sl));
|
||||
ENSURE(sl.empty());
|
||||
}
|
||||
|
||||
void test_eager_empty_word() {
|
||||
// sigma(to_re("")) = { <"", ""> } (a single, trivial split)
|
||||
split_set s;
|
||||
ENSURE(eager(word(""), s));
|
||||
ENSURE(as_set(s) == pair_set({ { word("").get(), word("").get() } }));
|
||||
}
|
||||
|
||||
void test_eager_star_content() {
|
||||
// sigma(a*) = { <eps,eps>, <a*.eps, a.a*>, <a*.a, eps.a*> }
|
||||
expr_ref a = rng('a', 'a');
|
||||
expr_ref as(re().mk_star(a), m);
|
||||
split_set s;
|
||||
ENSURE(eager(as, s));
|
||||
pair_set expected({
|
||||
{ eps().get(), eps().get() },
|
||||
{ rappend(as, eps()).get(), rappend(a, as).get() },
|
||||
{ rappend(as, a).get(), rappend(eps(), as).get() },
|
||||
});
|
||||
ENSURE(as_set(s) == expected);
|
||||
}
|
||||
|
||||
void test_eager_plus_content() {
|
||||
// sigma(a+) = a*.sigma(a).a* (the star rule without <eps,eps>)
|
||||
expr_ref a = rng('a', 'a');
|
||||
expr_ref as(re().mk_star(a), m);
|
||||
expr_ref ap(re().mk_plus(a), m);
|
||||
split_set s;
|
||||
ENSURE(eager(ap, s));
|
||||
pair_set expected({
|
||||
{ rappend(as, eps()).get(), rappend(a, as).get() },
|
||||
{ rappend(as, a).get(), rappend(eps(), as).get() },
|
||||
});
|
||||
ENSURE(as_set(s) == expected);
|
||||
}
|
||||
|
||||
void test_eager_concat_content() {
|
||||
// sigma(a.b) = sigma(a).b cup a.sigma(b)
|
||||
expr_ref a = rng('a', 'a'), b = rng('b', 'b');
|
||||
expr_ref ab(re().mk_concat(a, b), m);
|
||||
split_set s;
|
||||
ENSURE(eager(ab, s));
|
||||
pair_set expected({
|
||||
{ eps().get(), rappend(a, b).get() }, // <eps, a.b>
|
||||
{ a.get(), rappend(eps(), b).get() }, // <a, eps.b>
|
||||
{ rappend(a, eps()).get(), b.get() }, // <a.eps, b>
|
||||
{ rappend(a, b).get(), eps().get() }, // <a.b, eps>
|
||||
});
|
||||
ENSURE(as_set(s) == expected);
|
||||
}
|
||||
|
||||
void test_nary_union() {
|
||||
// sigma(a|b|c) has 2 splits per char-class
|
||||
expr_ref a = rng('a', 'a'), b = rng('b', 'b'), c = rng('c', 'c');
|
||||
expr_ref u3(re().mk_union(a, re().mk_union(b, c)), m);
|
||||
ENSURE(check_agree(u3) == 6);
|
||||
}
|
||||
|
||||
void test_nary_concat() {
|
||||
// sigma(a.b.c)
|
||||
expr_ref a = rng('a', 'a'), b = rng('b', 'b'), c = rng('c', 'c');
|
||||
expr_ref c3(re().mk_concat(a, re().mk_concat(b, c)), m);
|
||||
ENSURE(check_agree(c3) >= 4);
|
||||
}
|
||||
|
||||
void test_nested_complement() {
|
||||
// sigma(~~(a*))
|
||||
expr_ref cc(re().mk_complement(re().mk_complement(re().mk_star(rng('a', 'a')))), m);
|
||||
(void)check_agree(cc);
|
||||
}
|
||||
|
||||
void test_determinism() {
|
||||
expr_ref r(re().mk_concat(rng('a', 'a'), re().mk_star(rng('b', 'b'))), m);
|
||||
split_set s1, s2;
|
||||
ENSURE(lazy(r, s1));
|
||||
ENSURE(lazy(r, s2));
|
||||
ENSURE(as_set(s1) == as_set(s2));
|
||||
}
|
||||
|
||||
void test_threshold_boundary() {
|
||||
expr_ref as(re().mk_star(rng('a', 'a')), m); // exactly 3 splits
|
||||
split_set s;
|
||||
ENSURE(eager(as, s));
|
||||
unsigned k = (unsigned)as_set(s).size();
|
||||
ENSURE(k == 3);
|
||||
|
||||
split_set ok_e, ok_l, bad_e, bad_l;
|
||||
ENSURE(eager(as, ok_e, k));
|
||||
ENSURE(lazy(as, ok_l, k));
|
||||
ENSURE(!eager(as, bad_e, k - 1)); // one below threshold; give up
|
||||
ENSURE(!lazy(as, bad_l, k - 1));
|
||||
}
|
||||
|
||||
void test_early_stop_after_two() {
|
||||
expr_ref as(re().mk_star(rng('a', 'a')), m); // 3 splits
|
||||
expr_ref node = m_split.make(as);
|
||||
ENSURE(node);
|
||||
seq_split::iterator it = m_split.iterate(node, split_mode::strong, UINT_MAX, {});
|
||||
expr_ref d(m), n(m);
|
||||
unsigned seen = 0;
|
||||
while (seen < 2 && it.next(d, n)) // pull two splits on demand, then stop
|
||||
++seen;
|
||||
ENSURE(!it.gave_up());
|
||||
ENSURE(seen == 2);
|
||||
}
|
||||
|
||||
void test_iterator_exhaustion() {
|
||||
// Pull every split on demand; gave_up() must stay false on a clean
|
||||
// exhaustion, and next() must keep returning false once drained.
|
||||
expr_ref as(re().mk_star(rng('a', 'a')), m); // 3 splits
|
||||
expr_ref node = m_split.make(as);
|
||||
ENSURE(node);
|
||||
seq_split::iterator it = m_split.iterate(node, split_mode::strong, UINT_MAX, {});
|
||||
expr_ref d(m), n(m);
|
||||
unsigned seen = 0;
|
||||
while (it.next(d, n))
|
||||
++seen;
|
||||
ENSURE(seen == 3);
|
||||
ENSURE(!it.gave_up());
|
||||
// idempotent past the end
|
||||
ENSURE(!it.next(d, n));
|
||||
ENSURE(!it.gave_up());
|
||||
}
|
||||
|
||||
void test_iterator_giveup() {
|
||||
// A threshold overrun aborts: next() returns false and gave_up() is true.
|
||||
expr_ref as(re().mk_star(rng('a', 'a')), m); // 3 splits, cap at 1
|
||||
expr_ref node = m_split.make(as);
|
||||
ENSURE(node);
|
||||
seq_split::iterator it = m_split.iterate(node, split_mode::strong, /*threshold*/ 1, {});
|
||||
expr_ref d(m), n(m);
|
||||
unsigned seen = 0;
|
||||
while (it.next(d, n))
|
||||
++seen;
|
||||
ENSURE(it.gave_up()); // aborted, not a clean exhaustion
|
||||
ENSURE(seen <= 1); // produced at most the capped number
|
||||
|
||||
// A weak-mode Boolean closure is likewise a give-up.
|
||||
expr_ref inter(re().mk_inter(re().mk_star(rng('a', 'a')), re().mk_star(rng('b', 'b'))), m);
|
||||
expr_ref inode = m_split.make(inter);
|
||||
ENSURE(inode);
|
||||
seq_split::iterator wit = m_split.iterate(inode, split_mode::weak, UINT_MAX, {});
|
||||
ENSURE(!wit.next(d, n));
|
||||
ENSURE(wit.gave_up());
|
||||
}
|
||||
|
||||
void test_simplify() {
|
||||
expr_ref regs[] = {
|
||||
expr_ref(re().mk_star(rng('a', 'a')), m),
|
||||
expr_ref(re().mk_complement(re().mk_star(rng('a', 'a'))), m),
|
||||
expr_ref(re().mk_concat(rng('a', 'a'), rng('b', 'b')), m),
|
||||
};
|
||||
for (auto& r : regs) {
|
||||
split_set s;
|
||||
ENSURE(eager(r, s));
|
||||
unsigned before = (unsigned)s.size();
|
||||
m_split.simplify(s);
|
||||
ENSURE(s.size() <= before);
|
||||
ENSURE(!s.empty());
|
||||
// idempotent
|
||||
split_set s2(s);
|
||||
m_split.simplify(s2);
|
||||
ENSURE(as_set(s) == as_set(s2));
|
||||
}
|
||||
}
|
||||
|
||||
void test_trivial_oracle() {
|
||||
expr_ref r(re().mk_star(rng('a', 'a')), m);
|
||||
split_oracle keep_all = [](expr*, expr*) { return true; };
|
||||
split_set s_no, s_yes;
|
||||
ENSURE(eager(r, s_no));
|
||||
ENSURE(eager(r, s_yes, UINT_MAX, split_mode::strong, keep_all));
|
||||
ENSURE(as_set(s_no) == as_set(s_yes));
|
||||
}
|
||||
|
||||
void run() {
|
||||
test_eager_epsilon();
|
||||
test_eager_char();
|
||||
test_eager_word();
|
||||
test_eager_union();
|
||||
test_agree_all();
|
||||
test_lazy_early_stop();
|
||||
test_threshold_giveup();
|
||||
test_weak_vs_strong();
|
||||
test_make_non_regex();
|
||||
test_oracle_prunes();
|
||||
test_eager_full_seq();
|
||||
test_eager_bottom();
|
||||
test_eager_empty_word();
|
||||
test_eager_star_content();
|
||||
test_eager_plus_content();
|
||||
test_eager_concat_content();
|
||||
test_nary_union();
|
||||
test_nary_concat();
|
||||
test_nested_complement();
|
||||
test_determinism();
|
||||
test_threshold_boundary();
|
||||
test_early_stop_after_two();
|
||||
test_iterator_exhaustion();
|
||||
test_iterator_giveup();
|
||||
test_simplify();
|
||||
test_trivial_oracle();
|
||||
}
|
||||
};
|
||||
|
||||
void tst_seq_split() {
|
||||
seq_split_test t;
|
||||
t.run();
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue