3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-26 10:28:48 +00:00

fix(seq): reject non-character regexes in regex_to_range_predicate

Guard regex_to_range_predicate so it only collapses regexes whose
element sort is a character sort. Previously a regex over a non-char
sequence sort (e.g. (re (Seq Int))) could be silently mis-collapsed
into bogus [0, max_char] ranges. Add a negative unit test covering
re.empty/re.full_char over (Seq Int).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-25 22:42:26 +03:00
parent ee20c9963b
commit 2ecdd15e0f
2 changed files with 26 additions and 0 deletions

View file

@ -22,6 +22,17 @@ Authors:
namespace seq {
bool regex_to_range_predicate(seq_util& u, expr* r, range_predicate& out) {
// The range algebra only models sets of single characters over the
// unsigned character domain [0, max_char]. Guard against any regex
// whose element type is not a sequence of characters (e.g. a regex
// over (Seq Int) or (Seq (Seq Char))): for such regexes the
// re.range/re.union/... matchers below would silently fabricate a
// character-class predicate and change semantics. Reject them up
// front so callers fall back to the generic regex path.
sort* seq_sort = nullptr;
if (!u.is_re(r, seq_sort) || !u.is_string(seq_sort))
return false;
unsigned const max_char = u.max_char();
auto& re = u.re;

View file

@ -10,6 +10,7 @@ Module Name:
#include "ast/rewriter/regex_range_collapse.h"
#include "ast/reg_decl_plugins.h"
#include "ast/ast_pp.h"
#include "ast/arith_decl_plugin.h"
#include "util/util.h"
#include <iostream>
@ -151,6 +152,20 @@ namespace {
"re.* of range not translatable");
}
// Negative: a regex whose element type is NOT a sequence of
// characters (here (Seq Int)) must be rejected outright, even for
// shapes that structurally resemble char-class operators.
{
range_predicate p(M);
arith_util a(m);
sort* int_seq = u.str.mk_seq(a.mk_int());
sort* int_re = u.re.mk_re(int_seq);
check(!regex_to_range_predicate(u, u.re.mk_empty(int_re), p),
"re.empty over (Seq Int) is NOT a char class");
check(!regex_to_range_predicate(u, u.re.mk_full_char(int_re), p),
"re.full_char over (Seq Int) is NOT a char class");
}
// ---- materialization round-trip ----
// empty -> re.empty