3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-10 10:57:15 +00:00

Port is_classical attribute to seq_util::rex::info (#9796)

`is_classical` (tracks whether a regex uses only classical operators —
no complement, intersection, diff, or empty-language/fail) was only
available on `euf::snode`. Moving it into `seq_util::rex::info` makes it
accessible to all regex-handling code without going through the snode
layer.

### Changes

**`seq_decl_plugin.h`**
- Added `bool classical { true }` to `seq_util::rex::info`
- The general `info` constructor requires `bool is_classical` explicitly
(no default)

**`seq_decl_plugin.cpp`**
- `mk_info_rec`: `OP_RE_EMPTY_SET` (fail) sets `classical=false`
- `mk_info_rec`: `OP_RE_RANGE`, `OP_RE_FULL_CHAR_SET`, `OP_RE_OF_PRED`
set `classical=false`
- `complement()`, `conj()` (intersection), `diff()`: always produce
`classical=false`
- `star()`, `plus()`, `opt()`, `concat()`, `disj()`, `orelse()`,
`loop()`: propagate `classical` via logical AND over operands
- `operator=` and `display()` updated to include `classical`

### Semantics

| Operation | `classical` |
|-----------|-------------|
| `re.empty` (fail) | `false` |
| `re.range`, `re.allchar`, `re.of.pred` | `false` |
| `re.comp` (complement) | `false` |
| `re.inter` (intersection) | `false` |
| `re.diff` | `false` |
| `re.all` (full sequence set) | `true` |
| `str.to.re` (string literal) | `true` |
| `re.*`, `re.+`, `re.opt`, `re.++`, `re.union`, `re.loop` | inherited
from operands |

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
Copilot 2026-06-09 14:35:48 -07:00 committed by GitHub
parent f0956a622f
commit d415ead6a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 28 additions and 17 deletions

View file

@ -1653,9 +1653,9 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
if (e->get_family_id() == u.get_family_id()) {
switch (e->get_decl()->get_decl_kind()) {
case OP_RE_EMPTY_SET:
return info(true, l_false, UINT_MAX);
return info(true, l_false, UINT_MAX, false);
case OP_RE_FULL_SEQ_SET:
return info(true, l_true, 0);
return info(true, l_true, 0, true);
case OP_RE_STAR:
i1 = get_info_rec(e->get_arg(0));
return i1.star();
@ -1667,7 +1667,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
case OP_RE_OF_PRED:
//TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
//TBD: check if the range is unsat
return info(true, l_false, 1);
return info(true, l_false, 1, false);
case OP_RE_CONCAT:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
@ -1684,7 +1684,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
min_length = u.str.min_length(e->get_arg(0));
is_value = m.is_value(e->get_arg(0));
nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef));
return info(is_value, nullable, min_length);
return info(is_value, nullable, min_length, true);
case OP_RE_REVERSE:
return get_info_rec(e->get_arg(0));
case OP_RE_PLUS:
@ -1720,7 +1720,8 @@ std::ostream& seq_util::rex::info::display(std::ostream& out) const {
if (is_known()) {
out << "info("
<< "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
<< "min_length=" << min_length << ")";
<< "min_length=" << min_length << ", "
<< "classical=" << (classical ? "T" : "F") << ")";
}
else if (is_valid())
out << "UNKNOWN";
@ -1740,13 +1741,13 @@ std::string seq_util::rex::info::str() const {
seq_util::rex::info seq_util::rex::info::star() const {
//if is_known() is false then all mentioned properties will remain false
return seq_util::rex::info(interpreted, l_true, 0);
return seq_util::rex::info(interpreted, l_true, 0, classical);
}
seq_util::rex::info seq_util::rex::info::plus() const {
if (is_known()) {
//plus never occurs in a normalized regex
return info(interpreted, nullable, min_length);
return info(interpreted, nullable, min_length, classical);
}
else
return *this;
@ -1755,14 +1756,14 @@ seq_util::rex::info seq_util::rex::info::plus() const {
seq_util::rex::info seq_util::rex::info::opt() const {
// if is_known() is false then all mentioned properties will remain false
// optional construct never occurs in a normalized regex
return seq_util::rex::info(interpreted, l_true, 0);
return seq_util::rex::info(interpreted, l_true, 0, classical);
}
seq_util::rex::info seq_util::rex::info::complement() const {
if (is_known()) {
lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef));
unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0);
return info(interpreted, compl_nullable, compl_min_length);
return info(interpreted, compl_nullable, compl_min_length, false);
}
else
return *this;
@ -1776,7 +1777,8 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs,
m = UINT_MAX;
return info(interpreted && rhs.interpreted,
((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)),
m);
m,
classical && rhs.classical);
}
else
return rhs;
@ -1790,7 +1792,8 @@ seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) co
//works correctly if one of the arguments is unknown
return info(interpreted && rhs.interpreted,
((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)),
std::min(min_length, rhs.min_length));
std::min(min_length, rhs.min_length),
classical && rhs.classical);
}
else
return rhs;
@ -1801,7 +1804,8 @@ seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) co
if (rhs.is_known()) {
return info(interpreted && rhs.interpreted,
((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
std::max(min_length, rhs.min_length));
std::max(min_length, rhs.min_length),
false);
}
else
return rhs;
@ -1815,7 +1819,8 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) co
if (rhs.is_known()) {
return info(interpreted & rhs.interpreted,
((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
std::max(min_length, rhs.min_length));
std::max(min_length, rhs.min_length),
false);
}
else
return rhs;
@ -1832,7 +1837,8 @@ seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) co
// TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
return info(false,
((nullable == l_true && i.nullable == l_true) ? l_true : ((nullable == l_false && i.nullable == l_false) ? l_false : l_undef)),
std::min(min_length, i.min_length));
std::min(min_length, i.min_length),
classical && i.classical);
}
else
return i;
@ -1848,7 +1854,7 @@ seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) co
if (m > 0 && (m < min_length || m < lower))
m = UINT_MAX;
lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
return info(interpreted, loop_nullable, m);
return info(interpreted, loop_nullable, m, classical);
}
else
return *this;
@ -1863,6 +1869,7 @@ seq_util::rex::info& seq_util::rex::info::operator=(info const& other) {
interpreted = other.interpreted;
nullable = other.nullable;
min_length = other.min_length;
classical = other.classical;
return *this;
}

View file

@ -443,6 +443,8 @@ public:
lbool nullable { l_undef };
/* Lower bound on the length of all accepted words. */
unsigned min_length { 0 };
/* Classical regular expression: does not use complement, intersection, diff, or the empty language (fail). */
bool classical { true };
/*
Default constructor of invalid info.
@ -459,11 +461,13 @@ public:
*/
info(bool is_interpreted,
lbool is_nullable,
unsigned min_l) :
unsigned min_l,
bool is_classical) :
known(l_true),
interpreted(is_interpreted),
nullable(is_nullable),
min_length(min_l) {}
min_length(min_l),
classical(is_classical) {}
/*
Appends a string representation of the info into the stream.