mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 15:16:29 +00:00
Recognise (str.to_re X) as CHARCLASS when X is a single character
Extend as_charclass to accept (str.to_re s) with s a length-1 string or (str.to_re (seq.unit c)) with c a constant character. These reduce to the singleton range [c, c] and let mixed unions like (re.union (re.range "a" "z") (str.to_re "_")) collapse into a single canonical CHARCLASS. Multi-character str.to_re (which is a concatenation of length-1 regex nodes) and empty str.to_re (which is epsilon and has min-length 0) are deliberately excluded. On the regex-equivalence corpus this lifts wall time from +0.23% to -1.76% vs master and net decisions from +1 to +2. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
23edf638b7
commit
b9010f20cd
1 changed files with 17 additions and 0 deletions
|
|
@ -1223,6 +1223,23 @@ bool seq_util::rex::as_charclass(expr* r, seq::range_predicate& p) const {
|
|||
return false;
|
||||
}
|
||||
expr* e1 = nullptr, *e2 = nullptr;
|
||||
// (str.to_re s) is a CHARCLASS only when s is a single character (length 1).
|
||||
// For longer strings it is a concat of length-1 regexes; for length 0 it is
|
||||
// epsilon, which is not a charclass.
|
||||
if (is_to_re(r, e1)) {
|
||||
zstring s;
|
||||
if (u.str.is_string(e1, s) && s.length() == 1) {
|
||||
p = seq::range_predicate::range(s[0], s[0], mx);
|
||||
return true;
|
||||
}
|
||||
expr* c = nullptr;
|
||||
unsigned ch = 0;
|
||||
if (u.str.is_unit(e1, c) && u.is_const_char(c, ch)) {
|
||||
p = seq::range_predicate::range(ch, ch, mx);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (is_range(r, e1, e2)) {
|
||||
zstring s1, s2;
|
||||
if (u.str.is_string(e1, s1) && u.str.is_string(e2, s2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue