3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-23 04:49:11 +00:00

Simplify regex splits

This commit is contained in:
CEisenhofer 2026-03-20 13:33:53 +01:00
parent 5d912bdfa5
commit 737c5d44ed
3 changed files with 32 additions and 77 deletions

View file

@ -524,7 +524,7 @@ namespace euf {
return n;
}
snode* sgraph::brzozowski_deriv(snode* re, snode* elem, snode* allowed_range) {
snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
expr* re_expr = re->get_expr();
expr* elem_expr = elem->get_expr();
SASSERT(re_expr);
@ -549,26 +549,7 @@ namespace euf {
// This avoids generating massive 'ite' structures for symbolic variables.
sort* seq_sort = nullptr, *ele_sort = nullptr;
if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) {
if (allowed_range && allowed_range->get_expr()) {
expr* range_expr = allowed_range->get_expr();
expr* lo = nullptr, *hi = nullptr;
if (m_seq.re.is_full_char(range_expr)) {
// For full char, we can't substitute a representative without losing info.
// Fallback to testing the symbolic character.
}
else if (m_seq.re.is_range(range_expr, lo, hi) && lo) {
expr* lo_ch = nullptr;
zstring zs;
if (m_seq.str.is_unit(lo, lo_ch))
elem_expr = lo_ch;
else if (m_seq.str.is_string(lo, zs) && zs.length() > 0)
elem_expr = m_seq.str.mk_char(zs[0]);
else
elem_expr = lo; // Use representative to take the derivative
}
}
// Fallback: If elem itself is a regex predicate, extract representative
else if (ele_sort != elem_expr->get_sort()) {
if (ele_sort != elem_expr->get_sort()) {
// std::cout << "Different sorts: " << ele_sort->get_name() << " vs " << elem_expr->get_sort()->get_name() << std::endl;
expr* lo = nullptr, *hi = nullptr;
if (m_seq.re.is_full_char(elem_expr))

View file

@ -142,7 +142,7 @@ namespace euf {
// Brzozowski derivative of regex re with respect to element elem.
// allowed_range can explicitly provide a concrete character or range to use
// for deriving symbolic variables.
snode* brzozowski_deriv(snode* re, snode* elem, snode* allowed_range = nullptr);
snode* brzozowski_deriv(snode* re, snode* elem);
// Decode a character expression that may be represented as a const-char,
// a unit string containing a const-char, or a one-character string literal.

View file

@ -1922,66 +1922,40 @@ namespace seq {
euf::snode_vector minterms;
sg.compute_minterms(mem.m_regex, minterms);
VERIFY(!minterms.empty());
euf::snode* uniform = nullptr;
bool is_uniform = true;
for (euf::snode* mt : minterms) {
if (!mt || mt->is_fail())
continue;
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
if (!deriv) {
is_uniform = false;
break;
}
if (!uniform)
uniform = deriv;
else if (uniform->id() != deriv->id()) {
is_uniform = false;
break;
}
}
if (is_uniform && uniform) {
if (uniform->is_fail()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
mem.m_str = sg.drop_left(mem.m_str, 1);
mem.m_regex = uniform;
mem.m_history = sg.mk_concat(mem.m_history, tok);
continue;
}
// Uniform derivative failed — try char_range subset approach.
// try char_range subset approach.
// If the symbolic char has a char_range constraint and that
// range is a subset of exactly one minterm's character class,
// we can deterministically take that minterm's derivative.
if (m_char_ranges.contains(tok->id()) && m_graph.m_parikh) {
char_set const& cs = m_char_ranges[tok->id()];
if (!cs.is_empty()) {
euf::snode* matching_deriv = nullptr;
bool found = false;
for (euf::snode* mt : minterms) {
if (!mt || mt->is_fail()) continue;
if (!mt->get_expr()) continue;
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
if (cs.is_subset(mt_cs)) {
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
if (!deriv) { found = false; break; }
if (deriv->is_fail()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
matching_deriv = deriv;
found = true;
break;
SASSERT(m_graph.m_parikh);
char_set const& cs = m_char_ranges.contains(tok->id())
? m_char_ranges[tok->id()]
: char_set::full(zstring::max_char());
if (!cs.is_empty()) {
euf::snode* matching_deriv = nullptr;
bool found = false;
for (euf::snode* mt : minterms) {
SASSERT(mt && mt->get_expr());
SASSERT(!mt->is_fail());
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
if (cs.is_subset(mt_cs)) {
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
if (!deriv) { found = false; break; }
if (deriv->is_fail()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
matching_deriv = deriv;
found = true;
break;
}
if (found && matching_deriv) {
mem.m_str = sg.drop_left(mem.m_str, 1);
mem.m_regex = matching_deriv;
mem.m_history = sg.mk_concat(mem.m_history, tok);
continue;
}
}
if (found && matching_deriv) {
mem.m_str = sg.drop_left(mem.m_str, 1);
mem.m_regex = matching_deriv;
mem.m_history = sg.mk_concat(mem.m_history, tok);
continue;
}
}
break;