mirror of
https://github.com/Z3Prover/z3
synced 2026-03-23 04:49:11 +00:00
Simplify regex splits
This commit is contained in:
parent
5d912bdfa5
commit
737c5d44ed
3 changed files with 32 additions and 77 deletions
|
|
@ -524,7 +524,7 @@ namespace euf {
|
|||
return n;
|
||||
}
|
||||
|
||||
snode* sgraph::brzozowski_deriv(snode* re, snode* elem, snode* allowed_range) {
|
||||
snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
|
||||
expr* re_expr = re->get_expr();
|
||||
expr* elem_expr = elem->get_expr();
|
||||
SASSERT(re_expr);
|
||||
|
|
@ -549,26 +549,7 @@ namespace euf {
|
|||
// This avoids generating massive 'ite' structures for symbolic variables.
|
||||
sort* seq_sort = nullptr, *ele_sort = nullptr;
|
||||
if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) {
|
||||
if (allowed_range && allowed_range->get_expr()) {
|
||||
expr* range_expr = allowed_range->get_expr();
|
||||
expr* lo = nullptr, *hi = nullptr;
|
||||
if (m_seq.re.is_full_char(range_expr)) {
|
||||
// For full char, we can't substitute a representative without losing info.
|
||||
// Fallback to testing the symbolic character.
|
||||
}
|
||||
else if (m_seq.re.is_range(range_expr, lo, hi) && lo) {
|
||||
expr* lo_ch = nullptr;
|
||||
zstring zs;
|
||||
if (m_seq.str.is_unit(lo, lo_ch))
|
||||
elem_expr = lo_ch;
|
||||
else if (m_seq.str.is_string(lo, zs) && zs.length() > 0)
|
||||
elem_expr = m_seq.str.mk_char(zs[0]);
|
||||
else
|
||||
elem_expr = lo; // Use representative to take the derivative
|
||||
}
|
||||
}
|
||||
// Fallback: If elem itself is a regex predicate, extract representative
|
||||
else if (ele_sort != elem_expr->get_sort()) {
|
||||
if (ele_sort != elem_expr->get_sort()) {
|
||||
// std::cout << "Different sorts: " << ele_sort->get_name() << " vs " << elem_expr->get_sort()->get_name() << std::endl;
|
||||
expr* lo = nullptr, *hi = nullptr;
|
||||
if (m_seq.re.is_full_char(elem_expr))
|
||||
|
|
|
|||
|
|
@ -142,7 +142,7 @@ namespace euf {
|
|||
// Brzozowski derivative of regex re with respect to element elem.
|
||||
// allowed_range can explicitly provide a concrete character or range to use
|
||||
// for deriving symbolic variables.
|
||||
snode* brzozowski_deriv(snode* re, snode* elem, snode* allowed_range = nullptr);
|
||||
snode* brzozowski_deriv(snode* re, snode* elem);
|
||||
|
||||
// Decode a character expression that may be represented as a const-char,
|
||||
// a unit string containing a const-char, or a one-character string literal.
|
||||
|
|
|
|||
|
|
@ -1922,66 +1922,40 @@ namespace seq {
|
|||
euf::snode_vector minterms;
|
||||
sg.compute_minterms(mem.m_regex, minterms);
|
||||
VERIFY(!minterms.empty());
|
||||
euf::snode* uniform = nullptr;
|
||||
bool is_uniform = true;
|
||||
for (euf::snode* mt : minterms) {
|
||||
if (!mt || mt->is_fail())
|
||||
continue;
|
||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||||
if (!deriv) {
|
||||
is_uniform = false;
|
||||
break;
|
||||
}
|
||||
if (!uniform)
|
||||
uniform = deriv;
|
||||
else if (uniform->id() != deriv->id()) {
|
||||
is_uniform = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (is_uniform && uniform) {
|
||||
if (uniform->is_fail()) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::regex;
|
||||
return simplify_result::conflict;
|
||||
}
|
||||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||||
mem.m_regex = uniform;
|
||||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||||
continue;
|
||||
}
|
||||
// Uniform derivative failed — try char_range subset approach.
|
||||
// try char_range subset approach.
|
||||
// If the symbolic char has a char_range constraint and that
|
||||
// range is a subset of exactly one minterm's character class,
|
||||
// we can deterministically take that minterm's derivative.
|
||||
if (m_char_ranges.contains(tok->id()) && m_graph.m_parikh) {
|
||||
char_set const& cs = m_char_ranges[tok->id()];
|
||||
if (!cs.is_empty()) {
|
||||
euf::snode* matching_deriv = nullptr;
|
||||
bool found = false;
|
||||
for (euf::snode* mt : minterms) {
|
||||
if (!mt || mt->is_fail()) continue;
|
||||
if (!mt->get_expr()) continue;
|
||||
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
|
||||
if (cs.is_subset(mt_cs)) {
|
||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||||
if (!deriv) { found = false; break; }
|
||||
if (deriv->is_fail()) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::regex;
|
||||
return simplify_result::conflict;
|
||||
}
|
||||
matching_deriv = deriv;
|
||||
found = true;
|
||||
break;
|
||||
SASSERT(m_graph.m_parikh);
|
||||
char_set const& cs = m_char_ranges.contains(tok->id())
|
||||
? m_char_ranges[tok->id()]
|
||||
: char_set::full(zstring::max_char());
|
||||
|
||||
if (!cs.is_empty()) {
|
||||
euf::snode* matching_deriv = nullptr;
|
||||
bool found = false;
|
||||
for (euf::snode* mt : minterms) {
|
||||
SASSERT(mt && mt->get_expr());
|
||||
SASSERT(!mt->is_fail());
|
||||
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
|
||||
if (cs.is_subset(mt_cs)) {
|
||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||||
if (!deriv) { found = false; break; }
|
||||
if (deriv->is_fail()) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::regex;
|
||||
return simplify_result::conflict;
|
||||
}
|
||||
matching_deriv = deriv;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
if (found && matching_deriv) {
|
||||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||||
mem.m_regex = matching_deriv;
|
||||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (found && matching_deriv) {
|
||||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||||
mem.m_regex = matching_deriv;
|
||||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue