mirror of
https://github.com/Z3Prover/z3
synced 2026-07-04 06:16:09 +00:00
Simplify regex splits
This commit is contained in:
parent
5d912bdfa5
commit
737c5d44ed
3 changed files with 32 additions and 77 deletions
|
|
@ -524,7 +524,7 @@ namespace euf {
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
snode* sgraph::brzozowski_deriv(snode* re, snode* elem, snode* allowed_range) {
|
snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
|
||||||
expr* re_expr = re->get_expr();
|
expr* re_expr = re->get_expr();
|
||||||
expr* elem_expr = elem->get_expr();
|
expr* elem_expr = elem->get_expr();
|
||||||
SASSERT(re_expr);
|
SASSERT(re_expr);
|
||||||
|
|
@ -549,26 +549,7 @@ namespace euf {
|
||||||
// This avoids generating massive 'ite' structures for symbolic variables.
|
// This avoids generating massive 'ite' structures for symbolic variables.
|
||||||
sort* seq_sort = nullptr, *ele_sort = nullptr;
|
sort* seq_sort = nullptr, *ele_sort = nullptr;
|
||||||
if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) {
|
if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) {
|
||||||
if (allowed_range && allowed_range->get_expr()) {
|
if (ele_sort != elem_expr->get_sort()) {
|
||||||
expr* range_expr = allowed_range->get_expr();
|
|
||||||
expr* lo = nullptr, *hi = nullptr;
|
|
||||||
if (m_seq.re.is_full_char(range_expr)) {
|
|
||||||
// For full char, we can't substitute a representative without losing info.
|
|
||||||
// Fallback to testing the symbolic character.
|
|
||||||
}
|
|
||||||
else if (m_seq.re.is_range(range_expr, lo, hi) && lo) {
|
|
||||||
expr* lo_ch = nullptr;
|
|
||||||
zstring zs;
|
|
||||||
if (m_seq.str.is_unit(lo, lo_ch))
|
|
||||||
elem_expr = lo_ch;
|
|
||||||
else if (m_seq.str.is_string(lo, zs) && zs.length() > 0)
|
|
||||||
elem_expr = m_seq.str.mk_char(zs[0]);
|
|
||||||
else
|
|
||||||
elem_expr = lo; // Use representative to take the derivative
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Fallback: If elem itself is a regex predicate, extract representative
|
|
||||||
else if (ele_sort != elem_expr->get_sort()) {
|
|
||||||
// std::cout << "Different sorts: " << ele_sort->get_name() << " vs " << elem_expr->get_sort()->get_name() << std::endl;
|
// std::cout << "Different sorts: " << ele_sort->get_name() << " vs " << elem_expr->get_sort()->get_name() << std::endl;
|
||||||
expr* lo = nullptr, *hi = nullptr;
|
expr* lo = nullptr, *hi = nullptr;
|
||||||
if (m_seq.re.is_full_char(elem_expr))
|
if (m_seq.re.is_full_char(elem_expr))
|
||||||
|
|
|
||||||
|
|
@ -142,7 +142,7 @@ namespace euf {
|
||||||
// Brzozowski derivative of regex re with respect to element elem.
|
// Brzozowski derivative of regex re with respect to element elem.
|
||||||
// allowed_range can explicitly provide a concrete character or range to use
|
// allowed_range can explicitly provide a concrete character or range to use
|
||||||
// for deriving symbolic variables.
|
// for deriving symbolic variables.
|
||||||
snode* brzozowski_deriv(snode* re, snode* elem, snode* allowed_range = nullptr);
|
snode* brzozowski_deriv(snode* re, snode* elem);
|
||||||
|
|
||||||
// Decode a character expression that may be represented as a const-char,
|
// Decode a character expression that may be represented as a const-char,
|
||||||
// a unit string containing a const-char, or a one-character string literal.
|
// a unit string containing a const-char, or a one-character string literal.
|
||||||
|
|
|
||||||
|
|
@ -1922,66 +1922,40 @@ namespace seq {
|
||||||
euf::snode_vector minterms;
|
euf::snode_vector minterms;
|
||||||
sg.compute_minterms(mem.m_regex, minterms);
|
sg.compute_minterms(mem.m_regex, minterms);
|
||||||
VERIFY(!minterms.empty());
|
VERIFY(!minterms.empty());
|
||||||
euf::snode* uniform = nullptr;
|
// try char_range subset approach.
|
||||||
bool is_uniform = true;
|
|
||||||
for (euf::snode* mt : minterms) {
|
|
||||||
if (!mt || mt->is_fail())
|
|
||||||
continue;
|
|
||||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
|
||||||
if (!deriv) {
|
|
||||||
is_uniform = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!uniform)
|
|
||||||
uniform = deriv;
|
|
||||||
else if (uniform->id() != deriv->id()) {
|
|
||||||
is_uniform = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (is_uniform && uniform) {
|
|
||||||
if (uniform->is_fail()) {
|
|
||||||
m_is_general_conflict = true;
|
|
||||||
m_reason = backtrack_reason::regex;
|
|
||||||
return simplify_result::conflict;
|
|
||||||
}
|
|
||||||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
|
||||||
mem.m_regex = uniform;
|
|
||||||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Uniform derivative failed — try char_range subset approach.
|
|
||||||
// If the symbolic char has a char_range constraint and that
|
// If the symbolic char has a char_range constraint and that
|
||||||
// range is a subset of exactly one minterm's character class,
|
// range is a subset of exactly one minterm's character class,
|
||||||
// we can deterministically take that minterm's derivative.
|
// we can deterministically take that minterm's derivative.
|
||||||
if (m_char_ranges.contains(tok->id()) && m_graph.m_parikh) {
|
SASSERT(m_graph.m_parikh);
|
||||||
char_set const& cs = m_char_ranges[tok->id()];
|
char_set const& cs = m_char_ranges.contains(tok->id())
|
||||||
if (!cs.is_empty()) {
|
? m_char_ranges[tok->id()]
|
||||||
euf::snode* matching_deriv = nullptr;
|
: char_set::full(zstring::max_char());
|
||||||
bool found = false;
|
|
||||||
for (euf::snode* mt : minterms) {
|
if (!cs.is_empty()) {
|
||||||
if (!mt || mt->is_fail()) continue;
|
euf::snode* matching_deriv = nullptr;
|
||||||
if (!mt->get_expr()) continue;
|
bool found = false;
|
||||||
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
|
for (euf::snode* mt : minterms) {
|
||||||
if (cs.is_subset(mt_cs)) {
|
SASSERT(mt && mt->get_expr());
|
||||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
SASSERT(!mt->is_fail());
|
||||||
if (!deriv) { found = false; break; }
|
char_set mt_cs = m_graph.m_seq_regex->minterm_to_char_set(mt->get_expr());
|
||||||
if (deriv->is_fail()) {
|
if (cs.is_subset(mt_cs)) {
|
||||||
m_is_general_conflict = true;
|
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||||||
m_reason = backtrack_reason::regex;
|
if (!deriv) { found = false; break; }
|
||||||
return simplify_result::conflict;
|
if (deriv->is_fail()) {
|
||||||
}
|
m_is_general_conflict = true;
|
||||||
matching_deriv = deriv;
|
m_reason = backtrack_reason::regex;
|
||||||
found = true;
|
return simplify_result::conflict;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
matching_deriv = deriv;
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (found && matching_deriv) {
|
}
|
||||||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
if (found && matching_deriv) {
|
||||||
mem.m_regex = matching_deriv;
|
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||||||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
mem.m_regex = matching_deriv;
|
||||||
continue;
|
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||||||
}
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue