mirror of
https://github.com/Z3Prover/z3
synced 2025-04-08 10:25:18 +00:00
sketch expr_inverter approach for eliminating unconstrained regex containment.
This commit is contained in:
parent
c7dfb619a2
commit
2dd4faf598
|
@ -881,36 +881,28 @@ public:
|
|||
}
|
||||
return false;
|
||||
case OP_SEQ_IN_RE:
|
||||
if (uncnstr(args[0]) && seq.re.is_ground(args[1])) {
|
||||
if (uncnstr(args[0]) && seq.re.is_ground(args[1]) && seq.is_string(args[0]->get_sort())) {
|
||||
#if 0
|
||||
//
|
||||
// requires auxiliary functions
|
||||
// is_empty
|
||||
// find_member
|
||||
expr* r = args[1];
|
||||
expr_ref not_r(seq.re.mk_complement(r), m);
|
||||
lbool emp_r = rw.is_empty(r);
|
||||
|
||||
if (emp_r == l_undef)
|
||||
// some_string_in_re.
|
||||
// A preliminary implementation exists in sls_seq_plugin.cpp
|
||||
// it should be moved to seq_rewriter and made agnostic to m_chars.
|
||||
// maybe use backtracking for better covereage: when some_string_in_re
|
||||
// fails it doesn't necessarily mean that the regex is empty.
|
||||
//
|
||||
zstring s1, s2;
|
||||
expr* re = args[1];
|
||||
expr_ref not_re(seq.re.mk_complement(re), m);
|
||||
if (!rw.some_string_in_re(re, s1) || !rw.some_string_in_re(not_re, s2))
|
||||
return false;
|
||||
if (emp_r == l_true) {
|
||||
r = m.mk_false();
|
||||
return true;
|
||||
}
|
||||
lbool emp_not_r = rw.is_empty(not_r);
|
||||
if (emp_not_r == l_true) {
|
||||
r = m.mk_true();
|
||||
return true;
|
||||
}
|
||||
if (emp_not_r == l_false) {
|
||||
mk_fresh_uncnstr_var_for(f, r);
|
||||
expr_ref witness1 = rw.find_member(r);
|
||||
expr_ref witness2 = rw.find_member(not_r);
|
||||
if (!witness1 || !witness2)
|
||||
return false;
|
||||
if (m_mc)
|
||||
add_def(args[0], m.mk_ite(r, witness1, witness2));
|
||||
return true;
|
||||
}
|
||||
|
||||
mk_fresh_uncnstr_var_for(f, r);
|
||||
expr_ref witness1 = expr_ref(seq.str.mk_string(s1), m);
|
||||
expr_ref witness2 = expr_ref(seq.str.mk_string(s2), m);
|
||||
if (m_mc)
|
||||
add_def(args[0], m.mk_ite(r, witness1, witness2));
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -6145,3 +6145,106 @@ void seq_rewriter::op_cache::cleanup() {
|
|||
STRACE("seq_verbose", tout << "Derivative op cache reset" << std::endl;);
|
||||
}
|
||||
}
|
||||
|
||||
bool seq_rewriter::some_string_in_re(expr* r, zstring& s) {
|
||||
sort* rs;
|
||||
(void)rs;
|
||||
SASSERT(re().is_re(r, rs) && m_util.is_string(rs));
|
||||
expr_mark visited;
|
||||
unsigned_vector str;
|
||||
expr_ref_vector pinned(m());
|
||||
if (!some_string_in_re(pinned, visited, r, str))
|
||||
return false;
|
||||
s = zstring(str.size(), str.data());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool seq_rewriter::some_string_in_re(expr_ref_vector& pinned, expr_mark& visited, expr* r, unsigned_vector& str) {
|
||||
if (visited.is_marked(r))
|
||||
return false;
|
||||
if (re().is_empty(r))
|
||||
return false;
|
||||
auto info = re().get_info(r);
|
||||
if (info.nullable == l_true)
|
||||
return true;
|
||||
visited.mark(r);
|
||||
pinned.push_back(r);
|
||||
if (re().is_union(r))
|
||||
return any_of(*to_app(r), [&](expr* arg) { return some_string_in_re(pinned, visited, arg, str); });
|
||||
|
||||
expr_ref r2 = mk_derivative(r);
|
||||
buffer<std::pair<unsigned, unsigned>> exclude;
|
||||
return append_char(pinned, visited, exclude, r2, str);
|
||||
}
|
||||
|
||||
bool seq_rewriter::append_char(expr_ref_vector& pinned, expr_mark& visited, buffer<std::pair<unsigned, unsigned>>& exclude, expr* r, unsigned_vector& str) {
|
||||
expr* c, * th, * el;
|
||||
if (re().is_empty(r))
|
||||
return false;
|
||||
if (re().is_union(r))
|
||||
return any_of(*to_app(r), [&](expr* arg) { return append_char(pinned, visited, exclude, arg, str); });
|
||||
if (m().is_ite(r, c, th, el)) {
|
||||
unsigned low = 0, high = zstring::unicode_max_char();
|
||||
if (get_bounds(c, low, high)) {
|
||||
SASSERT(low <= high);
|
||||
unsigned sz = str.size();
|
||||
str.push_back(low); // TODO: check that low is not in exclude
|
||||
if (some_string_in_re(pinned, visited, th, str))
|
||||
return true;
|
||||
str.shrink(sz);
|
||||
}
|
||||
if (re().is_empty(el))
|
||||
return false;
|
||||
exclude.push_back({ low, high });
|
||||
if (append_char(pinned, visited, exclude, el, str))
|
||||
return true;
|
||||
exclude.pop_back();
|
||||
}
|
||||
|
||||
if (is_ground(r)) {
|
||||
for (auto [l, h] : exclude)
|
||||
verbose_stream() << "exclude " << l << " " << h << "\n";
|
||||
str.push_back('a');
|
||||
if (some_string_in_re(pinned, visited, r, str))
|
||||
return true;
|
||||
str.pop_back();
|
||||
return false;
|
||||
}
|
||||
|
||||
verbose_stream() << "todo append_char " << mk_pp(r, m()) << "\n";
|
||||
// TODO: select characters from m_chars[ctx.rand(m_chars.size())]);
|
||||
str.push_back('a');
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool seq_rewriter::get_bounds(expr* e, unsigned& low, unsigned& high) {
|
||||
low = 0;
|
||||
high = zstring::unicode_max_char();
|
||||
ptr_buffer<expr> todo;
|
||||
todo.push_back(e);
|
||||
expr* x, * y;
|
||||
unsigned ch = 0;
|
||||
while (!todo.empty()) {
|
||||
e = todo.back();
|
||||
todo.pop_back();
|
||||
if (m().is_and(e))
|
||||
todo.append(to_app(e)->get_num_args(), to_app(e)->get_args());
|
||||
else if (m_util.is_char_le(e, x, y) && m_util.is_const_char(x, ch) && is_var(y))
|
||||
low = std::max(ch, low);
|
||||
else if (m_util.is_char_le(e, x, y) && m_util.is_const_char(y, ch) && is_var(x))
|
||||
high = std::min(ch, high);
|
||||
else if (m().is_eq(e, x, y) && is_var(x) && m_util.is_const_char(y, ch)) {
|
||||
low = std::max(ch, low);
|
||||
high = std::min(ch, high);
|
||||
}
|
||||
else if (m().is_eq(e, x, y) && is_var(y) && m_util.is_const_char(x, ch)) {
|
||||
low = std::max(ch, low);
|
||||
high = std::min(ch, high);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return low <= high;
|
||||
}
|
||||
|
||||
|
|
|
@ -353,6 +353,11 @@ class seq_rewriter {
|
|||
|
||||
void intersect(unsigned lo, unsigned hi, svector<std::pair<unsigned, unsigned>>& ranges);
|
||||
|
||||
bool get_bounds(expr* e, unsigned& low, unsigned& high);
|
||||
bool append_char(expr_ref_vector& pinned, expr_mark& visited, buffer<std::pair<unsigned, unsigned>>& exclude, expr* r, unsigned_vector& str);
|
||||
bool some_string_in_re(expr_ref_vector& pinned, expr_mark& visited, expr* r, unsigned_vector& str);
|
||||
|
||||
|
||||
public:
|
||||
seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
|
||||
m_util(m), m_autil(m), m_br(m, p), m_re2aut(m), m_op_cache(m), m_es(m),
|
||||
|
@ -432,5 +437,12 @@ public:
|
|||
expr_ref mk_regex_union_normalize(expr* r1, expr* r2);
|
||||
/* Apply simplifications to the intersection to keep it normalized (r1 and r2 are not normalized)*/
|
||||
expr_ref mk_regex_inter_normalize(expr* r1, expr* r2);
|
||||
|
||||
/*
|
||||
* Extract some string that is a member of r.
|
||||
* Return true if a valid string was extracted.
|
||||
* Return false when giving up or the regular expression is empty.
|
||||
*/
|
||||
bool some_string_in_re(expr* r, zstring& s);
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue