mirror of
https://github.com/Z3Prover/z3
synced 2025-04-08 10:25:18 +00:00
Regex membership (#7506)
* Make finding a word in the regex iterative * Fixed gc problem
This commit is contained in:
parent
9a237d55ca
commit
c572fc2e4f
|
@ -883,10 +883,13 @@ public:
|
|||
case OP_SEQ_IN_RE:
|
||||
if (uncnstr(args[0]) && seq.re.is_ground(args[1]) && seq.is_string(args[0]->get_sort())) {
|
||||
#if 1
|
||||
zstring s1, s2;
|
||||
zstring s1;
|
||||
expr* re = args[1];
|
||||
if (!rw.some_string_in_re(re, s1))
|
||||
return false;
|
||||
zstring s2;
|
||||
expr_ref not_re(seq.re.mk_complement(re), m);
|
||||
if (!rw.some_string_in_re(re, s1) || !rw.some_string_in_re(not_re, s2))
|
||||
if (!rw.some_string_in_re(not_re, s2))
|
||||
return false;
|
||||
|
||||
mk_fresh_uncnstr_var_for(f, r);
|
||||
|
|
|
@ -6152,91 +6152,115 @@ bool seq_rewriter::some_string_in_re(expr* r, zstring& s) {
|
|||
// SASSERT(re().is_re(r, rs) && m_util.is_string(rs));
|
||||
expr_mark visited;
|
||||
unsigned_vector str;
|
||||
expr_ref_vector pinned(m());
|
||||
|
||||
if (!some_string_in_re(pinned, visited, r, str))
|
||||
if (!some_string_in_re(visited, r, str))
|
||||
return false;
|
||||
s = zstring(str.size(), str.data());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool seq_rewriter::some_string_in_re(expr_ref_vector& pinned, expr_mark& visited, expr* r, unsigned_vector& str) {
|
||||
if (visited.is_marked(r))
|
||||
return false;
|
||||
if (re().is_empty(r))
|
||||
return false;
|
||||
auto info = re().get_info(r);
|
||||
if (info.nullable == l_true)
|
||||
return true;
|
||||
visited.mark(r);
|
||||
pinned.push_back(r);
|
||||
if (re().is_union(r))
|
||||
return any_of(*to_app(r), [&](expr* arg) { return some_string_in_re(pinned, visited, arg, str); });
|
||||
|
||||
expr_ref r2 = mk_derivative(r);
|
||||
struct re_eval_pos {
|
||||
expr_ref e; // use reference to avoid gc
|
||||
unsigned str_len;
|
||||
buffer<std::pair<unsigned, unsigned>> exclude;
|
||||
return append_char(pinned, visited, exclude, r2, str);
|
||||
}
|
||||
bool needs_derivation;
|
||||
};
|
||||
|
||||
bool seq_rewriter::append_char(expr_ref_vector& pinned, expr_mark& visited, buffer<std::pair<unsigned, unsigned>>& exclude, expr* r, unsigned_vector& str) {
|
||||
expr* c, * th, * el;
|
||||
if (re().is_empty(r))
|
||||
return false;
|
||||
if (re().is_union(r))
|
||||
return any_of(*to_app(r), [&](expr* arg) { return append_char(pinned, visited, exclude, arg, str); });
|
||||
if (m().is_ite(r, c, th, el)) {
|
||||
unsigned low = 0, high = zstring::unicode_max_char();
|
||||
if (get_bounds(c, low, high)) {
|
||||
SASSERT(low <= high);
|
||||
str.push_back(low); // ASSERT: low .. high does not intersect with exclude
|
||||
if (some_string_in_re(pinned, visited, th, str))
|
||||
return true;
|
||||
str.pop_back();
|
||||
}
|
||||
if (re().is_empty(el))
|
||||
return false;
|
||||
exclude.push_back({ low, high });
|
||||
if (append_char(pinned, visited, exclude, el, str))
|
||||
return true;
|
||||
exclude.pop_back();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_ground(r)) {
|
||||
// ensure selected character is not in exclude
|
||||
unsigned ch = 'a';
|
||||
bool wrapped = false;
|
||||
while (true) {
|
||||
bool found = false;
|
||||
for (auto [l, h] : exclude) {
|
||||
if (l <= ch && ch <= h) {
|
||||
found = true;
|
||||
ch = h + 1;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
break;
|
||||
if (ch != zstring::unicode_max_char() + 1)
|
||||
bool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vector& str) {
|
||||
SASSERT(str.empty());
|
||||
vector<re_eval_pos> todo;
|
||||
todo.push_back({ expr_ref(r, m()), 0, {}, true });
|
||||
while (!todo.empty()) {
|
||||
re_eval_pos current = todo.back();
|
||||
todo.pop_back();
|
||||
r = current.e;
|
||||
str.resize(current.str_len);
|
||||
if (current.needs_derivation) {
|
||||
SASSERT(current.exclude.empty());
|
||||
// We are looking for the next character => generate derivation
|
||||
if (visited.is_marked(r))
|
||||
continue;
|
||||
if (wrapped)
|
||||
return false;
|
||||
ch = 0;
|
||||
wrapped = true;
|
||||
}
|
||||
str.push_back(ch);
|
||||
if (some_string_in_re(pinned, visited, r, str))
|
||||
return true;
|
||||
str.pop_back();
|
||||
return false;
|
||||
}
|
||||
if (re().is_empty(r))
|
||||
continue;
|
||||
auto info = re().get_info(r);
|
||||
if (info.nullable == l_true)
|
||||
return true;
|
||||
visited.mark(r);
|
||||
if (re().is_union(r)) {
|
||||
for (expr* arg : *to_app(r)) {
|
||||
todo.push_back({ expr_ref(arg, m()), str.size(), {}, true });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
verbose_stream() << "todo append_char " << mk_pp(r, m()) << "\n";
|
||||
// TODO: select characters from m_chars[ctx.rand(m_chars.size())]);
|
||||
str.push_back('a');
|
||||
r = mk_derivative(r);
|
||||
}
|
||||
// otw. we are still in the process of deciding case of the derivation to take
|
||||
|
||||
buffer<std::pair<unsigned, unsigned>> exclude = std::move(current.exclude);
|
||||
|
||||
expr* c, * th, * el;
|
||||
if (re().is_empty(r))
|
||||
continue;
|
||||
if (re().is_union(r)) {
|
||||
for (expr* arg : *to_app(r)) {
|
||||
todo.push_back({ expr_ref(arg, m()), str.size(), exclude, false });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (m().is_ite(r, c, th, el)) {
|
||||
unsigned low = 0, high = zstring::unicode_max_char();
|
||||
bool hasBounds = get_bounds(c, low, high);
|
||||
if (!re().is_empty(el)) {
|
||||
exclude.push_back({ low, high });
|
||||
todo.push_back({ expr_ref(el, m()), str.size(), std::move(exclude), false });
|
||||
}
|
||||
if (hasBounds) {
|
||||
// I want this case to be processed first => push it last
|
||||
// reason: current string is only pruned
|
||||
SASSERT(low <= high);
|
||||
str.push_back(low); // ASSERT: low .. high does not intersect with exclude
|
||||
todo.push_back({ expr_ref(th, m()), str.size(), {}, true });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_ground(r)) {
|
||||
// ensure selected character is not in exclude
|
||||
unsigned ch = 'a';
|
||||
bool wrapped = false;
|
||||
bool failed = false;
|
||||
while (true) {
|
||||
bool found = false;
|
||||
for (auto [l, h] : exclude) {
|
||||
if (l <= ch && ch <= h) {
|
||||
found = true;
|
||||
ch = h + 1;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
break;
|
||||
if (ch != zstring::unicode_max_char() + 1)
|
||||
continue;
|
||||
if (wrapped) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
ch = 0;
|
||||
wrapped = true;
|
||||
}
|
||||
if (failed)
|
||||
continue;
|
||||
str.push_back(ch);
|
||||
todo.push_back({ expr_ref(r, m()), str.size(), {}, true });
|
||||
continue;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool seq_rewriter::get_bounds(expr* e, unsigned& low, unsigned& high) {
|
||||
low = 0;
|
||||
high = zstring::unicode_max_char();
|
||||
|
|
|
@ -354,9 +354,7 @@ class seq_rewriter {
|
|||
void intersect(unsigned lo, unsigned hi, svector<std::pair<unsigned, unsigned>>& ranges);
|
||||
|
||||
bool get_bounds(expr* e, unsigned& low, unsigned& high);
|
||||
bool append_char(expr_ref_vector& pinned, expr_mark& visited, buffer<std::pair<unsigned, unsigned>>& exclude, expr* r, unsigned_vector& str);
|
||||
bool some_string_in_re(expr_ref_vector& pinned, expr_mark& visited, expr* r, unsigned_vector& str);
|
||||
|
||||
bool some_string_in_re(expr_mark& visited, expr* r, unsigned_vector& str);
|
||||
|
||||
public:
|
||||
seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
|
||||
|
|
|
@ -1847,7 +1847,7 @@ namespace sls {
|
|||
|
||||
{
|
||||
zstring s1;
|
||||
if (ctx.is_true(e) && some_string_in_re(y, s1)) {
|
||||
if (ctx.is_true(e) && rw.some_string_in_re(y, s1)) {
|
||||
m_str_updates.push_back({ x, s1, 1 });
|
||||
return apply_update();
|
||||
}
|
||||
|
@ -2001,92 +2001,4 @@ namespace sls {
|
|||
choose(r2, k - 1, prefix2, result);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Find some string that is a member of regex
|
||||
//
|
||||
bool seq_plugin::some_string_in_re(expr* _r, zstring& s) {
|
||||
expr_ref r(_r, m);
|
||||
seq_rewriter rw(m);
|
||||
while (true) {
|
||||
if (seq.re.is_empty(r))
|
||||
return false;
|
||||
auto info = seq.re.get_info(r);
|
||||
if (info.nullable == l_true)
|
||||
return true;
|
||||
expr_ref r2 = rw.mk_derivative(r);
|
||||
if (!append_char(r, r2, s))
|
||||
return false;
|
||||
r = r2;
|
||||
}
|
||||
}
|
||||
|
||||
bool seq_plugin::get_bounds(expr* e, unsigned& low, unsigned& high) {
|
||||
// TODO: remove recursion (though it is probably not deep anyway)
|
||||
expr* x, * y;
|
||||
unsigned ch = 0;
|
||||
if (m.is_and(e, x, y)) {
|
||||
if (!get_bounds(x, low, high))
|
||||
return false;
|
||||
return get_bounds(y, low, high);
|
||||
}
|
||||
if (m.is_eq(e, x, y)) {
|
||||
if ((is_var(x) && seq.is_const_char(y, ch)) || (is_var(y) && seq.is_const_char(x, ch))) {
|
||||
if (ch < low || ch > high)
|
||||
return false;
|
||||
low = high = ch;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (seq.is_char_le(e, x, y)) {
|
||||
if (seq.is_const_char(x, ch)) {
|
||||
low = std::max(ch, low);
|
||||
return high >= low;
|
||||
}
|
||||
if (seq.is_const_char(y, ch)) {
|
||||
high = std::min(ch, high);
|
||||
return high >= low;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool seq_plugin::append_char(expr* r0, expr_ref& r, zstring& s) {
|
||||
expr* c, * th, * el;
|
||||
if (seq.re.is_union(r)) {
|
||||
auto info0 = seq.re.get_info(r0);
|
||||
for (expr* r1 : *to_app(r)) {
|
||||
auto info1 = seq.re.get_info(r1);
|
||||
if (r0 == r1)
|
||||
continue;
|
||||
if (info1.min_length < info0.min_length) {
|
||||
r = r1;
|
||||
return append_char(r0, r, s);
|
||||
}
|
||||
}
|
||||
expr* r2;
|
||||
do {
|
||||
r2 = to_app(r)->get_arg(ctx.rand(to_app(r)->get_num_args()));
|
||||
} while (r2 == r0);
|
||||
r = r2;
|
||||
// Just take one that is not a self loop (there is always such one element)
|
||||
return append_char(r0, r, s);
|
||||
}
|
||||
if (m.is_ite(r, c, th, el)) {
|
||||
unsigned low = 0, high = UINT_MAX;
|
||||
if (get_bounds(c, low, high)) {
|
||||
SASSERT(low <= high);
|
||||
s += zstring(low);
|
||||
r = th;
|
||||
return true;
|
||||
}
|
||||
verbose_stream() << "nyi append_char " << mk_bounded_pp(r, m) << "\n";
|
||||
NOT_IMPLEMENTED_YET();
|
||||
return false;
|
||||
}
|
||||
s += zstring(m_chars[ctx.rand(m_chars.size())]);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -147,9 +147,6 @@ namespace sls {
|
|||
void next_char(expr* r, unsigned_vector& chars);
|
||||
|
||||
bool is_in_re(zstring const& s, expr* r);
|
||||
bool some_string_in_re(expr* r, zstring& s);
|
||||
bool get_bounds(expr* e, unsigned& low, unsigned& high);
|
||||
bool append_char(expr* r0, expr_ref& r, zstring& s);
|
||||
|
||||
// access evaluation
|
||||
bool is_seq_predicate(expr* e);
|
||||
|
|
Loading…
Reference in a new issue