3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-06 17:44:08 +00:00

computing and memoizing info for regexes (#4647)

* computing and memoizing info for regex expressions

* computing and memoizing info for regex expressions

* took care of comments of the related pull request

* removed +1 from min_length of ite

* added to_str method for re and fixed STRACE bug in get_info_rec
This commit is contained in:
Margus Veanes 2020-08-18 20:01:59 -07:00 committed by GitHub
parent 747a8ff72a
commit c50e869e5a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 133 additions and 23 deletions

View file

@ -1314,26 +1314,7 @@ unsigned seq_util::str::max_length(expr* s) const {
unsigned seq_util::re::min_length(expr* r) const {
SASSERT(u.is_re(r));
expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
unsigned lo = 0, hi = 0;
if (is_empty(r))
return UINT_MAX;
if (is_concat(r, r1, r2))
return u.max_plus(min_length(r1), min_length(r2));
if (is_union(r, r1, r2) || m.is_ite(r, s, r1, r2))
return std::min(min_length(r1), min_length(r2));
if (is_intersection(r, r1, r2))
return std::max(min_length(r1), min_length(r2));
if (is_diff(r, r1, r2) || is_reverse(r, r1) || is_plus(r, r1))
return min_length(r1);
if (is_loop(r, r1, lo) || is_loop(r, r1, lo, hi))
return u.max_mul(lo, min_length(r1));
if (is_to_re(r, s))
return u.str.min_length(s);
if (is_range(r) || is_of_pred(r) || is_full_char(r))
return 1;
// Else: star, option, complement, full_seq, derivative
return 0;
return get_info(r).min_length;
}
unsigned seq_util::re::max_length(expr* r) const {
@ -1595,3 +1576,115 @@ std::ostream& seq_util::re::pp::display(std::ostream& out) const {
// Else: derivative or is_of_pred
return out << mk_pp(e, re.m);
}
/*
Pretty prints the regex r into the output string
*/
std::string seq_util::re::to_str(expr* r) const {
std::ostringstream out;
out << pp(u.re, r);
return out.str();
}
/*
Returns true iff info has been computed for the regex r
*/
bool seq_util::re::has_valid_info(expr* r) const {
return r->get_id() < m_infos.size() && m_infos[r->get_id()].is_valid();
}
/*
Returns the info in the cache if the info is valid. Returns invalid_info otherwise.
*/
seq_util::re::info seq_util::re::get_cached_info(expr* e) const {
if (has_valid_info(e))
return m_infos[e->get_id()];
else
return invalid_info;
}
/*
Get the information value associated with the regular expression e
*/
seq_util::re::info seq_util::re::get_info(expr* e) const
{
SASSERT(u.is_re(e));
auto result = get_cached_info(e);
if (result.is_valid())
return result;
m_info_pinned.push_back(e);
return get_info_rec(e);
}
/*
Gets the info value for the given regex e, possibly making a new info recursively over the structure of e.
*/
seq_util::re::info seq_util::re::get_info_rec(expr* e) const {
auto result = get_cached_info(e);
if (result.is_valid())
return result;
if (!is_app(e))
return invalid_info;
result = mk_info_rec(to_app(e));
m_infos.setx(e->get_id(), result, invalid_info);
STRACE("re_info", tout << "compute_info(" << pp(u.re, e) << "): min_length=" << m_infos[e->get_id()].min_length << std::endl;);
return result;
}
/*
Computes the info value for the given regex e recursively over the structure of e.
The regex e does not yet have an entry in the cache.
*/
seq_util::re::info seq_util::re::mk_info_rec(app* e) const {
info i1, i2;
if (e->get_family_id() == u.get_family_id()) {
switch (e->get_decl()->get_decl_kind())
{
case OP_RE_EMPTY_SET:
return info(UINT_MAX);
case OP_RE_FULL_SEQ_SET:
case OP_RE_STAR:
case OP_RE_OPTION:
return info(0);
case OP_RE_RANGE:
case OP_RE_FULL_CHAR_SET:
case OP_RE_OF_PRED:
return info(1);
case OP_RE_CONCAT:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return info(u.max_plus(i1.min_length, i2.min_length));
case OP_RE_UNION:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return info(std::min(i1.min_length, i2.min_length));
case OP_RE_INTERSECT:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return info(std::max(i1.min_length, i2.min_length));
case OP_SEQ_TO_RE:
return info(u.str.min_length(e->get_arg(0)));
case OP_RE_REVERSE:
case OP_RE_PLUS:
return get_info_rec(e->get_arg(0));
case OP_RE_COMPLEMENT:
i1 = get_info_rec(e->get_arg(0));
return info(i1.min_length > 0 ? 0 : UINT_MAX);
case OP_RE_LOOP:
i1 = get_info_rec(e->get_arg(0));
return info(u.max_mul(i1.min_length, e->get_decl()->get_parameter(0).get_int()));
case OP_RE_DIFF:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
return info(std::max(i1.min_length, i2.min_length > 0 ? 0 : UINT_MAX));
}
return invalid_info;
}
expr* c, * t, * f;
if (u.m.is_ite(e, c, t, f)) {
i1 = get_info_rec(t);
i2 = get_info_rec(f);
return info(std::min(i1.min_length, i2.min_length));
}
return invalid_info;
}

View file

@ -409,12 +409,29 @@ public:
};
class re {
struct info {
bool valid;
unsigned min_length;
info() : valid(false), min_length(0) {}
info(unsigned k) : valid(true), min_length(k) {}
bool is_valid() { return valid; }
};
seq_util& u;
ast_manager& m;
family_id m_fid;
vector<info> mutable m_infos;
expr_ref_vector mutable m_info_pinned;
info invalid_info;
bool has_valid_info(expr* r) const;
info get_info_rec(expr* r) const;
info mk_info_rec(app* r) const;
info get_cached_info(expr* e) const;
public:
re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {}
re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid), m_info_pinned(u.m) {}
sort* mk_re(sort* seq) { parameter param(seq); return m.mk_sort(m_fid, RE_SORT, 1, &param); }
sort* to_seq(sort* re);
@ -482,6 +499,8 @@ public:
unsigned max_length(expr* r) const;
bool is_epsilon(expr* r) const;
app* mk_epsilon(sort* seq_sort);
info get_info(expr* r) const;
std::string to_str(expr* r) const;
class pp {
seq_util::re& re;
@ -495,7 +514,6 @@ public:
pp(seq_util::re& r, expr* e) : re(r), e(e) {}
std::ostream& display(std::ostream&) const;
};
};
str str;
re re;
@ -511,7 +529,6 @@ public:
~seq_util() {}
family_id get_family_id() const { return m_fid; }
};
inline std::ostream& operator<<(std::ostream& out, seq_util::re::pp const & p) { return p.display(out); }