3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-13 20:38:43 +00:00

add (disabled) stubs for decomposing re-membership on regex

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2020-05-31 12:25:21 -07:00
parent 7f7663a3b4
commit 084cd335eb
4 changed files with 143 additions and 19 deletions

View file

@ -2109,6 +2109,34 @@ bool seq_rewriter::get_head_tail_reversed(expr* s, expr_ref& head, expr_ref& tai
return false;
}
bool seq_rewriter::get_re_head_tail(expr* r, expr_ref& head, expr_ref& tail) {
expr* r1 = nullptr, *r2 = nullptr;
if (re().is_concat(r, r1, r2)) {
head = r1;
tail = r2;
return re().min_length(r1) != UINT_MAX && re().max_length(r1) == re().min_length(r1);
}
return false;
}
bool seq_rewriter::get_re_head_tail_reversed(expr* r, expr_ref& head, expr_ref& tail) {
expr* r1 = nullptr, *r2 = nullptr;
if (re().is_concat(r, r1, r2)) {
unsigned len = re().min_length(r2);
if (len != UINT_MAX && re().max_length(r2) == len) {
head = r1;
tail = r2;
return true;
}
if (get_re_head_tail_reversed(r2, head, tail)) {
head = re().mk_concat(r1, head);
return true;
}
}
return false;
}
expr_ref seq_rewriter::re_and(expr* cond, expr* r) {
if (m().is_true(cond))
return expr_ref(r, m());
@ -2586,6 +2614,21 @@ bool seq_rewriter::rewrite_contains_pattern(expr* a, expr* b, expr_ref& result)
"" in b -> is_nullable(b)
(ele + tail) in b -> tail in (derivative e b)
(head + ele) in b -> head in (right-derivative e b)
Other rewrites:
s in b1 ++ b2, min_len(b1) = max_len(b2) != UINT_MAX ->
(seq.len s) >= min_len(b1) &
(seq.extract s 0 min_len(b1)) in b1 &
(seq.extract s min_len(b1) (- (seq.len s) min_len(b1))) in b2
similar for tail of regex
Disabled rewrite:
s + "ab" + t in all ++ "c" ++ all ++ .... ++ "z" ++ all
=>
disjunctions that cover cases where s overlaps given that "ab" does
not overlap with any of the sequences.
It is disabled because the solver doesn't handle disjunctions of regexes well.
*/
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
@ -2615,14 +2658,35 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
result = re().mk_in_re(tl, re().mk_derivative(hd, b));
return BR_REWRITE2;
}
else if (get_head_tail_reversed(a, hd, tl)) {
result = re().mk_in_re(
hd,
re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b)))
);
if (get_head_tail_reversed(a, hd, tl)) {
result = re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b)));
result = re().mk_in_re(hd, result);
return BR_REWRITE_FULL;
}
#if 0
if (get_re_head_tail(b, hd, tl)) {
SASSERT(re().min_length(hd) == re().max_length(hd));
expr_ref len_hd(m_autil.mk_int(re().min_length(hd)), m());
expr_ref len_a(str().mk_length(a), m());
expr_ref len_tl(m_autil.mk_sub(len_a, len_hd), m());
result = m().mk_and(m_autil.mk_ge(len_a, len_hd),
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
return BR_REWRITE_FULL;
}
if (get_re_head_tail_reversed(b, hd, tl)) {
SASSERT(re().min_length(tl) == re().max_length(tl));
expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m());
expr_ref len_a(str().mk_length(a), m());
expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m());
result = m().mk_and(m_autil.mk_ge(len_a, len_tl),
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
return BR_REWRITE_FULL;
}
#endif
if (false && rewrite_contains_pattern(a, b, result))
return BR_REWRITE_FULL;

View file

@ -136,6 +136,8 @@ class seq_rewriter {
// Support for regular expression derivatives
bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail);
bool get_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
bool get_re_head_tail(expr* e, expr_ref& head, expr_ref& tail);
bool get_re_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
expr_ref re_and(expr* cond, expr* r);
expr_ref re_predicate(expr* cond, sort* seq_sort);
expr_ref mk_seq_concat(expr* a, expr* b);

View file

@ -1155,6 +1155,13 @@ bv_util& seq_util::bv() const {
return *m_bv.get();
}
unsigned seq_util::max_plus(unsigned x, unsigned y) const {
if (x + y < x || x + y < y)
return UINT_MAX;
return x + y;
}
bool seq_util::is_const_char(expr* e, unsigned& c) const {
#if Z3_USE_UNICODE
return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
@ -1243,15 +1250,49 @@ app* seq_util::str::mk_is_empty(expr* s) const {
return m.mk_eq(s, mk_empty(get_sort(s)));
}
unsigned seq_util::str::min_length(expr* s) const {
SASSERT(u.is_seq(s));
expr_ref_vector es(m);
unsigned result = 0;
get_concat_units(s, es);
for (expr* arg : es) {
if (is_unit(arg))
result++;
expr* s1 = nullptr, *s2 = nullptr;
auto get_length = [&](expr* s1) {
zstring st;
if (is_unit(s1))
return 1u;
else if (is_string(s1, st))
return st.length();
else
return 0u;
};
while (is_concat(s, s1, s2)) {
result += get_length(s1);
s = s2;
}
result += get_length(s);
return result;
}
unsigned seq_util::str::max_length(expr* s) const {
SASSERT(u.is_seq(s));
unsigned result = 0;
expr* s1 = nullptr, *s2 = nullptr;
zstring st;
auto get_length = [&](expr* s1) {
if (is_empty(s1))
return 0u;
else if (is_unit(s1))
return 1u;
else if (is_string(s1, st))
return st.length();
else
return UINT_MAX;
};
while (is_concat(s, s1, s2)) {
result = u.max_plus(get_length(s), result);
s = s2;
}
result = u.max_plus(get_length(s), result);
return result;
}
@ -1260,15 +1301,8 @@ unsigned seq_util::re::min_length(expr* r) const {
expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
if (is_empty(r))
return UINT_MAX;
if (is_concat(r, r1, r2)) {
unsigned l1 = min_length(r1);
if (l1 == UINT_MAX)
return l1;
unsigned l2 = min_length(r2);
if (l2 == UINT_MAX)
return l2;
return l1 + l2;
}
if (is_concat(r, r1, r2))
return u.max_plus(min_length(r1), min_length(r2));
if (m.is_ite(r, s, r1, r2))
return std::min(min_length(r1), min_length(r2));
if (is_diff(r, r1, r2))
@ -1282,6 +1316,26 @@ unsigned seq_util::re::min_length(expr* r) const {
return 0;
}
unsigned seq_util::re::max_length(expr* r) const {
SASSERT(u.is_re(r));
expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
if (is_empty(r))
return 0;
if (is_concat(r, r1, r2))
return u.max_plus(max_length(r1), max_length(r2));
if (m.is_ite(r, s, r1, r2))
return std::max(max_length(r1), max_length(r2));
if (is_diff(r, r1, r2))
return max_length(r1);
if (is_union(r, r1, r2))
return std::max(max_length(r1), max_length(r2));
if (is_intersection(r, r1, r2))
return std::min(max_length(r1), max_length(r2));
if (is_to_re(r, s))
return u.str.max_length(s);
return UINT_MAX;
}
sort* seq_util::re::to_seq(sort* re) {
(void)u;
SASSERT(u.is_re(re));

View file

@ -236,6 +236,8 @@ class seq_util {
family_id m_fid;
mutable scoped_ptr<bv_util> m_bv;
bv_util& bv() const;
unsigned max_plus(unsigned x, unsigned y) const;
public:
ast_manager& get_manager() const { return m; }
@ -400,6 +402,7 @@ public:
expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; }
unsigned min_length(expr* s) const;
unsigned max_length(expr* s) const;
};
class re {
@ -469,6 +472,7 @@ public:
bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi);
bool is_loop(expr const* n, expr*& body, expr*& lo);
unsigned min_length(expr* r) const;
unsigned max_length(expr* r) const;
};
str str;
re re;