mirror of
https://github.com/Z3Prover/z3
synced 2025-04-15 13:28:47 +00:00
add (disabled) stubs for decomposing re-membership on regex
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
7f7663a3b4
commit
084cd335eb
|
@ -2109,6 +2109,34 @@ bool seq_rewriter::get_head_tail_reversed(expr* s, expr_ref& head, expr_ref& tai
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool seq_rewriter::get_re_head_tail(expr* r, expr_ref& head, expr_ref& tail) {
|
||||||
|
expr* r1 = nullptr, *r2 = nullptr;
|
||||||
|
if (re().is_concat(r, r1, r2)) {
|
||||||
|
head = r1;
|
||||||
|
tail = r2;
|
||||||
|
return re().min_length(r1) != UINT_MAX && re().max_length(r1) == re().min_length(r1);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool seq_rewriter::get_re_head_tail_reversed(expr* r, expr_ref& head, expr_ref& tail) {
|
||||||
|
expr* r1 = nullptr, *r2 = nullptr;
|
||||||
|
if (re().is_concat(r, r1, r2)) {
|
||||||
|
unsigned len = re().min_length(r2);
|
||||||
|
if (len != UINT_MAX && re().max_length(r2) == len) {
|
||||||
|
head = r1;
|
||||||
|
tail = r2;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (get_re_head_tail_reversed(r2, head, tail)) {
|
||||||
|
head = re().mk_concat(r1, head);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
expr_ref seq_rewriter::re_and(expr* cond, expr* r) {
|
expr_ref seq_rewriter::re_and(expr* cond, expr* r) {
|
||||||
if (m().is_true(cond))
|
if (m().is_true(cond))
|
||||||
return expr_ref(r, m());
|
return expr_ref(r, m());
|
||||||
|
@ -2586,6 +2614,21 @@ bool seq_rewriter::rewrite_contains_pattern(expr* a, expr* b, expr_ref& result)
|
||||||
"" in b -> is_nullable(b)
|
"" in b -> is_nullable(b)
|
||||||
(ele + tail) in b -> tail in (derivative e b)
|
(ele + tail) in b -> tail in (derivative e b)
|
||||||
(head + ele) in b -> head in (right-derivative e b)
|
(head + ele) in b -> head in (right-derivative e b)
|
||||||
|
|
||||||
|
Other rewrites:
|
||||||
|
s in b1 ++ b2, min_len(b1) = max_len(b2) != UINT_MAX ->
|
||||||
|
(seq.len s) >= min_len(b1) &
|
||||||
|
(seq.extract s 0 min_len(b1)) in b1 &
|
||||||
|
(seq.extract s min_len(b1) (- (seq.len s) min_len(b1))) in b2
|
||||||
|
|
||||||
|
similar for tail of regex
|
||||||
|
|
||||||
|
Disabled rewrite:
|
||||||
|
s + "ab" + t in all ++ "c" ++ all ++ .... ++ "z" ++ all
|
||||||
|
=>
|
||||||
|
disjunctions that cover cases where s overlaps given that "ab" does
|
||||||
|
not overlap with any of the sequences.
|
||||||
|
It is disabled because the solver doesn't handle disjunctions of regexes well.
|
||||||
*/
|
*/
|
||||||
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
|
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
|
||||||
|
|
||||||
|
@ -2615,14 +2658,35 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
|
||||||
result = re().mk_in_re(tl, re().mk_derivative(hd, b));
|
result = re().mk_in_re(tl, re().mk_derivative(hd, b));
|
||||||
return BR_REWRITE2;
|
return BR_REWRITE2;
|
||||||
}
|
}
|
||||||
else if (get_head_tail_reversed(a, hd, tl)) {
|
|
||||||
result = re().mk_in_re(
|
if (get_head_tail_reversed(a, hd, tl)) {
|
||||||
hd,
|
result = re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b)));
|
||||||
re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b)))
|
result = re().mk_in_re(hd, result);
|
||||||
);
|
|
||||||
return BR_REWRITE_FULL;
|
return BR_REWRITE_FULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if (get_re_head_tail(b, hd, tl)) {
|
||||||
|
SASSERT(re().min_length(hd) == re().max_length(hd));
|
||||||
|
expr_ref len_hd(m_autil.mk_int(re().min_length(hd)), m());
|
||||||
|
expr_ref len_a(str().mk_length(a), m());
|
||||||
|
expr_ref len_tl(m_autil.mk_sub(len_a, len_hd), m());
|
||||||
|
result = m().mk_and(m_autil.mk_ge(len_a, len_hd),
|
||||||
|
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
|
||||||
|
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
|
||||||
|
return BR_REWRITE_FULL;
|
||||||
|
}
|
||||||
|
if (get_re_head_tail_reversed(b, hd, tl)) {
|
||||||
|
SASSERT(re().min_length(tl) == re().max_length(tl));
|
||||||
|
expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m());
|
||||||
|
expr_ref len_a(str().mk_length(a), m());
|
||||||
|
expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m());
|
||||||
|
result = m().mk_and(m_autil.mk_ge(len_a, len_tl),
|
||||||
|
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
|
||||||
|
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
|
||||||
|
return BR_REWRITE_FULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (false && rewrite_contains_pattern(a, b, result))
|
if (false && rewrite_contains_pattern(a, b, result))
|
||||||
return BR_REWRITE_FULL;
|
return BR_REWRITE_FULL;
|
||||||
|
|
||||||
|
|
|
@ -136,6 +136,8 @@ class seq_rewriter {
|
||||||
// Support for regular expression derivatives
|
// Support for regular expression derivatives
|
||||||
bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail);
|
bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail);
|
||||||
bool get_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
|
bool get_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
|
||||||
|
bool get_re_head_tail(expr* e, expr_ref& head, expr_ref& tail);
|
||||||
|
bool get_re_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail);
|
||||||
expr_ref re_and(expr* cond, expr* r);
|
expr_ref re_and(expr* cond, expr* r);
|
||||||
expr_ref re_predicate(expr* cond, sort* seq_sort);
|
expr_ref re_predicate(expr* cond, sort* seq_sort);
|
||||||
expr_ref mk_seq_concat(expr* a, expr* b);
|
expr_ref mk_seq_concat(expr* a, expr* b);
|
||||||
|
|
|
@ -1155,6 +1155,13 @@ bv_util& seq_util::bv() const {
|
||||||
return *m_bv.get();
|
return *m_bv.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned seq_util::max_plus(unsigned x, unsigned y) const {
|
||||||
|
if (x + y < x || x + y < y)
|
||||||
|
return UINT_MAX;
|
||||||
|
return x + y;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool seq_util::is_const_char(expr* e, unsigned& c) const {
|
bool seq_util::is_const_char(expr* e, unsigned& c) const {
|
||||||
#if Z3_USE_UNICODE
|
#if Z3_USE_UNICODE
|
||||||
return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
|
return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
|
||||||
|
@ -1243,15 +1250,49 @@ app* seq_util::str::mk_is_empty(expr* s) const {
|
||||||
return m.mk_eq(s, mk_empty(get_sort(s)));
|
return m.mk_eq(s, mk_empty(get_sort(s)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
unsigned seq_util::str::min_length(expr* s) const {
|
unsigned seq_util::str::min_length(expr* s) const {
|
||||||
SASSERT(u.is_seq(s));
|
SASSERT(u.is_seq(s));
|
||||||
expr_ref_vector es(m);
|
|
||||||
unsigned result = 0;
|
unsigned result = 0;
|
||||||
get_concat_units(s, es);
|
expr* s1 = nullptr, *s2 = nullptr;
|
||||||
for (expr* arg : es) {
|
auto get_length = [&](expr* s1) {
|
||||||
if (is_unit(arg))
|
zstring st;
|
||||||
result++;
|
if (is_unit(s1))
|
||||||
|
return 1u;
|
||||||
|
else if (is_string(s1, st))
|
||||||
|
return st.length();
|
||||||
|
else
|
||||||
|
return 0u;
|
||||||
|
};
|
||||||
|
while (is_concat(s, s1, s2)) {
|
||||||
|
result += get_length(s1);
|
||||||
|
s = s2;
|
||||||
}
|
}
|
||||||
|
result += get_length(s);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned seq_util::str::max_length(expr* s) const {
|
||||||
|
SASSERT(u.is_seq(s));
|
||||||
|
unsigned result = 0;
|
||||||
|
expr* s1 = nullptr, *s2 = nullptr;
|
||||||
|
zstring st;
|
||||||
|
auto get_length = [&](expr* s1) {
|
||||||
|
if (is_empty(s1))
|
||||||
|
return 0u;
|
||||||
|
else if (is_unit(s1))
|
||||||
|
return 1u;
|
||||||
|
else if (is_string(s1, st))
|
||||||
|
return st.length();
|
||||||
|
else
|
||||||
|
return UINT_MAX;
|
||||||
|
};
|
||||||
|
while (is_concat(s, s1, s2)) {
|
||||||
|
result = u.max_plus(get_length(s), result);
|
||||||
|
s = s2;
|
||||||
|
}
|
||||||
|
result = u.max_plus(get_length(s), result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1260,15 +1301,8 @@ unsigned seq_util::re::min_length(expr* r) const {
|
||||||
expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
|
expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
|
||||||
if (is_empty(r))
|
if (is_empty(r))
|
||||||
return UINT_MAX;
|
return UINT_MAX;
|
||||||
if (is_concat(r, r1, r2)) {
|
if (is_concat(r, r1, r2))
|
||||||
unsigned l1 = min_length(r1);
|
return u.max_plus(min_length(r1), min_length(r2));
|
||||||
if (l1 == UINT_MAX)
|
|
||||||
return l1;
|
|
||||||
unsigned l2 = min_length(r2);
|
|
||||||
if (l2 == UINT_MAX)
|
|
||||||
return l2;
|
|
||||||
return l1 + l2;
|
|
||||||
}
|
|
||||||
if (m.is_ite(r, s, r1, r2))
|
if (m.is_ite(r, s, r1, r2))
|
||||||
return std::min(min_length(r1), min_length(r2));
|
return std::min(min_length(r1), min_length(r2));
|
||||||
if (is_diff(r, r1, r2))
|
if (is_diff(r, r1, r2))
|
||||||
|
@ -1282,6 +1316,26 @@ unsigned seq_util::re::min_length(expr* r) const {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned seq_util::re::max_length(expr* r) const {
|
||||||
|
SASSERT(u.is_re(r));
|
||||||
|
expr* r1 = nullptr, *r2 = nullptr, *s = nullptr;
|
||||||
|
if (is_empty(r))
|
||||||
|
return 0;
|
||||||
|
if (is_concat(r, r1, r2))
|
||||||
|
return u.max_plus(max_length(r1), max_length(r2));
|
||||||
|
if (m.is_ite(r, s, r1, r2))
|
||||||
|
return std::max(max_length(r1), max_length(r2));
|
||||||
|
if (is_diff(r, r1, r2))
|
||||||
|
return max_length(r1);
|
||||||
|
if (is_union(r, r1, r2))
|
||||||
|
return std::max(max_length(r1), max_length(r2));
|
||||||
|
if (is_intersection(r, r1, r2))
|
||||||
|
return std::min(max_length(r1), max_length(r2));
|
||||||
|
if (is_to_re(r, s))
|
||||||
|
return u.str.max_length(s);
|
||||||
|
return UINT_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
sort* seq_util::re::to_seq(sort* re) {
|
sort* seq_util::re::to_seq(sort* re) {
|
||||||
(void)u;
|
(void)u;
|
||||||
SASSERT(u.is_re(re));
|
SASSERT(u.is_re(re));
|
||||||
|
|
|
@ -236,6 +236,8 @@ class seq_util {
|
||||||
family_id m_fid;
|
family_id m_fid;
|
||||||
mutable scoped_ptr<bv_util> m_bv;
|
mutable scoped_ptr<bv_util> m_bv;
|
||||||
bv_util& bv() const;
|
bv_util& bv() const;
|
||||||
|
|
||||||
|
unsigned max_plus(unsigned x, unsigned y) const;
|
||||||
public:
|
public:
|
||||||
|
|
||||||
ast_manager& get_manager() const { return m; }
|
ast_manager& get_manager() const { return m; }
|
||||||
|
@ -400,6 +402,7 @@ public:
|
||||||
expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; }
|
expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; }
|
||||||
|
|
||||||
unsigned min_length(expr* s) const;
|
unsigned min_length(expr* s) const;
|
||||||
|
unsigned max_length(expr* s) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class re {
|
class re {
|
||||||
|
@ -469,6 +472,7 @@ public:
|
||||||
bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi);
|
bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi);
|
||||||
bool is_loop(expr const* n, expr*& body, expr*& lo);
|
bool is_loop(expr const* n, expr*& body, expr*& lo);
|
||||||
unsigned min_length(expr* r) const;
|
unsigned min_length(expr* r) const;
|
||||||
|
unsigned max_length(expr* r) const;
|
||||||
};
|
};
|
||||||
str str;
|
str str;
|
||||||
re re;
|
re re;
|
||||||
|
|
Loading…
Reference in a new issue