From 084cd335eb675057621532bd28bcc1c32cd292ad Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sun, 31 May 2020 12:25:21 -0700 Subject: [PATCH] add (disabled) stubs for decomposing re-membership on regex Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_rewriter.cpp | 74 ++++++++++++++++++++++++++-- src/ast/rewriter/seq_rewriter.h | 2 + src/ast/seq_decl_plugin.cpp | 82 +++++++++++++++++++++++++------ src/ast/seq_decl_plugin.h | 4 ++ 4 files changed, 143 insertions(+), 19 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 59f98d93a..955d6f34b 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2109,6 +2109,34 @@ bool seq_rewriter::get_head_tail_reversed(expr* s, expr_ref& head, expr_ref& tai return false; } +bool seq_rewriter::get_re_head_tail(expr* r, expr_ref& head, expr_ref& tail) { + expr* r1 = nullptr, *r2 = nullptr; + if (re().is_concat(r, r1, r2)) { + head = r1; + tail = r2; + return re().min_length(r1) != UINT_MAX && re().max_length(r1) == re().min_length(r1); + } + return false; +} + +bool seq_rewriter::get_re_head_tail_reversed(expr* r, expr_ref& head, expr_ref& tail) { + expr* r1 = nullptr, *r2 = nullptr; + if (re().is_concat(r, r1, r2)) { + unsigned len = re().min_length(r2); + if (len != UINT_MAX && re().max_length(r2) == len) { + head = r1; + tail = r2; + return true; + } + if (get_re_head_tail_reversed(r2, head, tail)) { + head = re().mk_concat(r1, head); + return true; + } + } + return false; +} + + expr_ref seq_rewriter::re_and(expr* cond, expr* r) { if (m().is_true(cond)) return expr_ref(r, m()); @@ -2586,6 +2614,21 @@ bool seq_rewriter::rewrite_contains_pattern(expr* a, expr* b, expr_ref& result) "" in b -> is_nullable(b) (ele + tail) in b -> tail in (derivative e b) (head + ele) in b -> head in (right-derivative e b) + +Other rewrites: + s in b1 ++ b2, min_len(b1) = max_len(b2) != UINT_MAX -> + (seq.len s) >= min_len(b1) & + (seq.extract s 0 min_len(b1)) in b1 & + (seq.extract s min_len(b1) (- (seq.len s) min_len(b1))) in b2 + + similar for tail of regex + +Disabled rewrite: + s + "ab" + t in all ++ "c" ++ all ++ .... ++ "z" ++ all + => + disjunctions that cover cases where s overlaps given that "ab" does + not overlap with any of the sequences. + It is disabled because the solver doesn't handle disjunctions of regexes well. */ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { @@ -2615,14 +2658,35 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { result = re().mk_in_re(tl, re().mk_derivative(hd, b)); return BR_REWRITE2; } - else if (get_head_tail_reversed(a, hd, tl)) { - result = re().mk_in_re( - hd, - re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b))) - ); + + if (get_head_tail_reversed(a, hd, tl)) { + result = re().mk_reverse(re().mk_derivative(tl, re().mk_reverse(b))); + result = re().mk_in_re(hd, result); return BR_REWRITE_FULL; } +#if 0 + if (get_re_head_tail(b, hd, tl)) { + SASSERT(re().min_length(hd) == re().max_length(hd)); + expr_ref len_hd(m_autil.mk_int(re().min_length(hd)), m()); + expr_ref len_a(str().mk_length(a), m()); + expr_ref len_tl(m_autil.mk_sub(len_a, len_hd), m()); + result = m().mk_and(m_autil.mk_ge(len_a, len_hd), + re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd), + re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)); + return BR_REWRITE_FULL; + } + if (get_re_head_tail_reversed(b, hd, tl)) { + SASSERT(re().min_length(tl) == re().max_length(tl)); + expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m()); + expr_ref len_a(str().mk_length(a), m()); + expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m()); + result = m().mk_and(m_autil.mk_ge(len_a, len_tl), + re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd), + re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)); + return BR_REWRITE_FULL; + } +#endif if (false && rewrite_contains_pattern(a, b, result)) return BR_REWRITE_FULL; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index afa6f5a40..5519ee0fc 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -136,6 +136,8 @@ class seq_rewriter { // Support for regular expression derivatives bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail); bool get_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail); + bool get_re_head_tail(expr* e, expr_ref& head, expr_ref& tail); + bool get_re_head_tail_reversed(expr* e, expr_ref& head, expr_ref& tail); expr_ref re_and(expr* cond, expr* r); expr_ref re_predicate(expr* cond, sort* seq_sort); expr_ref mk_seq_concat(expr* a, expr* b); diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 9bcb856f6..2ba1ef4d5 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1155,6 +1155,13 @@ bv_util& seq_util::bv() const { return *m_bv.get(); } +unsigned seq_util::max_plus(unsigned x, unsigned y) const { + if (x + y < x || x + y < y) + return UINT_MAX; + return x + y; +} + + bool seq_util::is_const_char(expr* e, unsigned& c) const { #if Z3_USE_UNICODE return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true); @@ -1243,15 +1250,49 @@ app* seq_util::str::mk_is_empty(expr* s) const { return m.mk_eq(s, mk_empty(get_sort(s))); } + + unsigned seq_util::str::min_length(expr* s) const { SASSERT(u.is_seq(s)); - expr_ref_vector es(m); unsigned result = 0; - get_concat_units(s, es); - for (expr* arg : es) { - if (is_unit(arg)) - result++; + expr* s1 = nullptr, *s2 = nullptr; + auto get_length = [&](expr* s1) { + zstring st; + if (is_unit(s1)) + return 1u; + else if (is_string(s1, st)) + return st.length(); + else + return 0u; + }; + while (is_concat(s, s1, s2)) { + result += get_length(s1); + s = s2; } + result += get_length(s); + return result; +} + +unsigned seq_util::str::max_length(expr* s) const { + SASSERT(u.is_seq(s)); + unsigned result = 0; + expr* s1 = nullptr, *s2 = nullptr; + zstring st; + auto get_length = [&](expr* s1) { + if (is_empty(s1)) + return 0u; + else if (is_unit(s1)) + return 1u; + else if (is_string(s1, st)) + return st.length(); + else + return UINT_MAX; + }; + while (is_concat(s, s1, s2)) { + result = u.max_plus(get_length(s), result); + s = s2; + } + result = u.max_plus(get_length(s), result); return result; } @@ -1260,15 +1301,8 @@ unsigned seq_util::re::min_length(expr* r) const { expr* r1 = nullptr, *r2 = nullptr, *s = nullptr; if (is_empty(r)) return UINT_MAX; - if (is_concat(r, r1, r2)) { - unsigned l1 = min_length(r1); - if (l1 == UINT_MAX) - return l1; - unsigned l2 = min_length(r2); - if (l2 == UINT_MAX) - return l2; - return l1 + l2; - } + if (is_concat(r, r1, r2)) + return u.max_plus(min_length(r1), min_length(r2)); if (m.is_ite(r, s, r1, r2)) return std::min(min_length(r1), min_length(r2)); if (is_diff(r, r1, r2)) @@ -1282,6 +1316,26 @@ unsigned seq_util::re::min_length(expr* r) const { return 0; } +unsigned seq_util::re::max_length(expr* r) const { + SASSERT(u.is_re(r)); + expr* r1 = nullptr, *r2 = nullptr, *s = nullptr; + if (is_empty(r)) + return 0; + if (is_concat(r, r1, r2)) + return u.max_plus(max_length(r1), max_length(r2)); + if (m.is_ite(r, s, r1, r2)) + return std::max(max_length(r1), max_length(r2)); + if (is_diff(r, r1, r2)) + return max_length(r1); + if (is_union(r, r1, r2)) + return std::max(max_length(r1), max_length(r2)); + if (is_intersection(r, r1, r2)) + return std::min(max_length(r1), max_length(r2)); + if (is_to_re(r, s)) + return u.str.max_length(s); + return UINT_MAX; +} + sort* seq_util::re::to_seq(sort* re) { (void)u; SASSERT(u.is_re(re)); diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index d131bfeb7..544628597 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -236,6 +236,8 @@ class seq_util { family_id m_fid; mutable scoped_ptr m_bv; bv_util& bv() const; + + unsigned max_plus(unsigned x, unsigned y) const; public: ast_manager& get_manager() const { return m; } @@ -400,6 +402,7 @@ public: expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; } unsigned min_length(expr* s) const; + unsigned max_length(expr* s) const; }; class re { @@ -469,6 +472,7 @@ public: bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi); bool is_loop(expr const* n, expr*& body, expr*& lo); unsigned min_length(expr* r) const; + unsigned max_length(expr* r) const; }; str str; re re;