From eb3f20832e91c8b3371ac57a87e65a8b6d89d75c Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sat, 23 May 2020 11:52:53 -0700 Subject: [PATCH] initial pass at using derivatives in regex unfolding Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_rewriter.cpp | 535 ++++++++++++++++----------- src/ast/rewriter/seq_rewriter.h | 16 +- src/ast/seq_decl_plugin.h | 2 +- src/smt/CMakeLists.txt | 3 +- src/smt/params/smt_params_helper.pyg | 1 + src/smt/params/theory_seq_params.cpp | 1 + src/smt/params/theory_seq_params.h | 4 +- src/smt/seq_regex.cpp | 266 +++++++++++++ src/smt/seq_regex.h | 77 ++++ src/smt/theory_seq.cpp | 13 +- src/smt/theory_seq.h | 4 + 11 files changed, 694 insertions(+), 228 deletions(-) create mode 100644 src/smt/seq_regex.cpp create mode 100644 src/smt/seq_regex.h diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index a13cd5ab0..e40afe143 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -410,78 +410,78 @@ void seq_rewriter::get_param_descrs(param_descrs & r) { } br_status seq_rewriter::mk_bool_app(func_decl* f, unsigned n, expr* const* args, expr_ref& result) { - switch (f->get_decl_kind()) { - case OP_AND: - return mk_bool_app_helper(true, n, args, result); - case OP_OR: - return mk_bool_app_helper(false, n, args, result); - default: - return BR_FAILED; - } + switch (f->get_decl_kind()) { + case OP_AND: + return mk_bool_app_helper(true, n, args, result); + case OP_OR: + return mk_bool_app_helper(false, n, args, result); + default: + return BR_FAILED; + } } br_status seq_rewriter::mk_bool_app_helper(bool is_and, unsigned n, expr* const* args, expr_ref& result) { - bool found = false; - expr* arg = nullptr; - - for (unsigned i = 0; i < n && !found; ++i) { - found = m_util.str.is_in_re(args[i]) || (m().is_not(args[i], arg) && m_util.str.is_in_re(arg)); - } - if (!found) return BR_FAILED; - - obj_map in_re, not_in_re; - bool found_pair = false; - - for (unsigned i = 0; i < n; ++i) { - expr* args_i = args[i]; - expr* x = nullptr; - expr* y = nullptr; - expr* z = nullptr; - if (m_util.str.is_in_re(args_i, x, y)) { - if (in_re.find(x, z)) { - in_re[x] = is_and ? m_util.re.mk_inter(z, y) : m_util.re.mk_union(z, y); - found_pair = true; - } - else { - in_re.insert(x, y); - } - found_pair |= not_in_re.contains(x); - } - else if (m().is_not(args_i, arg) && m_util.str.is_in_re(arg, x, y)) { + bool found = false; + expr* arg = nullptr; + + for (unsigned i = 0; i < n && !found; ++i) { + found = m_util.str.is_in_re(args[i]) || (m().is_not(args[i], arg) && m_util.str.is_in_re(arg)); + } + if (!found) return BR_FAILED; + + obj_map in_re, not_in_re; + bool found_pair = false; + + for (unsigned i = 0; i < n; ++i) { + expr* args_i = args[i]; + expr* x = nullptr; + expr* y = nullptr; + expr* z = nullptr; + if (m_util.str.is_in_re(args_i, x, y)) { + if (in_re.find(x, z)) { + in_re[x] = is_and ? re().mk_inter(z, y) : re().mk_union(z, y); + found_pair = true; + } + else { + in_re.insert(x, y); + } + found_pair |= not_in_re.contains(x); + } + else if (m().is_not(args_i, arg) && m_util.str.is_in_re(arg, x, y)) { if (not_in_re.find(x, z)) { - not_in_re[x] = is_and ? m_util.re.mk_union(z, y) : m_util.re.mk_inter(z, y); + not_in_re[x] = is_and ? re().mk_union(z, y) : re().mk_inter(z, y); found_pair = true; } else { not_in_re.insert(x, y); } - found_pair |= in_re.contains(x); - } - } - - if (!found_pair) { - return BR_FAILED; - } - - ptr_buffer new_args; - for (auto const & kv : in_re) { - expr* x = kv.m_key; - expr* y = kv.m_value; - expr* z = nullptr; - if (not_in_re.find(x, z)) { - expr* z_c = m_util.re.mk_complement(z); - expr* w = is_and ? m_util.re.mk_inter(y, z_c) : m_util.re.mk_union(y, z_c); - new_args.push_back(m_util.re.mk_in_re(x, w)); - } - else { - new_args.push_back(m_util.re.mk_in_re(x, y)); - } - } + found_pair |= in_re.contains(x); + } + } + + if (!found_pair) { + return BR_FAILED; + } + + ptr_buffer new_args; + for (auto const & kv : in_re) { + expr* x = kv.m_key; + expr* y = kv.m_value; + expr* z = nullptr; + if (not_in_re.find(x, z)) { + expr* z_c = re().mk_complement(z); + expr* w = is_and ? re().mk_inter(y, z_c) : re().mk_union(y, z_c); + new_args.push_back(re().mk_in_re(x, w)); + } + else { + new_args.push_back(re().mk_in_re(x, y)); + } + } for (auto const& kv : not_in_re) { expr* x = kv.m_key; expr* y = kv.m_value; if (!in_re.contains(x)) { - new_args.push_back(m_util.re.mk_in_re(x, m_util.re.mk_complement(y))); + new_args.push_back(re().mk_in_re(x, re().mk_complement(y))); } } for (unsigned i = 0; i < n; ++i) { @@ -490,9 +490,9 @@ br_status seq_rewriter::mk_bool_app_helper(bool is_and, unsigned n, expr* const* new_args.push_back(arg); } } - + result = is_and ? m().mk_and(new_args.size(), new_args.c_ptr()) : m().mk_or(new_args.size(), new_args.c_ptr()); - return BR_REWRITE_FULL; + return BR_REWRITE_FULL; } br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) { @@ -2066,14 +2066,14 @@ bool seq_rewriter::get_head_tail(expr* s, expr_ref& head, expr_ref& tail) { expr_ref seq_rewriter::kleene_and(expr* cond, expr* r) { if (m().is_true(cond)) return expr_ref(r, m()); - expr* re_empty = m_util.re.mk_empty(m().get_sort(r)); + expr* re_empty = re().mk_empty(m().get_sort(r)); if (m().is_false(cond)) return expr_ref(re_empty, m()); return expr_ref(m().mk_ite(cond, r, re_empty), m()); } expr_ref seq_rewriter::kleene_predicate(expr* cond, sort* seq_sort) { - expr_ref re_with_empty(m_util.re.mk_to_re(m_util.str.mk_empty(seq_sort)), m()); + expr_ref re_with_empty(re().mk_to_re(m_util.str.mk_empty(seq_sort)), m()); return kleene_and(cond, re_with_empty); } @@ -2082,39 +2082,39 @@ expr_ref seq_rewriter::is_nullable(expr* r) { expr* r1 = nullptr, *r2 = nullptr; unsigned lo = 0, hi = 0; expr_ref result(m()); - if (m_util.re.is_concat(r, r1, r2) || - m_util.re.is_intersection(r, r1, r2)) { + if (re().is_concat(r, r1, r2) || + re().is_intersection(r, r1, r2)) { result = mk_and(m(), is_nullable(r1), is_nullable(r2)); } - else if (m_util.re.is_union(r, r1, r2)) { + else if (re().is_union(r, r1, r2)) { result = mk_or(m(), is_nullable(r1), is_nullable(r2)); } - else if (m_util.re.is_diff(r, r1, r2)) { + else if (re().is_diff(r, r1, r2)) { result = mk_not(m(), is_nullable(r2)); result = mk_and(m(), is_nullable(r1), result); } - else if (m_util.re.is_star(r) || - m_util.re.is_opt(r) || - m_util.re.is_full_seq(r) || - (m_util.re.is_loop(r, r1, lo) && lo == 0) || - (m_util.re.is_loop(r, r1, lo, hi) && lo == 0)) { + else if (re().is_star(r) || + re().is_opt(r) || + re().is_full_seq(r) || + (re().is_loop(r, r1, lo) && lo == 0) || + (re().is_loop(r, r1, lo, hi) && lo == 0)) { result = m().mk_true(); } - else if (m_util.re.is_full_char(r) || - m_util.re.is_empty(r) || - m_util.re.is_of_pred(r) || - m_util.re.is_range(r)) { + else if (re().is_full_char(r) || + re().is_empty(r) || + re().is_of_pred(r) || + re().is_range(r)) { result = m().mk_false(); } - else if (m_util.re.is_plus(r, r1) || - (m_util.re.is_loop(r, r1, lo) && lo > 0) || - (m_util.re.is_loop(r, r1, lo, hi) && lo > 0)) { + else if (re().is_plus(r, r1) || + (re().is_loop(r, r1, lo) && lo > 0) || + (re().is_loop(r, r1, lo, hi) && lo > 0)) { result = is_nullable(r1); } - else if (m_util.re.is_complement(r, r1)) { + else if (re().is_complement(r, r1)) { result = mk_not(m(), is_nullable(r1)); } - else if (m_util.re.is_to_re(r, r1)) { + else if (re().is_to_re(r, r1)) { sort* seq_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); expr* emptystr = m_util.str.mk_empty(seq_sort); @@ -2123,7 +2123,7 @@ expr_ref seq_rewriter::is_nullable(expr* r) { else { sort* seq_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); - result = m_util.re.mk_in_re(m_util.str.mk_empty(seq_sort), r); + result = re().mk_in_re(m_util.str.mk_empty(seq_sort), r); } return result; } @@ -2141,14 +2141,14 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) { expr* r1 = nullptr, * r2 = nullptr, *p = nullptr; expr_ref dr1(m()), dr2(m()), result(m()); unsigned lo = 0, hi = 0; - if (m_util.re.is_concat(r, r1, r2)) { + if (re().is_concat(r, r1, r2)) { expr_ref is_n = is_nullable(r1); dr1 = derivative(elem, r1); if (!dr1) { result = dr1; // failed } else if (m().is_false(is_n)) { - result = m_util.re.mk_concat(dr1, r2); + result = re().mk_concat(dr1, r2); } else { dr2 = derivative(elem, r2); @@ -2156,67 +2156,67 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) { result = dr2; // failed } else if (m().is_true(is_n)) { - result = m_util.re.mk_union( - m_util.re.mk_concat(dr1, r2), + result = re().mk_union( + re().mk_concat(dr1, r2), dr2 ); } else { - result = m_util.re.mk_union( - m_util.re.mk_concat(dr1, r2), + result = re().mk_union( + re().mk_concat(dr1, r2), kleene_and(is_n, dr2) ); } } } - else if (m_util.re.is_star(r, r1)) { + else if (re().is_star(r, r1)) { result = derivative(elem, r1); if (result) { - result = m_util.re.mk_concat(result, r); + result = re().mk_concat(result, r); } } - else if (m_util.re.is_plus(r, r1)) { - result = m_util.re.mk_star(r1); + else if (re().is_plus(r, r1)) { + result = re().mk_star(r1); result = derivative(elem, result); } - else if (m_util.re.is_union(r, r1, r2)) { + else if (re().is_union(r, r1, r2)) { dr1 = derivative(elem, r1); dr2 = derivative(elem, r2); if (dr1 && dr2) { - result = m_util.re.mk_union(dr1, dr2); + result = re().mk_union(dr1, dr2); } } - else if (m_util.re.is_intersection(r, r1, r2)) { + else if (re().is_intersection(r, r1, r2)) { dr1 = derivative(elem, r1); dr2 = derivative(elem, r2); if (dr1 && dr2) { - result = m_util.re.mk_inter(dr1, dr2); + result = re().mk_inter(dr1, dr2); } } - else if (m_util.re.is_opt(r, r1)) { + else if (re().is_opt(r, r1)) { result = derivative(elem, r1); } - else if (m_util.re.is_complement(r, r1)) { + else if (re().is_complement(r, r1)) { result = derivative(elem, r1); if (result) { - result = m_util.re.mk_complement(result); + result = re().mk_complement(result); } } - else if (m_util.re.is_loop(r, r1, lo)) { + else if (re().is_loop(r, r1, lo)) { result = derivative(elem, r1); if (result) { if (lo > 0) { lo--; } - result = m_util.re.mk_concat( + result = re().mk_concat( result, - m_util.re.mk_loop(r1, lo) + re().mk_loop(r1, lo) ); } } - else if (m_util.re.is_loop(r, r1, lo, hi)) { + else if (re().is_loop(r, r1, lo, hi)) { if (hi == 0) { - result = m_util.re.mk_empty(m().get_sort(r)); + result = re().mk_empty(m().get_sort(r)); } else { result = derivative(elem, r1); @@ -2225,18 +2225,18 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) { if (lo > 0) { lo--; } - result = m_util.re.mk_concat( + result = re().mk_concat( result, - m_util.re.mk_loop(r1, lo, hi) + re().mk_loop(r1, lo, hi) ); } } } - else if (m_util.re.is_full_seq(r) || - m_util.re.is_empty(r)) { + else if (re().is_full_seq(r) || + re().is_empty(r)) { result = r; } - else if (m_util.re.is_to_re(r, r1)) { + else if (re().is_to_re(r, r1)) { // r1 is a string here (not a regexp) expr_ref hd(m()); expr_ref tl(m()); @@ -2244,14 +2244,14 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) { // head must be equal; if so, derivative is tail result = kleene_and( m().mk_eq(elem, hd), - m_util.re.mk_to_re(tl) + re().mk_to_re(tl) ); } else if (m_util.str.is_empty(r1)) { - result = m_util.re.mk_empty(m().get_sort(r)); + result = re().mk_empty(m().get_sort(r)); } } - else if (m_util.re.is_range(r, r1, r2)) { + else if (re().is_range(r, r1, r2)) { // r1, r2 are sequences. zstring s1, s2; if (m_util.str.is_string(r1, s1) && m_util.str.is_string(r2, s2)) { @@ -2262,14 +2262,14 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) { result = kleene_predicate(result, seq_sort); } else { - result = m_util.re.mk_empty(m().get_sort(r)); + result = re().mk_empty(m().get_sort(r)); } } } - else if (m_util.re.is_full_char(r)) { - result = m_util.re.mk_to_re(m_util.str.mk_empty(seq_sort)); + else if (re().is_full_char(r)) { + result = re().mk_to_re(m_util.str.mk_empty(seq_sort)); } - else if (m_util.re.is_of_pred(r, p)) { + else if (re().is_of_pred(r, p)) { array_util array(m()); expr* args[2] = { p, elem }; result = array.mk_select(2, args); @@ -2280,16 +2280,16 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) { br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { - if (m_util.re.is_empty(b)) { + if (re().is_empty(b)) { result = m().mk_false(); return BR_DONE; } - if (m_util.re.is_full_seq(b)) { + if (re().is_full_seq(b)) { result = m().mk_true(); return BR_DONE; } expr* b1 = nullptr; - if (m_util.re.is_to_re(b, b1)) { + if (re().is_to_re(b, b1)) { result = m().mk_eq(a, b1); return BR_REWRITE1; } @@ -2305,7 +2305,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { if (get_head_tail(a, hd, tl)) { expr_ref db = derivative(hd, b); // null if failed if (db) { - result = m_util.re.mk_in_re(tl, db); + result = re().mk_in_re(tl, db); return BR_REWRITE_FULL; } } @@ -2318,15 +2318,15 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) { } br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { - if (m_util.re.is_full_seq(a) && m_util.re.is_full_seq(b)) { + if (re().is_full_seq(a) && re().is_full_seq(b)) { result = a; return BR_DONE; } - if (m_util.re.is_empty(a)) { + if (re().is_empty(a)) { result = a; return BR_DONE; } - if (m_util.re.is_empty(b)) { + if (re().is_empty(b)) { result = b; return BR_DONE; } @@ -2339,47 +2339,47 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { return BR_DONE; } expr* a1 = nullptr, *b1 = nullptr; - if (m_util.re.is_to_re(a, a1) && m_util.re.is_to_re(b, b1)) { - result = m_util.re.mk_to_re(m_util.str.mk_concat(a1, b1)); + if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) { + result = re().mk_to_re(m_util.str.mk_concat(a1, b1)); return BR_REWRITE2; } - if (m_util.re.is_star(a, a1) && m_util.re.is_star(b, b1) && a1 == b1) { + if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1) { result = a; return BR_DONE; } - if (m_util.re.is_star(a, a1) && a1 == b) { - result = m_util.re.mk_concat(b, a); + if (re().is_star(a, a1) && a1 == b) { + result = re().mk_concat(b, a); return BR_DONE; } unsigned lo1, hi1, lo2, hi2; - if (m_util.re.is_loop(a, a1, lo1, hi1) && lo1 <= hi1 && m_util.re.is_loop(b, b1, lo2, hi2) && lo2 <= hi2 && a1 == b1) { - result = m_util.re.mk_loop(a1, lo1 + lo2, hi1 + hi2); + if (re().is_loop(a, a1, lo1, hi1) && lo1 <= hi1 && re().is_loop(b, b1, lo2, hi2) && lo2 <= hi2 && a1 == b1) { + result = re().mk_loop(a1, lo1 + lo2, hi1 + hi2); return BR_DONE; } - if (m_util.re.is_loop(a, a1, lo1) && m_util.re.is_loop(b, b1, lo2) && a1 == b1) { - result = m_util.re.mk_loop(a1, lo1 + lo2); + if (re().is_loop(a, a1, lo1) && re().is_loop(b, b1, lo2) && a1 == b1) { + result = re().mk_loop(a1, lo1 + lo2); return BR_DONE; } for (unsigned i = 0; i < 2; ++i) { // (loop a lo1) + (loop a lo2 hi2) = (loop a lo1 + lo2) - if (m_util.re.is_loop(a, a1, lo1) && m_util.re.is_loop(b, b1, lo2, hi2) && lo2 <= hi2 && a1 == b1) { - result = m_util.re.mk_loop(a1, lo1 + lo2); + if (re().is_loop(a, a1, lo1) && re().is_loop(b, b1, lo2, hi2) && lo2 <= hi2 && a1 == b1) { + result = re().mk_loop(a1, lo1 + lo2); return BR_DONE; } // (loop a lo1 hi1) + a* = (loop a lo1) - if (m_util.re.is_loop(a, a1, lo1, hi1) && m_util.re.is_star(b, b1) && a1 == b1) { - result = m_util.re.mk_loop(a1, lo1); + if (re().is_loop(a, a1, lo1, hi1) && re().is_star(b, b1) && a1 == b1) { + result = re().mk_loop(a1, lo1); return BR_DONE; } // (loop a lo1) + a* = (loop a lo1) - if (m_util.re.is_loop(a, a1, lo1) && m_util.re.is_star(b, b1) && a1 == b1) { + if (re().is_loop(a, a1, lo1) && re().is_star(b, b1) && a1 == b1) { result = a; return BR_DONE; } // (loop a lo1 hi1) + a = (loop a lo1+1 hi1+1) - if (m_util.re.is_loop(a, a1, lo1, hi1) && lo1 <= hi1 && a1 == b) { - result = m_util.re.mk_loop(a1, lo1+1, hi1+1); + if (re().is_loop(a, a1, lo1, hi1) && lo1 <= hi1 && a1 == b) { + result = re().mk_loop(a1, lo1+1, hi1+1); return BR_DONE; } std::swap(a, b); @@ -2396,27 +2396,27 @@ br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } - if (m_util.re.is_empty(a)) { + if (re().is_empty(a)) { result = b; return BR_DONE; } - if (m_util.re.is_empty(b)) { + if (re().is_empty(b)) { result = a; return BR_DONE; } - if (m_util.re.is_full_seq(a)) { + if (re().is_full_seq(a)) { result = a; return BR_DONE; } - if (m_util.re.is_full_seq(b)) { + if (re().is_full_seq(b)) { result = b; return BR_DONE; } - if (m_util.re.is_star(a) && is_epsilon(b)) { + if (re().is_star(a) && is_epsilon(b)) { result = a; return BR_DONE; } - if (m_util.re.is_star(b) && is_epsilon(a)) { + if (re().is_star(b) && is_epsilon(a)) { result = b; return BR_DONE; } @@ -2425,20 +2425,20 @@ br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) { br_status seq_rewriter::mk_re_complement(expr* a, expr_ref& result) { expr* e1, *e2; - if (m_util.re.is_intersection(a, e1, e2)) { - result = m_util.re.mk_union(m_util.re.mk_complement(e1), m_util.re.mk_complement(e2)); + if (re().is_intersection(a, e1, e2)) { + result = re().mk_union(re().mk_complement(e1), re().mk_complement(e2)); return BR_REWRITE2; } - if (m_util.re.is_union(a, e1, e2)) { - result = m_util.re.mk_inter(m_util.re.mk_complement(e1), m_util.re.mk_complement(e2)); + if (re().is_union(a, e1, e2)) { + result = re().mk_inter(re().mk_complement(e1), re().mk_complement(e2)); return BR_REWRITE2; } - if (m_util.re.is_empty(a)) { - result = m_util.re.mk_full_seq(m().get_sort(a)); + if (re().is_empty(a)) { + result = re().mk_full_seq(m().get_sort(a)); return BR_DONE; } - if (m_util.re.is_full_seq(a)) { - result = m_util.re.mk_empty(m().get_sort(a)); + if (re().is_full_seq(a)) { + result = re().mk_empty(m().get_sort(a)); return BR_DONE; } return BR_FAILED; @@ -2456,40 +2456,40 @@ br_status seq_rewriter::mk_re_inter(expr* a, expr* b, expr_ref& result) { result = a; return BR_DONE; } - if (m_util.re.is_empty(a)) { + if (re().is_empty(a)) { result = a; return BR_DONE; } - if (m_util.re.is_empty(b)) { + if (re().is_empty(b)) { result = b; return BR_DONE; } - if (m_util.re.is_full_seq(a)) { + if (re().is_full_seq(a)) { result = b; return BR_DONE; } - if (m_util.re.is_full_seq(b)) { + if (re().is_full_seq(b)) { result = a; return BR_DONE; } expr* ac = nullptr, *bc = nullptr; - if ((m_util.re.is_complement(a, ac) && ac == b) || - (m_util.re.is_complement(b, bc) && bc == a)) { - result = m_util.re.mk_empty(m().get_sort(a)); + if ((re().is_complement(a, ac) && ac == b) || + (re().is_complement(b, bc) && bc == a)) { + result = re().mk_empty(m().get_sort(a)); return BR_DONE; } - if (m_util.re.is_to_re(b)) + if (re().is_to_re(b)) std::swap(a, b); expr* s = nullptr; - if (m_util.re.is_to_re(a, s)) { - result = m().mk_ite(m_util.re.mk_in_re(s, b), a, m_util.re.mk_empty(m().get_sort(a))); + if (re().is_to_re(a, s)) { + result = m().mk_ite(re().mk_in_re(s, b), a, re().mk_empty(m().get_sort(a))); return BR_REWRITE2; } return BR_FAILED; } br_status seq_rewriter::mk_re_diff(expr* a, expr* b, expr_ref& result) { - result = m_util.re.mk_inter(a, m_util.re.mk_complement(b)); + result = re().mk_inter(a, re().mk_complement(b)); return BR_REWRITE2; } @@ -2505,21 +2505,21 @@ br_status seq_rewriter::mk_re_loop(func_decl* f, unsigned num_args, expr* const* hi2 = np > 1 ? f->get_parameter(1).get_int() : lo2; // (loop a 0 0) = "" if (np == 2 && lo2 > hi2) { - result = m_util.re.mk_empty(m().get_sort(args[0])); + result = re().mk_empty(m().get_sort(args[0])); return BR_DONE; } if (np == 2 && hi2 == 0) { - result = m_util.re.mk_to_re(m_util.str.mk_empty(m_util.re.to_seq(m().get_sort(args[0])))); + result = re().mk_to_re(m_util.str.mk_empty(re().to_seq(m().get_sort(args[0])))); return BR_DONE; } // (loop (loop a lo) lo2) = (loop lo*lo2) - if (m_util.re.is_loop(args[0], a, lo) && np == 1) { - result = m_util.re.mk_loop(a, lo2 * lo); + if (re().is_loop(args[0], a, lo) && np == 1) { + result = re().mk_loop(a, lo2 * lo); return BR_REWRITE1; } // (loop (loop a l l) h h) = (loop a l*h l*h) - if (m_util.re.is_loop(args[0], a, lo, hi) && np == 2 && lo == hi && lo2 == hi2) { - result = m_util.re.mk_loop(a, lo2 * lo, hi2 * hi); + if (re().is_loop(args[0], a, lo, hi) && np == 2 && lo == hi && lo2 == hi2) { + result = re().mk_loop(a, lo2 * lo, hi2 * hi); return BR_REWRITE1; } // (loop a 1 1) = a @@ -2529,20 +2529,20 @@ br_status seq_rewriter::mk_re_loop(func_decl* f, unsigned num_args, expr* const* } // (loop a 0) = a* if (np == 1 && lo2 == 0) { - result = m_util.re.mk_star(args[0]); + result = re().mk_star(args[0]); return BR_DONE; } break; case 2: if (m_autil.is_numeral(args[1], n1) && n1.is_unsigned()) { - result = m_util.re.mk_loop(args[0], n1.get_unsigned()); + result = re().mk_loop(args[0], n1.get_unsigned()); return BR_REWRITE1; } break; case 3: if (m_autil.is_numeral(args[1], n1) && n1.is_unsigned() && m_autil.is_numeral(args[2], n2) && n2.is_unsigned()) { - result = m_util.re.mk_loop(args[0], n1.get_unsigned(), n2.get_unsigned()); + result = re().mk_loop(args[0], n1.get_unsigned(), n2.get_unsigned()); return BR_REWRITE1; } break; @@ -2554,7 +2554,7 @@ br_status seq_rewriter::mk_re_loop(func_decl* f, unsigned num_args, expr* const* br_status seq_rewriter::mk_re_power(func_decl* f, expr* a, expr_ref& result) { unsigned p = f->get_parameter(0).get_int(); - result = m_util.re.mk_loop(a, p, p); + result = re().mk_loop(a, p, p); return BR_REWRITE1; } @@ -2570,45 +2570,45 @@ br_status seq_rewriter::mk_re_power(func_decl* f, expr* a, expr_ref& result) { */ br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) { expr* b, *c, *b1, *c1; - if (m_util.re.is_star(a) || m_util.re.is_full_seq(a)) { + if (re().is_star(a) || re().is_full_seq(a)) { result = a; return BR_DONE; } - if (m_util.re.is_full_char(a)) { - result = m_util.re.mk_full_seq(m().get_sort(a)); + if (re().is_full_char(a)) { + result = re().mk_full_seq(m().get_sort(a)); return BR_DONE; } - if (m_util.re.is_empty(a)) { + if (re().is_empty(a)) { sort* seq_sort = nullptr; VERIFY(m_util.is_re(a, seq_sort)); - result = m_util.re.mk_to_re(m_util.str.mk_empty(seq_sort)); + result = re().mk_to_re(m_util.str.mk_empty(seq_sort)); return BR_DONE; } - if (m_util.re.is_plus(a, b)) { - result = m_util.re.mk_star(b); + if (re().is_plus(a, b)) { + result = re().mk_star(b); return BR_DONE; } - if (m_util.re.is_union(a, b, c)) { - if (m_util.re.is_star(b, b1)) { - result = m_util.re.mk_star(m_util.re.mk_union(b1, c)); + if (re().is_union(a, b, c)) { + if (re().is_star(b, b1)) { + result = re().mk_star(re().mk_union(b1, c)); return BR_REWRITE2; } - if (m_util.re.is_star(c, c1)) { - result = m_util.re.mk_star(m_util.re.mk_union(b, c1)); + if (re().is_star(c, c1)) { + result = re().mk_star(re().mk_union(b, c1)); return BR_REWRITE2; } if (is_epsilon(b)) { - result = m_util.re.mk_star(c); + result = re().mk_star(c); return BR_REWRITE2; } if (is_epsilon(c)) { - result = m_util.re.mk_star(b); + result = re().mk_star(b); return BR_REWRITE2; } } - if (m_util.re.is_concat(a, b, c) && - m_util.re.is_star(b, b1) && m_util.re.is_star(c, c1)) { - result = m_util.re.mk_star(m_util.re.mk_union(b1, c1)); + if (re().is_concat(a, b, c) && + re().is_star(b, b1) && re().is_star(c, c1)) { + result = re().mk_star(re().mk_union(b1, c1)); return BR_REWRITE2; } @@ -2630,11 +2630,11 @@ br_status seq_rewriter::mk_re_range(expr* lo, expr* hi, expr_ref& result) { a+ = aa* */ br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) { - if (m_util.re.is_empty(a)) { + if (re().is_empty(a)) { result = a; return BR_DONE; } - if (m_util.re.is_full_seq(a)) { + if (re().is_full_seq(a)) { result = a; return BR_DONE; } @@ -2642,80 +2642,175 @@ br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) { result = a; return BR_DONE; } - if (m_util.re.is_plus(a)) { + if (re().is_plus(a)) { result = a; return BR_DONE; } - if (m_util.re.is_star(a)) { + if (re().is_star(a)) { result = a; return BR_DONE; } - result = m_util.re.mk_concat(a, m_util.re.mk_star(a)); + result = re().mk_concat(a, re().mk_star(a)); return BR_REWRITE2; } br_status seq_rewriter::mk_re_opt(expr* a, expr_ref& result) { sort* s = nullptr; VERIFY(m_util.is_re(a, s)); - result = m_util.re.mk_union(m_util.re.mk_to_re(m_util.str.mk_empty(s)), a); + result = re().mk_union(re().mk_to_re(m_util.str.mk_empty(s)), a); return BR_REWRITE1; } +bool seq_rewriter::has_cofactor(expr* r, expr_ref& cond, expr_ref& th, expr_ref& el) { + expr_ref_vector trail(m()), args_th(m()), args_el(m()); + expr* c = nullptr, *tt = nullptr, *ee = nullptr; + obj_map cache_th, cache_el; + expr_mark no_cofactor, visited; + ptr_vector todo; + todo.push_back(r); + while (!todo.empty()) { + expr* e = todo.back(); + if (visited.is_marked(e) || !is_app(e)) { + todo.pop_back(); + continue; + } + app* a = to_app(e); + if (m().is_ite(e, c, tt, ee)) { + if (!cond) { + cond = c; + cache_th.insert(a, tt); + cache_el.insert(a, ee); + } + else if (cond == c) { + cache_th.insert(a, tt); + cache_el.insert(a, ee); + } + else { + no_cofactor.mark(a); + } + visited.mark(e, true); + todo.pop_back(); + continue; + } + if (a->get_family_id() != u().get_family_id()) { + visited.mark(e, true); + no_cofactor.mark(e, true); + todo.pop_back(); + continue; + } + switch (a->get_decl_kind()) { + case OP_RE_CONCAT: + case OP_RE_UNION: + case OP_RE_INTERSECT: + case OP_RE_COMPLEMENT: + break; + case OP_RE_STAR: + case OP_RE_LOOP: + default: + visited.mark(e, true); + no_cofactor.mark(e, true); + continue; + } + args_th.reset(); + args_el.reset(); + bool has_cof = false; + for (expr* arg : *a) { + if (no_cofactor.is_marked(arg)) { + args_th.push_back(arg); + args_el.push_back(arg); + } + else if (cache_th.contains(arg)) { + args_th.push_back(cache_th[arg]); + args_el.push_back(cache_el[arg]); + has_cof = true; + } + else { + todo.push_back(arg); + } + } + if (args_th.size() == a->get_num_args()) { + if (has_cof) { + th = m().mk_app(a->get_decl(), args_th); + el = m().mk_app(a->get_decl(), args_el); + trail.push_back(th); + trail.push_back(el); + cache_th.insert(a, th); + cache_el.insert(a, el); + } + else { + no_cofactor.mark(a, true); + } + visited.mark(e, true); + todo.pop_back(); + } + } + SASSERT(cond == !no_cofactor.is_marked(r)); + if (cond) { + th = cache_th[r]; + el = cache_el[r]; + return true; + } + else { + return false; + } +} + + br_status seq_rewriter::reduce_re_is_empty(expr* r, expr_ref& result) { expr* r1, *r2, *r3, *r4; zstring s1, s2; unsigned lo, hi; - auto eq_empty = [&](expr* r) { return m().mk_eq(r, m_util.re.mk_empty(m().get_sort(r))); }; - if (m_util.re.is_union(r, r1, r2)) { + auto eq_empty = [&](expr* r) { return m().mk_eq(r, re().mk_empty(m().get_sort(r))); }; + if (re().is_union(r, r1, r2)) { result = m().mk_and(eq_empty(r1), eq_empty(r2)); return BR_REWRITE2; } - if (m_util.re.is_star(r) || - m_util.re.is_to_re(r) || - m_util.re.is_full_char(r) || - m_util.re.is_full_seq(r)) { + if (re().is_star(r) || + re().is_to_re(r) || + re().is_full_char(r) || + re().is_full_seq(r)) { result = m().mk_false(); return BR_DONE; } - if (m_util.re.is_concat(r, r1, r2)) { + if (re().is_concat(r, r1, r2)) { result = m().mk_or(eq_empty(r1), eq_empty(r2)); return BR_REWRITE2; } - else if (m_util.re.is_range(r, r1, r2) && + else if (re().is_range(r, r1, r2) && m_util.str.is_string(r1, s1) && m_util.str.is_string(r2, s2) && s1.length() == 1 && s2.length() == 1) { result = m().mk_bool_val(s1[0] <= s2[0]); return BR_DONE; } - else if ((m_util.re.is_loop(r, r1, lo) || - m_util.re.is_loop(r, r1, lo, hi)) && lo == 0) { + else if ((re().is_loop(r, r1, lo) || + re().is_loop(r, r1, lo, hi)) && lo == 0) { result = m().mk_false(); return BR_DONE; } - else if (m_util.re.is_loop(r, r1, lo) || - (m_util.re.is_loop(r, r1, lo, hi) && lo <= hi)) { + else if (re().is_loop(r, r1, lo) || + (re().is_loop(r, r1, lo, hi) && lo <= hi)) { result = eq_empty(r1); return BR_REWRITE1; } - // DNF expansion: - else if (m_util.re.is_intersection(r, r1, r2) && m_util.re.is_union(r1, r3, r4)) { - result = eq_empty(m_util.re.mk_union(m_util.re.mk_inter(r3, r2), m_util.re.mk_inter(r4, r2))); + // Partial DNF expansion: + else if (re().is_intersection(r, r1, r2) && re().is_union(r1, r3, r4)) { + result = eq_empty(re().mk_union(re().mk_inter(r3, r2), re().mk_inter(r4, r2))); return BR_REWRITE3; } - else if (m_util.re.is_intersection(r, r1, r2) && m_util.re.is_union(r2, r3, r4)) { - result = eq_empty(m_util.re.mk_union(m_util.re.mk_inter(r3, r1), m_util.re.mk_inter(r4, r1))); + else if (re().is_intersection(r, r1, r2) && re().is_union(r2, r3, r4)) { + result = eq_empty(re().mk_union(re().mk_inter(r3, r1), re().mk_inter(r4, r1))); return BR_REWRITE3; } return BR_FAILED; } br_status seq_rewriter::reduce_re_eq(expr* l, expr* r, expr_ref& result) { - if (m_util.re.is_empty(l)) { + if (re().is_empty(l)) { std::swap(l, r); } - if (m_util.re.is_empty(r)) { + if (re().is_empty(r)) { return reduce_re_is_empty(l, result); } return BR_FAILED; @@ -2996,9 +3091,9 @@ bool seq_rewriter::reduce_contains(expr* a, expr* b, expr_ref_vector& disj) { } if (m_util.str.is_string(b, s)) { - expr* all = m_util.re.mk_full_seq(m_util.re.mk_re(m().get_sort(b))); - disj.push_back(m_util.re.mk_in_re(m_util.str.mk_concat(m_lhs.size() - i, m_lhs.c_ptr() + i, sort_a), - m_util.re.mk_concat(all, m_util.re.mk_concat(m_util.re.mk_to_re(b), all)))); + expr* all = re().mk_full_seq(re().mk_re(m().get_sort(b))); + disj.push_back(re().mk_in_re(m_util.str.mk_concat(m_lhs.size() - i, m_lhs.c_ptr() + i, sort_a), + re().mk_concat(all, re().mk_concat(re().mk_to_re(b), all)))); return true; } @@ -3155,7 +3250,7 @@ bool seq_rewriter::reduce_by_length(expr_ref_vector& ls, expr_ref_vector& rs, bool seq_rewriter::is_epsilon(expr* e) const { expr* e1; - return m_util.re.is_to_re(e, e1) && m_util.str.is_empty(e1); + return re().is_to_re(e, e1) && m_util.str.is_empty(e1); } bool seq_rewriter::reduce_subsequence(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs) { diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index d14bc671c..56d439f5b 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -135,10 +135,8 @@ class seq_rewriter { // Support for regular expression derivatives bool get_head_tail(expr* e, expr_ref& head, expr_ref& tail); - expr_ref is_nullable(expr* r); expr_ref kleene_and(expr* cond, expr* r); expr_ref kleene_predicate(expr* cond, sort* seq_sort); - expr_ref derivative(expr* hd, expr* r); br_status mk_seq_unit(expr* e, expr_ref& result); br_status mk_seq_concat(expr* a, expr* b, expr_ref& result); @@ -181,8 +179,7 @@ class seq_rewriter { br_status reduce_re_eq(expr* a, expr* b, expr_ref& result); br_status reduce_re_is_empty(expr* r, expr_ref& result); - - br_status mk_bool_app_helper(bool is_and, unsigned n, expr* const* args, expr_ref& result); + br_status mk_bool_app_helper(bool is_and, unsigned n, expr* const* args, expr_ref& result); bool cannot_contain_prefix(expr* a, expr* b); bool cannot_contain_suffix(expr* a, expr* b); @@ -212,6 +209,11 @@ class seq_rewriter { void remove_empty_and_concats(expr_ref_vector& es); void remove_leading(unsigned n, expr_ref_vector& es); + class seq_util::re& re() { return u().re; } + class seq_util::re const& re() const { return u().re; } + class seq_util::str& str() { return u().str; } + class seq_util::str const& str() const { return u().str; } + public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): m_util(m), m_autil(m), m_re2aut(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { @@ -219,6 +221,7 @@ public: ast_manager & m() const { return m_util.get_manager(); } family_id get_fid() const { return m_util.get_family_id(); } seq_util const& u() const { return m_util; } + seq_util& u() { return m_util; } void updt_params(params_ref const & p); static void get_param_descrs(param_descrs & r); @@ -241,6 +244,11 @@ public: br_status mk_bool_app(func_decl* f, unsigned n, expr* const* args, expr_ref& result); + expr_ref derivative(expr* hd, expr* r); + + expr_ref is_nullable(expr* r); + + bool has_cofactor(expr* r, expr_ref& cond, expr_ref& th, expr_ref& el); }; diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index c0e2d1157..46cc27fe4 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -242,7 +242,7 @@ public: bool is_re(sort* s) const { return is_sort_of(s, m_fid, RE_SORT); } bool is_re(sort* s, sort*& seq) const { return is_sort_of(s, m_fid, RE_SORT) && (seq = to_sort(s->get_parameter(0).get_ast()), true); } bool is_seq(expr* e) const { return is_seq(m.get_sort(e)); } - bool is_seq(sort* s, sort*& seq) { return is_seq(s) && (seq = to_sort(s->get_parameter(0).get_ast()), true); } + bool is_seq(sort* s, sort*& seq) const { return is_seq(s) && (seq = to_sort(s->get_parameter(0).get_ast()), true); } bool is_re(expr* e) const { return is_re(m.get_sort(e)); } bool is_re(expr* e, sort*& seq) const { return is_re(m.get_sort(e), seq); } bool is_char(expr* e) const { return is_char(m.get_sort(e)); } diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt index 5f55677fc..f7a17a0ef 100644 --- a/src/smt/CMakeLists.txt +++ b/src/smt/CMakeLists.txt @@ -13,10 +13,11 @@ z3_add_component(smt old_interval.cpp qi_queue.cpp seq_axioms.cpp - seq_skolem.cpp seq_eq_solver.cpp seq_ne_solver.cpp seq_offset_eq.cpp + seq_regex.cpp + seq_skolem.cpp seq_unicode.cpp smt_almost_cg_table.cpp smt_arith_value.cpp diff --git a/src/smt/params/smt_params_helper.pyg b/src/smt/params/smt_params_helper.pyg index 6cbbf77fb..95d359dee 100644 --- a/src/smt/params/smt_params_helper.pyg +++ b/src/smt/params/smt_params_helper.pyg @@ -96,6 +96,7 @@ def_module_params(module_name='smt', ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'), + ('seq.use_derivatives', BOOL, False, 'dev flag (not for users) enable derivative based unfolding of regex'), ('str.strong_arrangements', BOOL, True, 'assert equivalences instead of implications when generating string arrangement axioms'), ('str.aggressive_length_testing', BOOL, False, 'prioritize testing concrete length values over generating more options'), ('str.aggressive_value_testing', BOOL, False, 'prioritize testing concrete string constant values over generating more options'), diff --git a/src/smt/params/theory_seq_params.cpp b/src/smt/params/theory_seq_params.cpp index 68cce6e66..0563cb615 100644 --- a/src/smt/params/theory_seq_params.cpp +++ b/src/smt/params/theory_seq_params.cpp @@ -21,4 +21,5 @@ void theory_seq_params::updt_params(params_ref const & _p) { smt_params_helper p(_p); m_split_w_len = p.seq_split_w_len(); m_seq_validate = p.seq_validate(); + m_seq_use_derivatives = p.seq_use_derivatives(); } diff --git a/src/smt/params/theory_seq_params.h b/src/smt/params/theory_seq_params.h index f9719b9fe..c925fc4e1 100644 --- a/src/smt/params/theory_seq_params.h +++ b/src/smt/params/theory_seq_params.h @@ -25,11 +25,13 @@ struct theory_seq_params { */ bool m_split_w_len; bool m_seq_validate; + bool m_seq_use_derivatives; theory_seq_params(params_ref const & p = params_ref()): m_split_w_len(true), - m_seq_validate(false) + m_seq_validate(false), + m_seq_use_derivatives(false) { updt_params(p); } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp new file mode 100644 index 000000000..a2f875269 --- /dev/null +++ b/src/smt/seq_regex.cpp @@ -0,0 +1,266 @@ +/*++ +Copyright (c) 2011 Microsoft Corporation + +Module Name: + + seq_regex.cpp + +Abstract: + + Solver for regexes + +Author: + + Nikolaj Bjorner (nbjorner) 2020-5-22 + +--*/ + +#include "smt/seq_regex.h" +#include "smt/theory_seq.h" + +namespace smt { + + seq_regex::seq_regex(theory_seq& th): + th(th), + ctx(th.get_context()), + m(th.get_manager()) + {} + + seq_util& seq_regex::u() { return th.m_util; } + class seq_util::re& seq_regex::re() { return th.m_util.re; } + class seq_util::str& seq_regex::str() { return th.m_util.str; } + seq_rewriter& seq_regex::seq_rw() { return th.m_seq_rewrite; } + seq_skolem& seq_regex::sk() { return th.m_sk; } + void seq_regex::rewrite(expr_ref& e) { th.m_rewrite(e); } + + void seq_regex::propagate_in_re(literal lit) { + if (!propagate(lit)) + m_to_propagate.push_back(lit); + } + + bool seq_regex::propagate() { + bool change = false; + for (unsigned i = 0; !ctx.inconsistent() && i < m_to_propagate.size(); ++i) { + if (propagate(m_to_propagate[i])) { + m_to_propagate.erase_and_swap(i--); + change = true; + } + } + return change; + } + + bool seq_regex::propagate(literal lit) { + expr* s = nullptr, *r = nullptr; + expr* e = ctx.bool_var2expr(lit.var()); + VERIFY(str().is_in_re(e, s, r)); + + // only positive assignments of membership propagation are relevant. + if (lit.sign() && sk().is_tail(s)) + return true; + + // convert negative negative membership literals to positive + // ~(s in R) => s in C(R) + if (lit.sign()) { + expr_ref fml(re().mk_in_re(s, re().mk_complement(r)), m); + rewrite(fml); + literal nlit = th.mk_literal(fml); + th.propagate_lit(nullptr, 1, &lit, nlit); + return true; + } + + if (!sk().is_tail(s) && coallesce_in_re(lit)) + return true; + + // TBD + // for !sk().is_tail(s): + // s in R => R != {} + + if (block_unfolding(lit, s)) + return true; + + // s in R[if(p,R1,R2)] & p => s in R[R1] + // s in R[if(p,R1,R2)] & ~p => s in R[R2] + expr_ref cond(m), tt(m), el(m); + if (seq_rw().has_cofactor(r, cond, tt, el)) { + literal lcond = th.mk_literal(cond), next_lit; + switch (ctx.get_assignment(lcond)) { + case l_true: { + rewrite(tt); + literal lits[2] = { lit, lcond }; + next_lit = th.mk_literal(re().mk_in_re(s, tt)); + th.propagate_lit(nullptr, 2, lits, next_lit); + return true; + } + case l_false: { + rewrite(el); + next_lit = th.mk_literal(re().mk_in_re(s, el)); + literal lits[2] = { lit, ~lcond }; + th.propagate_lit(nullptr, 2, lits, next_lit); + return true; + } + case l_undef: + ctx.mark_as_relevant(lcond); + return false; + } + } + + // s in R & s = "" => nullable(R) + literal is_empty = th.mk_eq(s, str().mk_empty(m.get_sort(s)), false); + expr_ref is_nullable = seq_rw().is_nullable(r); + rewrite(is_nullable); + th.add_axiom(~lit, ~is_empty, th.mk_literal(is_nullable)); + switch (ctx.get_assignment(is_empty)) { + case l_undef: + ctx.mark_as_relevant(is_empty); + return false; + case l_true: + return true; + case l_false: + break; + } + + // s in R & s != "" => s = head ++ tail + // s in R & s != "" => tail in D(head, R) + expr_ref head(m), tail(m), d(m); + expr* h = nullptr; + sk().decompose(s, head, tail); + if (!str().is_unit(head, h)) + throw default_exception("expected a unit"); + d = seq_rw().derivative(h, r); + if (!d) + throw default_exception("unable to expand derivative"); + th.add_axiom(is_empty, th.mk_eq(s, th.mk_concat(head, tail), false)); + th.add_axiom(~lit, is_empty, th.mk_literal(re().mk_in_re(tail, d))); + return true; + } + + bool seq_regex::block_unfolding(literal lit, expr* s) { + expr* t = nullptr; + unsigned i = 0; + if (!sk().is_tail_u(s, t, i)) + return false; + if (i > th.m_max_unfolding_depth && + th.m_max_unfolding_lit != null_literal && + ctx.get_assignment(th.m_max_unfolding_lit) == l_true && + !ctx.at_base_level()) { + th.propagate_lit(nullptr, 1, &lit, ~th.m_max_unfolding_lit); + return true; + } + return false; + } + + bool seq_regex::coallesce_in_re(literal lit) { + // initially disable this + return false; + expr* s = nullptr, *r = nullptr; + expr* e = ctx.bool_var2expr(lit.var()); + VERIFY(str().is_in_re(e, s, r)); + expr_ref regex(r, m); + literal_vector lits; + for (unsigned i = 0; i < m_s_in_re.size(); ++i) { + auto const& entry = m_s_in_re[i]; + if (!entry.m_active) + continue; + if (th.get_root(entry.m_s) != th.get_root(s)) + continue; + if (entry.m_re == regex) + continue; + + th.m_trail_stack.push(vector_value_trail(m_s_in_re, i)); + m_s_in_re[i].m_active = false; + IF_VERBOSE(11, verbose_stream() << "intersect " << regex << " " << + mk_pp(entry.m_re, m) << " " << mk_pp(s, m) << " " << mk_pp(entry.m_s, m) << "\n";); + regex = re().mk_inter(entry.m_re, regex); + rewrite(regex); + lits.push_back(~entry.m_lit); + enode* n1 = th.ensure_enode(entry.m_s); + enode* n2 = th.ensure_enode(s); + if (n1 != n2) { + lits.push_back(~th.mk_eq(n1->get_owner(), n2->get_owner(), false)); + } + } + m_s_in_re.push_back(s_in_re(lit, s, regex)); + th.get_trail_stack().push(push_back_vector>(m_s_in_re)); + if (lits.empty()) + return false; + lits.push_back(~lit); + lits.push_back(th.mk_literal(re().mk_in_re(s, regex))); + ctx.mk_th_axiom(th.get_id(), lits.size(), lits.c_ptr()); + return true; + } + + + void seq_regex::propagate_is_empty(literal lit) { + // the dual version of unroll_non_empty, but + // skolem functions have to be eliminated or turned into + // universal quantifiers. + throw default_exception("emptiness checking of regex is TBD"); + } + + void seq_regex::propagate_is_nonempty(expr* r) { + sort* seq_sort = nullptr; + VERIFY(u().is_re(r, seq_sort)); + literal lit = ~th.mk_eq(r, re().mk_empty(seq_sort), false); + expr_mark seen; + expr_ref non_empty = unroll_non_empty(r, seen, 0); + if (non_empty) { + rewrite(non_empty); + th.add_axiom(~lit, th.mk_literal(non_empty)); + } + else { + // generally introduce predicate (re.nonempty r seen) + // with inference rules based on unroll_non_empty + throw default_exception("unrolling large regexes is TBD"); + } + } + + /** + nonempty(R union Q, Seen) = R = {} or Q = {} + nonempty(R[if(p,R1,R2)], Seen) = if(p, nonempty(R[R1], Seen), nonempty(R[R2], Seen)) (co-factor) + nonempty(R, Seen) = nullable(R) or (R not in Seen and nonempty(D(first(R),R), Seen u { R })) (derivative) + + TBD: eliminate variables from p when possible to perform quantifier elimination. + + p := first(R) == 'a' + then replace first(R) by 'a' in R[R1] + TBD: + empty(R, Seen) = R = {} if R does not contain a subterm in Seen and Seen is non-empty + + + first : RegEx -> Char is a skolem function + */ + + expr_ref seq_regex::mk_first(expr* r) { + sort* elem_sort = nullptr, *seq_sort = nullptr; + VERIFY(u().is_re(r, seq_sort)); + VERIFY(u().is_seq(seq_sort, elem_sort)); + return expr_ref(m.mk_fresh_const("re.first", elem_sort), m); + // return sk().mk("re.first", r, elem_sort); + } + + expr_ref seq_regex::unroll_non_empty(expr* r, expr_mark& seen, unsigned depth) { + if (seen.is_marked(r)) + return expr_ref(m.mk_false(), m); + if (depth > 300) + return expr_ref(m); + expr_ref result(m), cond(m), th(m), el(m); + // TBD: try also rewriting + if (seq_rw().has_cofactor(r, cond, th, el)) { + th = unroll_non_empty(th, seen, depth + 1); + el = unroll_non_empty(el, seen, depth + 1); + if (th && el) + result = m.mk_ite(cond, th, el); + return result; + } + expr_ref hd = mk_first(r); + result = seq_rw().derivative(hd, r); + if (result) { + // TBD fast check if r is a subterm of result, if not, then + // loop instead of recurse + seen.mark(r, true); + result = unroll_non_empty(result, seen, depth + 1); + seen.mark(r, false); + } + return result; + } +} diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h new file mode 100644 index 000000000..97b8e56b2 --- /dev/null +++ b/src/smt/seq_regex.h @@ -0,0 +1,77 @@ +/*++ +Copyright (c) 2011 Microsoft Corporation + +Module Name: + + seq_regex.h + +Abstract: + + Solver for regexes + +Author: + + Nikolaj Bjorner (nbjorner) 2020-5-22 + +--*/ +#pragma once + +#include "util/scoped_vector.h" +#include "ast/seq_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" +#include "smt/smt_context.h" +#include "smt/seq_skolem.h" + +namespace smt { + + class theory_seq; + + class seq_regex { + struct s_in_re { + literal m_lit; + expr* m_s; + expr* m_re; + bool m_active; + s_in_re(literal l, expr* s, expr* r): + m_lit(l), m_s(s), m_re(r), m_active(true) {} + }; + + theory_seq& th; + context& ctx; + ast_manager& m; + vector m_s_in_re; + scoped_vector m_to_propagate; + + seq_util& u(); + class seq_util::re& re(); + class seq_util::str& str(); + seq_rewriter& seq_rw(); + seq_skolem& sk(); + + void rewrite(expr_ref& e); + + bool propagate(literal lit); + + bool coallesce_in_re(literal lit); + + bool block_unfolding(literal lit, expr* s); + + expr_ref mk_first(expr* r); + + expr_ref unroll_non_empty(expr* r, expr_mark& seen, unsigned depth); + + public: + + seq_regex(theory_seq& th); + + bool propagate(); + + void propagate_in_re(literal lit); + + void propagate_is_empty(literal lit); + + void propagate_is_nonempty(expr* r); + }; + +}; + diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 3bc033b25..a27197f41 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -299,6 +299,7 @@ theory_seq::theory_seq(context& ctx): m_sk(m, m_rewrite), m_ax(*this, m_rewrite), m_unicode(*this), + m_regex(*this), m_arith_value(m), m_trail_stack(*this), m_ls(m), m_rs(m), @@ -367,6 +368,10 @@ final_check_status theory_seq::final_check_eh() { TRACEFIN("solve_nqs"); return FC_CONTINUE; } + if (m_regex.propagate()) { + TRACEFIN("regex propagate"); + return FC_CONTINUE; + } if (check_contains()) { ++m_stats.m_propagate_contains; TRACEFIN("propagate_contains"); @@ -2617,6 +2622,7 @@ expr_ref theory_seq::add_elim_string_axiom(expr* n) { } void theory_seq::propagate_in_re(expr* n, bool is_true) { + TRACE("seq", tout << mk_pp(n, m) << " <- " << (is_true?"true":"false") << "\n";); expr_ref tmp(n, m); @@ -3053,7 +3059,12 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { } } else if (m_util.str.is_in_re(e)) { - propagate_in_re(e, is_true); + if (ctx.get_fparams().m_seq_use_derivatives) { + m_regex.propagate_in_re(lit); + } + else { + propagate_in_re(e, is_true); + } } else if (m_sk.is_digit(e)) { // no-op diff --git a/src/smt/theory_seq.h b/src/smt/theory_seq.h index 22d94ddd4..cd9fa2dda 100644 --- a/src/smt/theory_seq.h +++ b/src/smt/theory_seq.h @@ -34,11 +34,14 @@ Revision History: #include "smt/seq_skolem.h" #include "smt/seq_axioms.h" #include "smt/seq_unicode.h" +#include "smt/seq_regex.h" #include "smt/seq_offset_eq.h" namespace smt { class theory_seq : public theory { + friend class seq_regex; + struct assumption { enode* n1, *n2; literal lit; @@ -404,6 +407,7 @@ namespace smt { seq_skolem m_sk; seq_axioms m_ax; seq_unicode m_unicode; + seq_regex m_regex; arith_value m_arith_value; th_trail_stack m_trail_stack; stats m_stats;