diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index b732670a6..258bbee92 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2278,6 +2278,13 @@ expr_ref seq_rewriter::derivative(expr* elem, expr* r) { result = array.mk_select(2, args); result = kleene_predicate(result, seq_sort); } + else if (m().is_ite(r, p, r1, r2)) { + dr1 = derivative(elem, r1); + dr2 = derivative(elem, r2); + if (dr1 && dr2) { + result = m().mk_ite(p, dr1, dr2); + } + } return result; } diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 0dc9094ca..b9ed08cf7 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1220,6 +1220,44 @@ app* seq_util::str::mk_is_empty(expr* s) const { return m.mk_eq(s, mk_empty(get_sort(s))); } +unsigned seq_util::str::min_length(expr* s) const { + SASSERT(u.is_seq(s)); + expr_ref_vector es(m); + unsigned result = 0; + get_concat_units(s, es); + for (expr* arg : es) { + if (is_unit(arg)) + result++; + } + return result; +} + +unsigned seq_util::re::min_length(expr* r) const { + SASSERT(u.is_re(r)); + expr* r1 = nullptr, *r2 = nullptr, *s = nullptr; + if (is_empty(r)) + return UINT_MAX; + if (is_concat(r, r1, r2)) { + unsigned l1 = min_length(r1); + if (l1 == UINT_MAX) + return l1; + unsigned l2 = min_length(r2); + if (l2 == UINT_MAX) + return l2; + return l1 + l2; + } + if (m.is_ite(r, s, r1, r2)) + return std::min(min_length(r1), min_length(r2)); + if (is_diff(r, r1, r2)) + return min_length(r1); + if (is_union(r, r1, r2)) + return std::min(min_length(r1), min_length(r2)); + if (is_intersection(r, r1, r2)) + return std::max(min_length(r1), min_length(r2)); + if (is_to_re(r, s)) + return u.str.min_length(s); + return 0; +} sort* seq_util::re::to_seq(sort* re) { (void)u; diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 46cc27fe4..e54db6567 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -394,6 +394,8 @@ public: void get_concat_units(expr* e, expr_ref_vector& es) const; expr* get_leftmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e1; return e; } expr* get_rightmost_concat(expr* e) const { expr* e1, *e2; while (is_concat(e, e1, e2)) e = e2; return e; } + + unsigned min_length(expr* s) const; }; class re { @@ -456,6 +458,7 @@ public: bool is_loop(expr const* n, expr*& body, unsigned& lo); bool is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi); bool is_loop(expr const* n, expr*& body, expr*& lo); + unsigned min_length(expr* r) const; }; str str; re re; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index d355d210f..51ca4ccf8 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -100,6 +100,32 @@ namespace smt { m_to_propagate.push_back(lit); } + + // s in R[if(p,R1,R2)] & p => s in R[R1] + // s in R[if(p,R1,R2)] & ~p => s in R[R2] + + bool seq_regex::unfold_cofactors(expr_ref& r, literal_vector& conds) { + expr_ref cond(m), tt(m), el(m); + while (seq_rw().has_cofactor(r, cond, tt, el)) { + rewrite(cond); + literal lcond = th.mk_literal(cond); + switch (ctx.get_assignment(lcond)) { + case l_true: + conds.push_back(~lcond); + r = tt; + break; + case l_false: + conds.push_back(lcond); + r = el; + break; + case l_undef: + ctx.mark_as_relevant(lcond); + return false; + } + rewrite(r); + } + return true; + } /** * Propagate the atom (accept s i r) @@ -144,79 +170,30 @@ namespace smt { case l_false: break; } - - // s in R[if(p,R1,R2)] & p => s in R[R1] - // s in R[if(p,R1,R2)] & ~p => s in R[R2] - // TBD combine the two places that perform co-factor rewriting - expr_ref cond(m), tt(m), el(m); - if (seq_rw().has_cofactor(r, cond, tt, el)) { - rewrite(cond); - literal lcond = th.mk_literal(cond), next_lit; - switch (ctx.get_assignment(lcond)) { - case l_true: { - rewrite(tt); - literal lits[2] = { lit, lcond }; - next_lit = th.mk_literal(sk().mk_accept(s, i, tt)); - th.propagate_lit(nullptr, 2, lits, next_lit); - return true; - } - case l_false: { - rewrite(el); - next_lit = th.mk_literal(sk().mk_accept(s, i, el)); - literal lits[2] = { lit, ~lcond }; - th.propagate_lit(nullptr, 2, lits, next_lit); - return true; - } - case l_undef: - ctx.mark_as_relevant(lcond); - return false; - } - } + expr_ref head(m), d(r, m), i_next(m); + literal_vector conds; + + if (!unfold_cofactors(d, conds)) + return false; // s in R & len(s) > i => tail(s,i) in D(nth(s, i), R) - expr_ref head(m), d(m), i_next(m); head = th.mk_nth(s, i); i_next = a().mk_int(idx + 1); - d = seq_rw().derivative(head, r); + d = seq_rw().derivative(head, d); if (!d) throw default_exception("unable to expand derivative"); - // immediately expand a co-factor here so that condition on the character - // is expanded. - - literal_vector conds; - while (seq_rw().has_cofactor(d, cond, tt, el)) { - rewrite(cond); - literal lcond = th.mk_literal(cond); - bool is_undef = false; - switch (ctx.get_assignment(lcond)) { - case l_true: - conds.push_back(~lcond); - d = tt; - break; - case l_false: - conds.push_back(lcond); - d = el; - break; - case l_undef: - ctx.mark_as_relevant(lcond); - d = m.mk_ite(cond, tt, el); - is_undef = true; - break; - } - if (is_undef) - break; - rewrite(d); - } + literal acc_next = th.mk_literal(sk().mk_accept(s, i_next, d)); if (conds.empty()) { - th.add_axiom(~lit, len_s_le_i, th.mk_literal(sk().mk_accept(s, i_next, d))); + th.add_axiom(~lit, len_s_le_i, acc_next); } else { conds.push_back(~lit); conds.push_back(len_s_le_i); - conds.push_back(th.mk_literal(sk().mk_accept(s, i_next, d))); - for (literal lit : conds) ctx.mark_as_relevant(lit); + conds.push_back(acc_next); + for (literal c : conds) + ctx.mark_as_relevant(c); ctx.mk_th_axiom(th.get_id(), conds.size(), conds.c_ptr()); } TRACE("seq", tout << "head " << head << "\n"; @@ -226,15 +203,12 @@ namespace smt { /** * Put a limit to the unfolding of s. - * TBD: - * Given an Regex R, we can compute the minimal length string accepted by R - * A strong analysis could compute the minimal length of s to be accepted by R - * For example if s = x + "abc" and R = .* ++ "def" . .**, then minimal length - * of s is 6. - * Limiting the depth of unfolding s below such lengths is not useful. */ bool seq_regex::block_unfolding(literal lit, unsigned i) { + expr* s = nullptr, *idx = nullptr, *r = nullptr; + expr* e = ctx.bool_var2expr(lit.var()); + VERIFY(sk().is_accept(e, s, idx, r)); expr* t = nullptr; if (i > th.m_max_unfolding_depth && th.m_max_unfolding_lit != null_literal && diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index aa2b66b69..be88fee5e 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -61,6 +61,8 @@ namespace smt { expr_ref unroll_non_empty(expr* r, expr_mark& seen, unsigned depth); + bool unfold_cofactors(expr_ref& r, literal_vector& conds); + public: seq_regex(theory_seq& th); diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index d9ec1c9a3..8cb98b479 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -3493,8 +3493,10 @@ bool theory_seq::should_research(expr_ref_vector & unsat_core) { } if (k_min < UINT_MAX) { m_max_unfolding_depth++; - IF_VERBOSE(1, verbose_stream() << "(smt.seq :increase-length " << mk_pp(s_min, m) << " " << 2*k_min << ")\n"); - add_length_limit(s_min, 2*k_min, false); + k_min *= 2; + k_min = std::max(m_util.str.min_length(s_min), k_min); + IF_VERBOSE(1, verbose_stream() << "(smt.seq :increase-length " << mk_pp(s_min, m) << " " << k_min << ")\n"); + add_length_limit(s_min, k_min, false); return true; } else if (has_max_unfolding) {