From 5afb95b34aeab0a475d2c2f4512723fc2b386c25 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 22 Dec 2021 17:53:34 -0800 Subject: [PATCH] improved subset checking for regexes with counters (#5731) --- src/ast/rewriter/seq_rewriter.cpp | 43 ++++++++++++++++++++++++++----- src/ast/rewriter/seq_rewriter.h | 1 + 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 40a46ec0f..c04c9991b 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3112,7 +3112,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref else // D(e,r1)r2|(ite (r1nullable) (D(e,r2)) []) // observe that (mk_ite_simplify(true, D(e,r2), []) = D(e,r2) - result = mk_regex_union_normalize(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); + result = mk_antimirov_deriv_union(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); } else if (m().is_ite(r, c, r1, r2)) { c1 = simplify_path(e, m().mk_and(c, path)); @@ -3171,7 +3171,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = re().mk_ite_simplify(range, epsilon(), nothing()); } else if (re().is_union(r, r1, r2)) - result = mk_regex_union_normalize(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path)); + result = mk_antimirov_deriv_union(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path)); else if (re().is_intersection(r, r1, r2)) result = mk_antimirov_deriv_intersection(e, mk_antimirov_deriv(e, r1, path), @@ -3237,11 +3237,11 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* result = mk_antimirov_deriv_restrict(e, d2, path); else if (re().is_union(d1, a, b)) // distribute intersection over the union in d1 - result = mk_regex_union_normalize(mk_antimirov_deriv_intersection(e, a, d2, path), + result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, a, d2, path), mk_antimirov_deriv_intersection(e, b, d2, path)); else if (re().is_union(d2, a, b)) // distribute intersection over the union in d2 - result = mk_regex_union_normalize(mk_antimirov_deriv_intersection(e, d1, a, path), + result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, d1, a, path), mk_antimirov_deriv_intersection(e, d1, b, path)); else result = mk_regex_inter_normalize(d1, d2); @@ -3256,7 +3256,7 @@ expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) { if (m().is_ite(d, c, t, e)) result = m().mk_ite(c, mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); else if (re().is_union(d, t, e)) - result = mk_regex_union_normalize(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); + result = mk_antimirov_deriv_union(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); else result = mk_re_append(d, r); return result; @@ -3284,7 +3284,7 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { else if (re().is_union(d, t, e)) result = mk_antimirov_deriv_intersection(elem, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e), m().mk_true()); else if (re().is_intersection(d, t, e)) - result = mk_regex_union_normalize(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); + result = mk_antimirov_deriv_union(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); else if (re().is_complement(d, t)) result = t; else @@ -3292,6 +3292,20 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { return result; } +expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) { + sort* seq_sort = nullptr, * ele_sort = nullptr; + VERIFY(m_util.is_re(d1, seq_sort)); + VERIFY(m_util.is_seq(seq_sort, ele_sort)); + expr_ref result(m()); + expr* c1, * t1, * e1, * c2, * t2, * e2; + if (m().is_ite(d1, c1, t1, e1) && m().is_ite(d2, c2, t2, e2) && c1 == c2) + // eliminate duplicate branching on exactly the same condition + result = m().mk_ite(c1, mk_antimirov_deriv_union(t1, t2), mk_antimirov_deriv_union(e1, e2)); + else + result = mk_regex_union_normalize(d1, d2); + return result; +} + // restrict the guards of all conditionals id d and simplify the resulting derivative // restrict(if(c, a, b), cond) = if(c, restrict(a, cond & c), restrict(b, cond & ~c)) // restrict(a U b, cond) = restrict(a, cond) U restrict(b, cond) @@ -3320,7 +3334,7 @@ expr_ref seq_rewriter::mk_antimirov_deriv_restrict(expr* e, expr* d, expr* cond) else if (re().is_union(d, a, b)) { expr_ref a1(mk_antimirov_deriv_restrict(e, a, cond), m()); expr_ref b1(mk_antimirov_deriv_restrict(e, b, cond), m()); - result = mk_regex_union_normalize(a1, b1); + result = mk_antimirov_deriv_union(a1, b1); } return result; } @@ -4586,6 +4600,7 @@ bool seq_rewriter::is_subset(expr* r1, expr* r2) const { // return false; expr* ra1 = nullptr, *ra2 = nullptr, *ra3 = nullptr; expr* rb1 = nullptr, *rb2 = nullptr, *rb3 = nullptr; + unsigned la, ua, lb, ub; if (re().is_complement(r1, ra1) && re().is_complement(r2, rb1)) { return is_subset(rb1, ra1); @@ -4611,6 +4626,20 @@ bool seq_rewriter::is_subset(expr* r1, expr* r2) const { r1 = ra2; continue; } + // r1=ra3{la,ua}ra2, r2=rb3{lb,ub}rb2, ra3=rb3, lb<=la, ua<=ub + if (re().is_concat(r1, ra1, ra2) && re().is_loop(ra1, ra3, la, ua) && + re().is_concat(r2, rb1, rb2) && re().is_loop(rb1, rb3, lb, ub) && + ra3 == rb3 && lb <= la && ua <= ub) { + r1 = ra2; + r2 = rb2; + continue; + } + // ra1=ra3{la,ua}, r2=rb3{lb,ub}, ra3=rb3, lb<=la, ua<=ub + if (re().is_loop(r1, ra3, la, ua) && + re().is_loop(r2, rb3, lb, ub) && + ra3 == rb3 && lb <= la && ua <= ub) { + return true; + } return false; } } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 9357a2be8..0d8ac029c 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -217,6 +217,7 @@ class seq_rewriter { expr_ref mk_antimirov_deriv_intersection(expr* elem, expr* d1, expr* d2, expr* path); expr_ref mk_antimirov_deriv_concat(expr* d, expr* r); expr_ref mk_antimirov_deriv_negate(expr* elem, expr* d); + expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2); expr_ref mk_antimirov_deriv_restrict(expr* elem, expr* d1, expr* cond); expr_ref mk_regex_reverse(expr* r); expr_ref mk_regex_concat(expr* r1, expr* r2);