3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-05 17:14:07 +00:00

Update regex union and intersection to maintain ANF (#5717)

* added merge for unions and intersections

* added normalization rules to ensure ANF

* fixing PR comments related to merge
This commit is contained in:
Margus Veanes 2021-12-16 19:19:36 -08:00 committed by GitHub
parent db62038845
commit a288f9048a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 255 additions and 41 deletions

View file

@ -3112,7 +3112,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
else
// D(e,r1)r2|(ite (r1nullable) (D(e,r2)) [])
// observe that (mk_ite_simplify(true, D(e,r2), []) = D(e,r2)
result = mk_antimirov_deriv_union(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing()));
result = mk_regex_union_normalize(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing()));
}
else if (m().is_ite(r, c, r1, r2)) {
c1 = simplify_path(e, m().mk_and(c, path));
@ -3171,7 +3171,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
result = re().mk_ite_simplify(range, epsilon(), nothing());
}
else if (re().is_union(r, r1, r2))
result = mk_antimirov_deriv_union(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path));
result = mk_regex_union_normalize(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path));
else if (re().is_intersection(r, r1, r2))
result = mk_antimirov_deriv_intersection(e,
mk_antimirov_deriv(e, r1, path),
@ -3213,11 +3213,6 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr*
VERIFY(m_util.is_seq(seq_sort, ele_sort));
expr_ref result(m());
expr* c, * a, * b;
//if (d1 == d2 || re().is_full_seq(d2) || re().is_empty(d1))
// result = d1;
//else if (re().is_full_seq(d1) || re().is_empty(d2))
// result = d2;
//else
if (re().is_empty(d1))
result = d1;
else if (re().is_empty(d2))
@ -3242,16 +3237,14 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr*
result = mk_antimirov_deriv_restrict(e, d2, path);
else if (re().is_union(d1, a, b))
// distribute intersection over the union in d1
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, a, d2, path),
result = mk_regex_union_normalize(mk_antimirov_deriv_intersection(e, a, d2, path),
mk_antimirov_deriv_intersection(e, b, d2, path));
else if (re().is_union(d2, a, b))
// distribute intersection over the union in d2
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, d1, a, path),
result = mk_regex_union_normalize(mk_antimirov_deriv_intersection(e, d1, a, path),
mk_antimirov_deriv_intersection(e, d1, b, path));
else
// in all other cases create the intersection regex
// TODO: flatten, order and merge d1 and d2 to maintain equality under similarity
result = (d1->get_id() <= d2->get_id() ? re().mk_inter(d1, d2) : re().mk_inter(d2, d1));
result = mk_regex_inter_normalize(d1, d2);
return result;
}
@ -3263,7 +3256,7 @@ expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) {
if (m().is_ite(d, c, t, e))
result = m().mk_ite(c, mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r));
else if (re().is_union(d, t, e))
result = re().mk_union(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r));
result = mk_regex_union_normalize(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r));
else
result = mk_re_append(d, r);
return result;
@ -3289,10 +3282,9 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) {
else if (m().is_ite(d, c, t, e))
result = m().mk_ite(c, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e));
else if (re().is_union(d, t, e))
//result = re().mk_inter(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
result = mk_antimirov_deriv_intersection(elem, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e), m().mk_true());
else if (re().is_intersection(d, t, e))
result = re().mk_union(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e));
result = mk_regex_union_normalize(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e));
else if (re().is_complement(d, t))
result = t;
else
@ -3300,22 +3292,6 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) {
return result;
}
expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) {
expr_ref result(m());
if (re().is_empty(d1) || re().is_full_seq(d2))
result = d2;
else if (re().is_empty(d2) || re().is_full_seq(d1))
result = d1;
else if (re().is_dot_plus(d1) && re().get_info(d2).min_length > 0)
result = d1;
else if (re().is_dot_plus(d2) && re().get_info(d1).min_length > 0)
result = d2;
else
// TODO: flatten, order and merge d1 and d2 to maintain equality under similarity
result = (d1->get_id() <= d2->get_id() ? re().mk_union(d1, d2) : re().mk_union(d2, d1));
return result;
}
// restrict the guards of all conditionals id d and simplify the resulting derivative
// restrict(if(c, a, b), cond) = if(c, restrict(a, cond & c), restrict(b, cond & ~c))
// restrict(a U b, cond) = restrict(a, cond) U restrict(b, cond)
@ -3344,17 +3320,212 @@ expr_ref seq_rewriter::mk_antimirov_deriv_restrict(expr* e, expr* d, expr* cond)
else if (re().is_union(d, a, b)) {
expr_ref a1(mk_antimirov_deriv_restrict(e, a, cond), m());
expr_ref b1(mk_antimirov_deriv_restrict(e, b, cond), m());
if (a1 == b1 || re().is_empty(b1) || re().is_full_seq(a1))
result = a1;
else if (re().is_empty(a1) || re().is_full_seq(b1))
result = b1;
else
//TODO: merge
result = (a1->get_id() <= b1->get_id() ? re().mk_union(a1, b1) : re().mk_union(b1, a1));
result = mk_regex_union_normalize(a1, b1);
}
return result;
}
expr_ref seq_rewriter::mk_regex_union_normalize(expr* r1, expr* r2) {
VERIFY(m_util.is_re(r1));
VERIFY(m_util.is_re(r2));
expr_ref result(m());
std::function<bool(expr*, expr*&, expr*&)> test = [&](expr* t, expr*& a, expr*& b) { return re().is_union(t, a, b); };
std::function<expr* (expr*, expr*)> compose = [&](expr* r1, expr* r2) { return re().mk_union(r1, r2); };
if (r1 == r2 || re().is_empty(r2) || re().is_full_seq(r1))
result = r1;
else if (re().is_empty(r1) || re().is_full_seq(r2))
result = r2;
else if (re().is_dot_plus(r1) && re().get_info(r2).min_length > 0)
result = r1;
else if (re().is_dot_plus(r2) && re().get_info(r1).min_length > 0)
result = r2;
else
result = merge_regex_sets(r1, r2, re().mk_full_seq(r1->get_sort()), test, compose);
return result;
}
expr_ref seq_rewriter::mk_regex_inter_normalize(expr* r1, expr* r2) {
VERIFY(m_util.is_re(r1));
VERIFY(m_util.is_re(r2));
expr_ref result(m());
std::function<bool(expr*, expr*&, expr*&)> test = [&](expr* t, expr*& a, expr*& b) { return re().is_intersection(t, a, b); };
std::function<expr* (expr*, expr*)> compose = [&](expr* r1, expr* r2) { return re().mk_inter(r1, r2); };
if (r1 == r2 || re().is_empty(r1) || re().is_full_seq(r2))
result = r1;
else if (re().is_empty(r2) || re().is_full_seq(r1))
result = r2;
else if (re().is_epsilon(r1)) {
if (re().get_info(r2).nullable == l_true)
result = r1;
else if (re().get_info(r2).nullable == l_false)
result = re().mk_empty(r1->get_sort());
else
result = merge_regex_sets(r1, r2, re().mk_empty(r1->get_sort()), test, compose);
}
else if (re().is_dot_plus(r1) && re().get_info(r2).min_length > 0)
result = r2;
else if (re().is_dot_plus(r2) && re().get_info(r1).min_length > 0)
result = r1;
else {
result = merge_regex_sets(r1, r2, re().mk_empty(r1->get_sort()), test, compose);
}
return result;
}
expr_ref seq_rewriter::merge_regex_sets(expr* r1, expr* r2, expr* unit,
std::function<bool(expr*, expr*&, expr*&)>& test,
std::function<expr* (expr*, expr*)>& compose) {
sort* seq_sort;
expr_ref result(unit, m());
expr_ref_vector prefix(m());
expr* a, * ar, * ar1, * b, * br, * br1;
VERIFY(m_util.is_re(r1, seq_sort));
VERIFY(m_util.is_re(r2));
SASSERT(r2->get_sort() == r1->get_sort());
// Ordering of expressions used by merging, 0 means unordered, -1 means e1 < e2, 1 means e2 < e1
auto compare = [&](expr* x, expr* y) {
expr* z;
unsigned int xid, yid;
// TODO: consider also the case of A{0,l}++B having the same id as A*++B
// in which case return 0
if (x == y)
return 0;
xid = (re().is_complement(x, z) ? z->get_id() : x->get_id());
yid = (re().is_complement(y, z) ? z->get_id() : y->get_id());
VERIFY(xid != yid);
return (xid < yid ? -1 : 1);
};
auto composeresult = [&](expr* suffix) {
result = suffix;
while (!prefix.empty()) {
result = compose(prefix.back(), result);
prefix.pop_back();
}
};
if (r1 == r2)
result = r1;
else if (are_complements(r1, r2))
// TODO: loops
result = unit;
else {
signed int k;
ar = r1;
br = r2;
while (true) {;
if (test(ar, a, ar1)) {
if (test(br, b, br1)) {
if (a == b) {
prefix.push_back(a);
ar = ar1;
br = br1;
}
else if (are_complements(a, b)) {
result = unit;
break;
}
else {
k = compare(a, b);
if (k == -1) {
// a < b
prefix.push_back(a);
ar = ar1;
}
else {
// b < a
prefix.push_back(b);
br = br1;
}
}
}
else {
// br is not decomposable
if (a == br) {
// result = prefix ++ ar
composeresult(ar);
break;
}
else if (are_complements(a, br)) {
result = unit;
break;
}
else {
k = compare(a, br);
if (k == -1) {
// a < br
prefix.push_back(a);
ar = ar1;
}
else {
// br < a, result = prefix ++ (br) ++ ar
prefix.push_back(br);
composeresult(ar);
break;
}
}
}
}
else {
// ar is not decomposable
if (test(br, b, br1)) {
if (ar == b) {
// result = prefix ++ br
composeresult(br);
break;
}
else if (are_complements(ar, b)) {
result = unit;
break;
}
else {
k = compare(ar, b);
if (k == -1) {
// ar < b, result = prefix ++ (ar) ++ br
prefix.push_back(ar);
composeresult(br);
break;
}
else {
// b < ar
prefix.push_back(b);
br = br1;
}
}
}
else {
// neither ar nor br is decomposable
if (ar == br) {
// result = prefix ++ ar
composeresult(ar);
break;
}
else if (are_complements(ar, br)) {
result = unit;
break;
}
else {
k = compare(ar, br);
if (k == -1) {
// ar < br, result = prefix ++ (ar) ++ (br)
prefix.push_back(ar);
composeresult(br);
break;
}
else {
// br < ar, result = prefix ++ (br) ++ (ar)
prefix.push_back(br);
composeresult(ar);
break;
}
}
}
}
}
}
return result;
}
expr_ref seq_rewriter::mk_regex_reverse(expr* r) {
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
unsigned lo = 0, hi = 0;
@ -4344,6 +4515,7 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) {
r ++ [] -> []
r ++ "" -> r
"" ++ r -> r
. ++ .* -> .+
to_re and star:
(str.to_re s1) ++ (str.to_re s2) -> (str.to_re (s1 ++ s2))
@ -4371,6 +4543,14 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
result = a;
return BR_DONE;
}
if (re().is_full_char(a) && re().is_full_seq(b)) {
result = re().mk_plus(a);
return BR_DONE;
}
if (re().is_full_char(b) && re().is_full_seq(a)) {
result = re().mk_plus(b);
return BR_DONE;
}
expr_ref a_str(m());
expr_ref b_str(m());
if (lift_str_from_to_re(a, a_str) && lift_str_from_to_re(b, b_str)) {
@ -4515,6 +4695,11 @@ br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) {
result = mk_full();
return BR_DONE;
}
//just keep the union normalized
result = mk_regex_union_normalize(a, b);
return BR_DONE;
expr* a1 = nullptr, *a2 = nullptr;
expr* b1 = nullptr, *b2 = nullptr;
@ -4643,9 +4828,17 @@ br_status seq_rewriter::mk_re_inter(expr* a, expr* b, expr_ref& result) {
result = mk_empty();
return BR_DONE;
}
// intersect and normalize
result = mk_regex_inter_normalize(a, b);
return BR_DONE;
expr* a1 = nullptr, *a2 = nullptr;
expr* b1 = nullptr, *b2 = nullptr;
// the following rewrite rules do not seem to
// do the right thing when it comes to normalizing
// ensure intersection is right-associative
// and swap-sort entries
if (re().is_intersection(a, a1, a2)) {
@ -4696,7 +4889,7 @@ br_status seq_rewriter::mk_re_inter(expr* a, expr* b, expr_ref& result) {
}
br_status seq_rewriter::mk_re_diff(expr* a, expr* b, expr_ref& result) {
result = re().mk_inter(a, re().mk_complement(b));
result = mk_regex_inter_normalize(a, re().mk_complement(b));
return BR_REWRITE2;
}
@ -4939,7 +5132,7 @@ void seq_rewriter::elim_condition(expr* elem, expr_ref& cond) {
flatten_and(cond, conds);
expr* lhs = nullptr, *rhs = nullptr, *e1 = nullptr;
if (u().is_char(elem)) {
unsigned ch = 0;
unsigned ch = 0, ch2 = 0;
svector<std::pair<unsigned, unsigned>> ranges, ranges1;
ranges.push_back(std::make_pair(0, u().max_char()));
auto exclude_char = [&](unsigned ch) {
@ -4991,6 +5184,19 @@ void seq_rewriter::elim_condition(expr* elem, expr_ref& cond) {
else
intersect(0, ch-1, ranges);
}
else if (m().is_true(e) || (m().is_eq(e, lhs, rhs) && lhs == rhs)) {
// trivially true
continue;
}
else if (m().is_not(e, e1) && m().is_eq(e1, lhs, rhs) && u().is_const_char(lhs, ch) && u().is_const_char(rhs, ch2) && ch != ch2) {
// trivially true
continue;
}
else if (m().is_false(e) || (m().is_not(e, e1) && m().is_eq(e1, lhs, rhs) && lhs == rhs)) {
// trivially false
cond = m().mk_false();
return;
}
else {
all_ranges = false;
break;

View file

@ -217,11 +217,19 @@ class seq_rewriter {
expr_ref mk_antimirov_deriv_intersection(expr* elem, expr* d1, expr* d2, expr* path);
expr_ref mk_antimirov_deriv_concat(expr* d, expr* r);
expr_ref mk_antimirov_deriv_negate(expr* elem, expr* d);
expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2);
expr_ref mk_antimirov_deriv_restrict(expr* elem, expr* d1, expr* cond);
expr_ref mk_regex_reverse(expr* r);
expr_ref mk_regex_concat(expr* r1, expr* r2);
expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function<bool(expr*, expr*&, expr*&)>& decompose, std::function<expr* (expr*, expr*)>& compose);
// Apply simplifications and keep the representation normalized
// Assuming r1 and r2 are normalized
expr_ref mk_regex_union_normalize(expr* r1, expr* r2);
expr_ref mk_regex_inter_normalize(expr* r1, expr* r2);
// elem is (:var 0) and path a condition that may have (:var 0) as a free variable
// simplify path, e.g., (:var 0) = 'a' & (:var 0) = 'b' is simplified to false
expr_ref simplify_path(expr* elem, expr* path);
bool lt_char(expr* ch1, expr* ch2);