3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-12 12:08:18 +00:00

updates related to issue #5140 (#5463)

* updates related to issue #5140

* updated/simplified some cases

* fixing feedback comments

* fixed comments and added missing case for get_re_head_tail_reversed

* two bug fixes and some other code improvements
This commit is contained in:
Margus Veanes 2021-08-09 10:48:56 -07:00 committed by GitHub
parent af5fd1014f
commit 225204e2f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 202 additions and 38 deletions

View file

@ -14,6 +14,7 @@ Author:
Nikolaj Bjorner (nbjorner) 2015-12-5 Nikolaj Bjorner (nbjorner) 2015-12-5
Murphy Berzish 2017-02-21 Murphy Berzish 2017-02-21
Caleb Stanford 2020-07-07 Caleb Stanford 2020-07-07
Margus Veanes 2021
--*/ --*/
@ -872,6 +873,65 @@ br_status seq_rewriter::mk_seq_length(expr* a, expr_ref& result) {
return BR_FAILED; return BR_FAILED;
} }
/*
* In general constructs nth(t,0) but if t = substring(s,j,..) then simplifies to nth(s,j)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_first(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
if (str().is_extract(t, s, j, k))
result = str().mk_nth_i(s, j);
else
result = str().mk_nth_i(t, m_autil.mk_int(0));
return result;
}
/*
* In general constructs substring(t,1,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j+1,k-1)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_rest(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_substr(s, m_autil.mk_add(j, one), m_autil.mk_sub(k, one));
else
result = str().mk_substr(t, one, m_autil.mk_sub(str().mk_length(t), one));
return result;
}
/*
* In general constructs nth(t,|t|-1) but if t = substring(s,j,k) then simplifies to nth(s,j+k-1)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_last(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_nth_i(s, m_autil.mk_sub(m_autil.mk_add(j, k), one));
else
result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one));
return result;
}
/*
* In general constructs substring(t,0,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j,k-1)
* This method assumes that |t| > 0 holds.
*/
expr_ref seq_rewriter::mk_seq_butlast(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_substr(s, j, m_autil.mk_sub(k, one));
else
result = str().mk_substr(t, m_autil.mk_int(0), m_autil.mk_sub(str().mk_length(t), one));
return result;
}
/* /*
Lift all ite expressions to the top level, safely Lift all ite expressions to the top level, safely
throttled to not blowup the size of the expression. throttled to not blowup the size of the expression.
@ -1893,13 +1953,13 @@ br_status seq_rewriter::mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref&
return BR_REWRITE1; return BR_REWRITE1;
} }
zstring s1, s2; zstring s1, s2;
expr_ref_vector strs(m());
if (str().is_string(a, s1) && str().is_string(b, s2)) { if (str().is_string(a, s1) && str().is_string(b, s2)) {
SASSERT(s2.length() > 0); SASSERT(s2.length() > 0);
if (s1.length() < s2.length()) { if (s1.length() < s2.length()) {
result = a; result = a;
return BR_DONE; return BR_DONE;
} }
expr_ref_vector strs(m());
for (unsigned i = 0; i < s1.length(); ++i) { for (unsigned i = 0; i < s1.length(); ++i) {
if (s1.length() >= s2.length() + i && if (s1.length() >= s2.length() + i &&
s2 == s1.extract(i, s2.length())) { s2 == s1.extract(i, s2.length())) {
@ -1912,11 +1972,62 @@ br_status seq_rewriter::mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref&
result = str().mk_concat(strs, a->get_sort()); result = str().mk_concat(strs, a->get_sort());
return BR_REWRITE_FULL; return BR_REWRITE_FULL;
} }
// TBD: add case when a, b are concatenation of units that are values. expr_ref_vector a_vals(m());
// in this case we can use a similar loop as for strings. expr_ref_vector b_vals(m());
if (try_get_unit_values(a, a_vals) && try_get_unit_values(b, b_vals)) {
replace_all_subvectors(a_vals, b_vals, c, strs);
result = str().mk_concat(strs, a->get_sort());
return BR_REWRITE_FULL;
}
//TODO: the case when a is a unit or concatenation of units while b is a string
//or the other way around -- if that situation is possible at all -- is similar to the above
return BR_FAILED; return BR_FAILED;
} }
/*
* Returns false if s is not a single unit value or concatenation of unit values.
* Else extracts the units from s into vals and returns true.
*/
bool seq_rewriter::try_get_unit_values(expr* s, expr_ref_vector& vals) {
expr* h, * t, * v;
t = s;
//collect all unit values from s, if not all elements are unit-values then fail
while (str().is_concat(t, h, t))
if (str().is_unit(h, v) && m().is_value(v))
vals.push_back(h);
else
return false;
//add the last element
if (str().is_unit(t, v) && m().is_value(v))
vals.push_back(t);
else
return false;
return true;
}
/*
* Replace all subvectors of b in a by c
*/
void seq_rewriter::replace_all_subvectors(expr_ref_vector const& a, expr_ref_vector const& b, expr* c, expr_ref_vector& result) {
unsigned int i = 0;
unsigned int k = b.size();
while (i + k <= a.size()) {
//if a[i..i+k-1] equals b then replace it by c and inceremnt i by k
int j = 0;
while (j < k && b[j] == a[i + j]) j += 1;
if (j < k) //the equality failed
result.push_back(a[i++]);
else { //the equality succeeded
result.push_back(c);
i += k;
}
}
//add the trailing elements from a
while (i < a.size())
result.push_back(a[i++]);
}
br_status seq_rewriter::mk_seq_replace_re_all(expr* a, expr* b, expr* c, expr_ref& result) { br_status seq_rewriter::mk_seq_replace_re_all(expr* a, expr* b, expr* c, expr_ref& result) {
return BR_FAILED; return BR_FAILED;
} }
@ -2521,12 +2632,18 @@ bool seq_rewriter::get_re_head_tail_reversed(expr* r, expr_ref& head, expr_ref&
if (re().is_concat(r, r1, r2)) { if (re().is_concat(r, r1, r2)) {
unsigned len = re().min_length(r2); unsigned len = re().min_length(r2);
if (len != UINT_MAX && re().max_length(r2) == len) { if (len != UINT_MAX && re().max_length(r2) == len) {
head = r1; if (get_re_head_tail_reversed(r1, head, tail))
tail = r2; // left associative binding of concat
tail = mk_re_append(tail, r2);
else {
// right associative binding of concat
head = r1;
tail = r2;
}
return true; return true;
} }
if (get_re_head_tail_reversed(r2, head, tail)) { if (get_re_head_tail_reversed(r2, head, tail)) {
head = re().mk_concat(r1, head); head = mk_re_append(r1, head);
return true; return true;
} }
} }
@ -3059,7 +3176,8 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
return result; return result;
} }
// Order with higher IDs on the outside // Order with higher IDs on the outside
if (get_id(ca) < get_id(cb)) { bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT;
if (is_symmetric && get_id(ca) < get_id(cb)) {
std::swap(a, b); std::swap(a, b);
std::swap(ca, cb); std::swap(ca, cb);
std::swap(notca, notcb); std::swap(notca, notcb);
@ -3348,7 +3466,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
//while mk_empty() = [], because deriv(epsilon) = [] = nothing //while mk_empty() = [], because deriv(epsilon) = [] = nothing
return mk_empty(); return mk_empty();
} }
else if (str().is_itos(r1, r2)) { else if (str().is_itos(r1)) {
// //
// here r1 = (str.from_int r2) and r2 is non-ground // here r1 = (str.from_int r2) and r2 is non-ground
// or else the expression would have been simplified earlier // or else the expression would have been simplified earlier
@ -3356,27 +3474,45 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
// '0' <= elem <= '9' // '0' <= elem <= '9'
// if ((isdigit ele) and (ele = (hd r1))) then (to_re (tl r1)) else [] // if ((isdigit ele) and (ele = (hd r1))) then (to_re (tl r1)) else []
// //
hd = str().mk_nth_i(r1, m_autil.mk_int(0)); hd = mk_seq_first(r1);
m_br.mk_and(u().mk_le(m_util.mk_char('0'), ele), u().mk_le(ele, m_util.mk_char('9')), m().mk_eq(hd, ele), result); m_br.mk_and(u().mk_le(m_util.mk_char('0'), ele), u().mk_le(ele, m_util.mk_char('9')), m().mk_eq(hd, ele), result);
tl = re().mk_to_re(str().mk_substr(r1, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(r1), m_autil.mk_int(1)))); tl = re().mk_to_re(mk_seq_rest(r1));
return re_and(result, tl);
}
else {
// recall: [] denotes the empty language (nothing) regex, () denotes epsilon or empty sequence
// construct the term (if (r1 != () and (ele = (first r1)) then (to_re (rest r1)) else []))
hd = mk_seq_first(r1);
m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result);
tl = re().mk_to_re(mk_seq_rest(r1));
return re_and(result, tl); return re_and(result, tl);
} }
} }
else if (re().is_reverse(r, r1) && re().is_to_re(r1, r2)) { else if (re().is_reverse(r, r1)) {
// Reverses are rewritten so that the only derivative case is if (re().is_to_re(r1, r2)) {
// derivative of a reverse of a string. (All other cases stuck) // First try to exctract hd and tl such that r = hd ++ tl and |tl|=1
// This is analagous to the previous is_to_re case. expr_ref hd(m()), tl(m());
expr_ref hd(m()), tl(m()); if (get_head_tail_reversed(r2, hd, tl)) {
if (get_head_tail_reversed(r2, hd, tl)) { // Use mk_der_cond to normalize
// Use mk_der_cond to normalize STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;); result = m().mk_eq(ele, tl);
result = m().mk_eq(ele, tl); result = mk_der_cond(result, ele, seq_sort);
result = mk_der_cond(result, ele, seq_sort); result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd)));
result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd))); return result;
return result; }
} else if (str().is_empty(r2)) {
else if (str().is_empty(r2)) { return mk_empty();
return mk_empty(); }
else {
// construct the term (if (r2 != () and (ele = (last r2)) then reverse(to_re (butlast r2)) else []))
// hd = first of reverse(r2) i.e. last of r2
// tl = rest of reverse(r2) i.e. butlast of r2
//hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), m_autil.mk_int(1)));
hd = mk_seq_last(r2);
m_br.mk_and(m().mk_not(m().mk_eq(r2, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result);
tl = re().mk_to_re(mk_seq_butlast(r2));
return re_and(result, re().mk_reverse(tl));
}
} }
} }
else if (re().is_range(r, r1, r2)) { else if (re().is_range(r, r1, r2)) {
@ -3423,10 +3559,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;); STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;);
return mk_der_cond(result, ele, seq_sort); return mk_der_cond(result, ele, seq_sort);
} }
// stuck cases: re.derivative, variable, // stuck cases: re.derivative, re variable,
// str.to_re if the head of the string can't be obtained,
// and re.reverse if not applied to a string or if the tail char
// of the string can't be obtained
return expr_ref(re().mk_derivative(ele, r), m()); return expr_ref(re().mk_derivative(ele, r), m());
} }
@ -3662,12 +3795,14 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
} }
if (get_re_head_tail_reversed(b, hd, tl)) { if (get_re_head_tail_reversed(b, hd, tl)) {
SASSERT(re().min_length(tl) == re().max_length(tl)); SASSERT(re().min_length(tl) == re().max_length(tl));
expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m()); expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m());
expr_ref len_a(str().mk_length(a), m()); expr_ref len_a(str().mk_length(a), m());
expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m()); expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m());
expr* s = nullptr;
result = m().mk_and(m_autil.mk_ge(len_a, len_tl), result = m().mk_and(m_autil.mk_ge(len_a, len_tl),
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd), re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)); (false && re().is_to_re(tl, s) ? m().mk_eq(s, str().mk_substr(a, len_hd, len_tl)) :
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)));
return BR_REWRITE_FULL; return BR_REWRITE_FULL;
} }

View file

@ -180,6 +180,16 @@ class seq_rewriter {
expr_ref mk_seq_concat(expr* a, expr* b); expr_ref mk_seq_concat(expr* a, expr* b);
// Construct the expressions for taking the first element, the last element, the rest, and the butlast element
expr_ref mk_seq_first(expr* s);
expr_ref mk_seq_rest(expr* s);
expr_ref mk_seq_last(expr* s);
expr_ref mk_seq_butlast(expr* s);
bool try_get_unit_values(expr* s, expr_ref_vector& result);
//replace b in a by c into result
void replace_all_subvectors(expr_ref_vector const& as, expr_ref_vector const& bs, expr* c, expr_ref_vector& result);
// Calculate derivative, memoized and enforcing a normal form // Calculate derivative, memoized and enforcing a normal form
expr_ref is_nullable_rec(expr* r); expr_ref is_nullable_rec(expr* r);
expr_ref mk_derivative_rec(expr* ele, expr* r); expr_ref mk_derivative_rec(expr* ele, expr* r);
@ -344,6 +354,16 @@ public:
return result; return result;
} }
/*
* makes concat and simplifies
*/
expr_ref mk_re_append(expr* r1, expr* r2) {
expr_ref result(m());
if (mk_re_concat(r1, r2, result) == BR_FAILED)
result = re().mk_concat(r1, r2);
return result;
}
/** /**
* check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences * check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences
*/ */

View file

@ -911,7 +911,7 @@ unsigned seq_util::str::max_length(expr* s) const {
return UINT_MAX; return UINT_MAX;
}; };
while (is_concat(s, s1, s2)) { while (is_concat(s, s1, s2)) {
result = u.max_plus(get_length(s), result); result = u.max_plus(get_length(s1), result);
s = s2; s = s2;
} }
result = u.max_plus(get_length(s), result); result = u.max_plus(get_length(s), result);
@ -1058,6 +1058,7 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) {
*/ */
std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const { std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const {
SASSERT(re.u.is_seq(s)); SASSERT(re.u.is_seq(s));
zstring z;
if (re.u.str.is_empty(s)) if (re.u.str.is_empty(s))
out << "()"; out << "()";
else if (re.u.str.is_unit(s)) else if (re.u.str.is_unit(s))
@ -1068,6 +1069,10 @@ std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s)
for (expr* e : es) for (expr* e : es)
compact_helper_seq(out, e); compact_helper_seq(out, e);
} }
else if (re.u.str.is_string(s, z)) {
for (int i = 0; i < z.length(); i++)
out << (char)z[i];
}
//using braces to indicate 'full' output //using braces to indicate 'full' output
//for example an uninterpreted constant X will be printed as {X} //for example an uninterpreted constant X will be printed as {X}
//while a unit sequence "X" will be printed as X //while a unit sequence "X" will be printed as X
@ -1100,7 +1105,7 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const { std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
expr* e; expr* e;
unsigned n = 0; unsigned n = 0;
if (re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) { if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
char c = (char)n; char c = (char)n;
if (c == '\n') if (c == '\n')
out << "\\n"; out << "\\n";
@ -1148,7 +1153,11 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
std::ostream& seq_util::rex::pp::display(std::ostream& out) const { std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
unsigned lo = 0, hi = 0; unsigned lo = 0, hi = 0;
if (re.is_full_char(e)) if (re.u.is_char(e))
return seq_unit(out, e);
else if (re.u.is_seq(e))
return compact_helper_seq(out, e);
else if (re.is_full_char(e))
return out << "."; return out << ".";
else if (re.is_full_seq(e)) else if (re.is_full_seq(e))
return out << ".*"; return out << ".*";
@ -1163,9 +1172,9 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
else if (re.is_concat(e, r1, r2)) else if (re.is_concat(e, r1, r2))
return out << pp(re, r1) << pp(re, r2); return out << pp(re, r1) << pp(re, r2);
else if (re.is_union(e, r1, r2)) else if (re.is_union(e, r1, r2))
return out << pp(re, r1) << "|" << pp(re, r2); return out << "(" << pp(re, r1) << "|" << pp(re, r2) << ")";
else if (re.is_intersection(e, r1, r2)) else if (re.is_intersection(e, r1, r2))
return out << "(" << pp(re, r1) << (html_encode ? ")&amp;(": ")&(" ) << pp(re, r2) << ")"; return out << "(" << pp(re, r1) << "&amp;" /*(html_encode ? ")&amp;(" : ")&(")*/ << pp(re, r2) << ")";
else if (re.is_complement(e, r1)) { else if (re.is_complement(e, r1)) {
if (can_skip_parenth(r1)) if (can_skip_parenth(r1))
return out << "~" << pp(re, r1); return out << "~" << pp(re, r1);

View file

@ -13,7 +13,7 @@ Abstract:
Author: Author:
Caleb Stanford (calebstanford-msr / cdstanford) 2020-7 Caleb Stanford (calebstanford-msr / cdstanford) 2020-7
Margus Veanes 2020-8
--*/ --*/
#include "state_graph.h" #include "state_graph.h"