mirror of
https://github.com/Z3Prover/z3
synced 2025-04-07 18:05:21 +00:00
* updates related to issue #5140 * updated/simplified some cases * fixing feedback comments * fixed comments and added missing case for get_re_head_tail_reversed * two bug fixes and some other code improvements
This commit is contained in:
parent
af5fd1014f
commit
225204e2f4
|
@ -14,6 +14,7 @@ Author:
|
|||
Nikolaj Bjorner (nbjorner) 2015-12-5
|
||||
Murphy Berzish 2017-02-21
|
||||
Caleb Stanford 2020-07-07
|
||||
Margus Veanes 2021
|
||||
|
||||
--*/
|
||||
|
||||
|
@ -872,6 +873,65 @@ br_status seq_rewriter::mk_seq_length(expr* a, expr_ref& result) {
|
|||
return BR_FAILED;
|
||||
}
|
||||
|
||||
/*
|
||||
* In general constructs nth(t,0) but if t = substring(s,j,..) then simplifies to nth(s,j)
|
||||
* This method assumes that |t| > 0.
|
||||
*/
|
||||
expr_ref seq_rewriter::mk_seq_first(expr* t) {
|
||||
expr_ref result(m());
|
||||
expr* s, * j, * k;
|
||||
if (str().is_extract(t, s, j, k))
|
||||
result = str().mk_nth_i(s, j);
|
||||
else
|
||||
result = str().mk_nth_i(t, m_autil.mk_int(0));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* In general constructs substring(t,1,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j+1,k-1)
|
||||
* This method assumes that |t| > 0.
|
||||
*/
|
||||
expr_ref seq_rewriter::mk_seq_rest(expr* t) {
|
||||
expr_ref result(m());
|
||||
expr* s, * j, * k;
|
||||
expr_ref one(m_autil.mk_int(1), m());
|
||||
if (str().is_extract(t, s, j, k))
|
||||
result = str().mk_substr(s, m_autil.mk_add(j, one), m_autil.mk_sub(k, one));
|
||||
else
|
||||
result = str().mk_substr(t, one, m_autil.mk_sub(str().mk_length(t), one));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* In general constructs nth(t,|t|-1) but if t = substring(s,j,k) then simplifies to nth(s,j+k-1)
|
||||
* This method assumes that |t| > 0.
|
||||
*/
|
||||
expr_ref seq_rewriter::mk_seq_last(expr* t) {
|
||||
expr_ref result(m());
|
||||
expr* s, * j, * k;
|
||||
expr_ref one(m_autil.mk_int(1), m());
|
||||
if (str().is_extract(t, s, j, k))
|
||||
result = str().mk_nth_i(s, m_autil.mk_sub(m_autil.mk_add(j, k), one));
|
||||
else
|
||||
result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* In general constructs substring(t,0,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j,k-1)
|
||||
* This method assumes that |t| > 0 holds.
|
||||
*/
|
||||
expr_ref seq_rewriter::mk_seq_butlast(expr* t) {
|
||||
expr_ref result(m());
|
||||
expr* s, * j, * k;
|
||||
expr_ref one(m_autil.mk_int(1), m());
|
||||
if (str().is_extract(t, s, j, k))
|
||||
result = str().mk_substr(s, j, m_autil.mk_sub(k, one));
|
||||
else
|
||||
result = str().mk_substr(t, m_autil.mk_int(0), m_autil.mk_sub(str().mk_length(t), one));
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
Lift all ite expressions to the top level, safely
|
||||
throttled to not blowup the size of the expression.
|
||||
|
@ -1893,13 +1953,13 @@ br_status seq_rewriter::mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref&
|
|||
return BR_REWRITE1;
|
||||
}
|
||||
zstring s1, s2;
|
||||
expr_ref_vector strs(m());
|
||||
if (str().is_string(a, s1) && str().is_string(b, s2)) {
|
||||
SASSERT(s2.length() > 0);
|
||||
if (s1.length() < s2.length()) {
|
||||
result = a;
|
||||
return BR_DONE;
|
||||
}
|
||||
expr_ref_vector strs(m());
|
||||
for (unsigned i = 0; i < s1.length(); ++i) {
|
||||
if (s1.length() >= s2.length() + i &&
|
||||
s2 == s1.extract(i, s2.length())) {
|
||||
|
@ -1912,11 +1972,62 @@ br_status seq_rewriter::mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref&
|
|||
result = str().mk_concat(strs, a->get_sort());
|
||||
return BR_REWRITE_FULL;
|
||||
}
|
||||
// TBD: add case when a, b are concatenation of units that are values.
|
||||
// in this case we can use a similar loop as for strings.
|
||||
expr_ref_vector a_vals(m());
|
||||
expr_ref_vector b_vals(m());
|
||||
if (try_get_unit_values(a, a_vals) && try_get_unit_values(b, b_vals)) {
|
||||
replace_all_subvectors(a_vals, b_vals, c, strs);
|
||||
result = str().mk_concat(strs, a->get_sort());
|
||||
return BR_REWRITE_FULL;
|
||||
}
|
||||
|
||||
//TODO: the case when a is a unit or concatenation of units while b is a string
|
||||
//or the other way around -- if that situation is possible at all -- is similar to the above
|
||||
return BR_FAILED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns false if s is not a single unit value or concatenation of unit values.
|
||||
* Else extracts the units from s into vals and returns true.
|
||||
*/
|
||||
bool seq_rewriter::try_get_unit_values(expr* s, expr_ref_vector& vals) {
|
||||
expr* h, * t, * v;
|
||||
t = s;
|
||||
//collect all unit values from s, if not all elements are unit-values then fail
|
||||
while (str().is_concat(t, h, t))
|
||||
if (str().is_unit(h, v) && m().is_value(v))
|
||||
vals.push_back(h);
|
||||
else
|
||||
return false;
|
||||
//add the last element
|
||||
if (str().is_unit(t, v) && m().is_value(v))
|
||||
vals.push_back(t);
|
||||
else
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace all subvectors of b in a by c
|
||||
*/
|
||||
void seq_rewriter::replace_all_subvectors(expr_ref_vector const& a, expr_ref_vector const& b, expr* c, expr_ref_vector& result) {
|
||||
unsigned int i = 0;
|
||||
unsigned int k = b.size();
|
||||
while (i + k <= a.size()) {
|
||||
//if a[i..i+k-1] equals b then replace it by c and inceremnt i by k
|
||||
int j = 0;
|
||||
while (j < k && b[j] == a[i + j]) j += 1;
|
||||
if (j < k) //the equality failed
|
||||
result.push_back(a[i++]);
|
||||
else { //the equality succeeded
|
||||
result.push_back(c);
|
||||
i += k;
|
||||
}
|
||||
}
|
||||
//add the trailing elements from a
|
||||
while (i < a.size())
|
||||
result.push_back(a[i++]);
|
||||
}
|
||||
|
||||
br_status seq_rewriter::mk_seq_replace_re_all(expr* a, expr* b, expr* c, expr_ref& result) {
|
||||
return BR_FAILED;
|
||||
}
|
||||
|
@ -2521,12 +2632,18 @@ bool seq_rewriter::get_re_head_tail_reversed(expr* r, expr_ref& head, expr_ref&
|
|||
if (re().is_concat(r, r1, r2)) {
|
||||
unsigned len = re().min_length(r2);
|
||||
if (len != UINT_MAX && re().max_length(r2) == len) {
|
||||
head = r1;
|
||||
tail = r2;
|
||||
if (get_re_head_tail_reversed(r1, head, tail))
|
||||
// left associative binding of concat
|
||||
tail = mk_re_append(tail, r2);
|
||||
else {
|
||||
// right associative binding of concat
|
||||
head = r1;
|
||||
tail = r2;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (get_re_head_tail_reversed(r2, head, tail)) {
|
||||
head = re().mk_concat(r1, head);
|
||||
head = mk_re_append(r1, head);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -3059,7 +3176,8 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
|
|||
return result;
|
||||
}
|
||||
// Order with higher IDs on the outside
|
||||
if (get_id(ca) < get_id(cb)) {
|
||||
bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT;
|
||||
if (is_symmetric && get_id(ca) < get_id(cb)) {
|
||||
std::swap(a, b);
|
||||
std::swap(ca, cb);
|
||||
std::swap(notca, notcb);
|
||||
|
@ -3348,7 +3466,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
|
|||
//while mk_empty() = [], because deriv(epsilon) = [] = nothing
|
||||
return mk_empty();
|
||||
}
|
||||
else if (str().is_itos(r1, r2)) {
|
||||
else if (str().is_itos(r1)) {
|
||||
//
|
||||
// here r1 = (str.from_int r2) and r2 is non-ground
|
||||
// or else the expression would have been simplified earlier
|
||||
|
@ -3356,27 +3474,45 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
|
|||
// '0' <= elem <= '9'
|
||||
// if ((isdigit ele) and (ele = (hd r1))) then (to_re (tl r1)) else []
|
||||
//
|
||||
hd = str().mk_nth_i(r1, m_autil.mk_int(0));
|
||||
hd = mk_seq_first(r1);
|
||||
m_br.mk_and(u().mk_le(m_util.mk_char('0'), ele), u().mk_le(ele, m_util.mk_char('9')), m().mk_eq(hd, ele), result);
|
||||
tl = re().mk_to_re(str().mk_substr(r1, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(r1), m_autil.mk_int(1))));
|
||||
tl = re().mk_to_re(mk_seq_rest(r1));
|
||||
return re_and(result, tl);
|
||||
}
|
||||
else {
|
||||
// recall: [] denotes the empty language (nothing) regex, () denotes epsilon or empty sequence
|
||||
// construct the term (if (r1 != () and (ele = (first r1)) then (to_re (rest r1)) else []))
|
||||
hd = mk_seq_first(r1);
|
||||
m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result);
|
||||
tl = re().mk_to_re(mk_seq_rest(r1));
|
||||
return re_and(result, tl);
|
||||
}
|
||||
}
|
||||
else if (re().is_reverse(r, r1) && re().is_to_re(r1, r2)) {
|
||||
// Reverses are rewritten so that the only derivative case is
|
||||
// derivative of a reverse of a string. (All other cases stuck)
|
||||
// This is analagous to the previous is_to_re case.
|
||||
expr_ref hd(m()), tl(m());
|
||||
if (get_head_tail_reversed(r2, hd, tl)) {
|
||||
// Use mk_der_cond to normalize
|
||||
STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
|
||||
result = m().mk_eq(ele, tl);
|
||||
result = mk_der_cond(result, ele, seq_sort);
|
||||
result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd)));
|
||||
return result;
|
||||
}
|
||||
else if (str().is_empty(r2)) {
|
||||
return mk_empty();
|
||||
else if (re().is_reverse(r, r1)) {
|
||||
if (re().is_to_re(r1, r2)) {
|
||||
// First try to exctract hd and tl such that r = hd ++ tl and |tl|=1
|
||||
expr_ref hd(m()), tl(m());
|
||||
if (get_head_tail_reversed(r2, hd, tl)) {
|
||||
// Use mk_der_cond to normalize
|
||||
STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
|
||||
result = m().mk_eq(ele, tl);
|
||||
result = mk_der_cond(result, ele, seq_sort);
|
||||
result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd)));
|
||||
return result;
|
||||
}
|
||||
else if (str().is_empty(r2)) {
|
||||
return mk_empty();
|
||||
}
|
||||
else {
|
||||
// construct the term (if (r2 != () and (ele = (last r2)) then reverse(to_re (butlast r2)) else []))
|
||||
// hd = first of reverse(r2) i.e. last of r2
|
||||
// tl = rest of reverse(r2) i.e. butlast of r2
|
||||
//hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), m_autil.mk_int(1)));
|
||||
hd = mk_seq_last(r2);
|
||||
m_br.mk_and(m().mk_not(m().mk_eq(r2, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result);
|
||||
tl = re().mk_to_re(mk_seq_butlast(r2));
|
||||
return re_and(result, re().mk_reverse(tl));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (re().is_range(r, r1, r2)) {
|
||||
|
@ -3423,10 +3559,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
|
|||
STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;);
|
||||
return mk_der_cond(result, ele, seq_sort);
|
||||
}
|
||||
// stuck cases: re.derivative, variable,
|
||||
// str.to_re if the head of the string can't be obtained,
|
||||
// and re.reverse if not applied to a string or if the tail char
|
||||
// of the string can't be obtained
|
||||
// stuck cases: re.derivative, re variable,
|
||||
return expr_ref(re().mk_derivative(ele, r), m());
|
||||
}
|
||||
|
||||
|
@ -3662,12 +3795,14 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
|
|||
}
|
||||
if (get_re_head_tail_reversed(b, hd, tl)) {
|
||||
SASSERT(re().min_length(tl) == re().max_length(tl));
|
||||
expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m());
|
||||
expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m());
|
||||
expr_ref len_a(str().mk_length(a), m());
|
||||
expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m());
|
||||
expr* s = nullptr;
|
||||
result = m().mk_and(m_autil.mk_ge(len_a, len_tl),
|
||||
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
|
||||
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
|
||||
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
|
||||
(false && re().is_to_re(tl, s) ? m().mk_eq(s, str().mk_substr(a, len_hd, len_tl)) :
|
||||
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)));
|
||||
return BR_REWRITE_FULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -180,6 +180,16 @@ class seq_rewriter {
|
|||
|
||||
expr_ref mk_seq_concat(expr* a, expr* b);
|
||||
|
||||
// Construct the expressions for taking the first element, the last element, the rest, and the butlast element
|
||||
expr_ref mk_seq_first(expr* s);
|
||||
expr_ref mk_seq_rest(expr* s);
|
||||
expr_ref mk_seq_last(expr* s);
|
||||
expr_ref mk_seq_butlast(expr* s);
|
||||
|
||||
bool try_get_unit_values(expr* s, expr_ref_vector& result);
|
||||
//replace b in a by c into result
|
||||
void replace_all_subvectors(expr_ref_vector const& as, expr_ref_vector const& bs, expr* c, expr_ref_vector& result);
|
||||
|
||||
// Calculate derivative, memoized and enforcing a normal form
|
||||
expr_ref is_nullable_rec(expr* r);
|
||||
expr_ref mk_derivative_rec(expr* ele, expr* r);
|
||||
|
@ -344,6 +354,16 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* makes concat and simplifies
|
||||
*/
|
||||
expr_ref mk_re_append(expr* r1, expr* r2) {
|
||||
expr_ref result(m());
|
||||
if (mk_re_concat(r1, r2, result) == BR_FAILED)
|
||||
result = re().mk_concat(r1, r2);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences
|
||||
*/
|
||||
|
|
|
@ -911,7 +911,7 @@ unsigned seq_util::str::max_length(expr* s) const {
|
|||
return UINT_MAX;
|
||||
};
|
||||
while (is_concat(s, s1, s2)) {
|
||||
result = u.max_plus(get_length(s), result);
|
||||
result = u.max_plus(get_length(s1), result);
|
||||
s = s2;
|
||||
}
|
||||
result = u.max_plus(get_length(s), result);
|
||||
|
@ -1058,6 +1058,7 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) {
|
|||
*/
|
||||
std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const {
|
||||
SASSERT(re.u.is_seq(s));
|
||||
zstring z;
|
||||
if (re.u.str.is_empty(s))
|
||||
out << "()";
|
||||
else if (re.u.str.is_unit(s))
|
||||
|
@ -1068,6 +1069,10 @@ std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s)
|
|||
for (expr* e : es)
|
||||
compact_helper_seq(out, e);
|
||||
}
|
||||
else if (re.u.str.is_string(s, z)) {
|
||||
for (int i = 0; i < z.length(); i++)
|
||||
out << (char)z[i];
|
||||
}
|
||||
//using braces to indicate 'full' output
|
||||
//for example an uninterpreted constant X will be printed as {X}
|
||||
//while a unit sequence "X" will be printed as X
|
||||
|
@ -1100,7 +1105,7 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
|
|||
std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
|
||||
expr* e;
|
||||
unsigned n = 0;
|
||||
if (re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) {
|
||||
if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
|
||||
char c = (char)n;
|
||||
if (c == '\n')
|
||||
out << "\\n";
|
||||
|
@ -1148,7 +1153,11 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
|
|||
std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
|
||||
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
|
||||
unsigned lo = 0, hi = 0;
|
||||
if (re.is_full_char(e))
|
||||
if (re.u.is_char(e))
|
||||
return seq_unit(out, e);
|
||||
else if (re.u.is_seq(e))
|
||||
return compact_helper_seq(out, e);
|
||||
else if (re.is_full_char(e))
|
||||
return out << ".";
|
||||
else if (re.is_full_seq(e))
|
||||
return out << ".*";
|
||||
|
@ -1163,9 +1172,9 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
|
|||
else if (re.is_concat(e, r1, r2))
|
||||
return out << pp(re, r1) << pp(re, r2);
|
||||
else if (re.is_union(e, r1, r2))
|
||||
return out << pp(re, r1) << "|" << pp(re, r2);
|
||||
return out << "(" << pp(re, r1) << "|" << pp(re, r2) << ")";
|
||||
else if (re.is_intersection(e, r1, r2))
|
||||
return out << "(" << pp(re, r1) << (html_encode ? ")&(": ")&(" ) << pp(re, r2) << ")";
|
||||
return out << "(" << pp(re, r1) << "&" /*(html_encode ? ")&(" : ")&(")*/ << pp(re, r2) << ")";
|
||||
else if (re.is_complement(e, r1)) {
|
||||
if (can_skip_parenth(r1))
|
||||
return out << "~" << pp(re, r1);
|
||||
|
|
|
@ -13,7 +13,7 @@ Abstract:
|
|||
Author:
|
||||
|
||||
Caleb Stanford (calebstanford-msr / cdstanford) 2020-7
|
||||
|
||||
Margus Veanes 2020-8
|
||||
--*/
|
||||
|
||||
#include "state_graph.h"
|
||||
|
|
Loading…
Reference in a new issue