3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-06 17:44:08 +00:00

Updated regex derivative engine (#5567)

* updated derivative engine

* some edit

* further improvements in derivative code

* more deriv code edits and re::to_str update

* optimized mk_deriv_accept

* fixed PR comments

* small syntax fix

* updated some simplifications

* bugfix:forgot to_re before reverse

* fixed PR comments

* more PR comment fixes

* more PR comment fixes

* forgot to delete

* deleting unused definition

* fixes

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fixes

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Margus Veanes 2021-10-08 13:04:49 -07:00 committed by GitHub
parent c0c3e685e7
commit 146f4621c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 893 additions and 280 deletions

View file

@ -850,7 +850,7 @@ namespace seq {
add_clause(~eq, ge10k);
for (unsigned i = 0; i < k; ++i) {
expr* ch = seq.str.mk_nth_i(ubvs, i);
expr* ch = seq.str.mk_nth_c(ubvs, i);
is_digit = seq.mk_char_is_digit(ch);
add_clause(~ge_len, is_digit);
}

View file

@ -859,13 +859,12 @@ br_status seq_rewriter::mk_seq_length(expr* a, expr_ref& result) {
// elif offset >= len(s) then 0
// elif offset + length > len(s) then len(s) - offset
// else length
expr_ref zero(m_autil.mk_int(0), m());
result = length;
result = m().mk_ite(m_autil.mk_gt(m_autil.mk_add(offset, length), len_s),
m_autil.mk_sub(len_s, offset),
result);
result = m().mk_ite(m().mk_or(m_autil.mk_le(len_s, offset), m_autil.mk_le(length, zero), m_autil.mk_lt(offset, zero)),
zero,
result = m().mk_ite(m().mk_or(m_autil.mk_le(len_s, offset), m_autil.mk_le(length, zero()), m_autil.mk_lt(offset, zero())),
zero(),
result);
return BR_REWRITE_FULL;
}
@ -883,37 +882,56 @@ expr_ref seq_rewriter::mk_seq_first(expr* t) {
if (str().is_extract(t, s, j, k))
result = str().mk_nth_i(s, j);
else
result = str().mk_nth_i(t, m_autil.mk_int(0));
result = str().mk_nth_c(t, 0);
return result;
}
expr_ref seq_rewriter::mk_sub(expr* a, rational const& n) {
expr* a1, *a2;
SASSERT(n.is_int());
rational k;
if (m_autil.is_sub(a, a1, a2) && m_autil.is_numeral(a2, k))
return expr_ref(m_autil.mk_sub(a1, m_autil.mk_int(k + n)), m());
if (m_autil.is_add(a, a1, a2) && m_autil.is_numeral(a2, k))
return expr_ref(m_autil.mk_add(a1, m_autil.mk_int(k - n)), m());
if (m_autil.is_add(a, a1, a2) && m_autil.is_numeral(a1, k))
return expr_ref(m_autil.mk_add(a2, m_autil.mk_int(k - n)), m());
return expr_ref(m_autil.mk_sub(a, m_autil.mk_int(n)), m());
}
/*
* In general constructs substring(t,1,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j+1,k-1)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_rest(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_substr(s, m_autil.mk_add(j, one), m_autil.mk_sub(k, one));
else
result = str().mk_substr(t, one, m_autil.mk_sub(str().mk_length(t), one));
expr* s, * j, * k;
rational jv;
if (str().is_extract(t, s, j, k) && m_autil.is_numeral(j, jv) && jv >= 0)
result = str().mk_substr(s, m_autil.mk_int(jv + 1), mk_sub(k, 1));
else
result = str().mk_substr(t, one(), mk_sub(str().mk_length(t), 1));
return result;
}
/*
* In general constructs nth(t,|t|-1) but if t = substring(s,j,k) then simplifies to nth(s,j+k-1)
* In general constructs nth(t,|t|-1) but if t = substring(s,j,|s|-j) j >= 0, then simplifies to nth(s,|s|-1)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_last(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_nth_i(s, m_autil.mk_sub(m_autil.mk_add(j, k), one));
expr* s, * j, * k, * s_, * len_s;
rational jv, i;
if (str().is_extract(t, s, j, k) &&
m_autil.is_numeral(j, jv) && jv >= 0 &&
str().is_len_sub(k, len_s, s_, i) &&
s == s_ && jv == i) {
expr_ref lastpos = mk_sub(len_s, 1);
result = str().mk_nth_i(s, lastpos);
}
else
result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one));
result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one()));
return result;
}
@ -924,11 +942,14 @@ expr_ref seq_rewriter::mk_seq_last(expr* t) {
expr_ref seq_rewriter::mk_seq_butlast(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_substr(s, j, m_autil.mk_sub(k, one));
if (str().is_extract(t, s, j, k)) {
expr_ref_vector k_min_1(m());
k_min_1.push_back(k);
k_min_1.push_back(minus_one());
result = str().mk_substr(s, j, m_autil.mk_add_simplify(k_min_1));
}
else
result = str().mk_substr(t, m_autil.mk_int(0), m_autil.mk_sub(str().mk_length(t), one));
result = str().mk_substr(t, zero(), m_autil.mk_sub(str().mk_length(t), one()));
return result;
}
@ -1678,7 +1699,7 @@ br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result
return BR_DONE;
}
if (m_autil.is_numeral(c, r) && r.is_neg()) {
result = m_autil.mk_int(-1);
result = minus_one();
return BR_DONE;
}
@ -1688,10 +1709,10 @@ br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result
}
if (str().is_empty(b)) {
result = m().mk_ite(m().mk_and(m_autil.mk_le(m_autil.mk_int(0), c),
result = m().mk_ite(m().mk_and(m_autil.mk_le(zero(), c),
m_autil.mk_le(c, str().mk_length(a))),
c,
m_autil.mk_int(-1));
minus_one());
return BR_REWRITE2;
}
@ -2307,7 +2328,7 @@ br_status seq_rewriter::mk_str_to_code(expr* a, expr_ref& result) {
if (s.length() == 1)
result = m_autil.mk_int(s[0]);
else
result = m_autil.mk_int(-1);
result = minus_one();
return BR_DONE;
}
return BR_FAILED;
@ -2448,7 +2469,7 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) {
result = m_autil.mk_int(ch - '0');
}
else {
result = m_autil.mk_int(-1);
result = minus_one();
}
return BR_DONE;
}
@ -2456,7 +2477,7 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) {
expr_ref_vector as(m());
str().get_concat_units(a, as);
if (as.empty()) {
result = m_autil.mk_int(-1);
result = minus_one();
return BR_DONE;
}
if (str().is_unit(as.back())) {
@ -2466,11 +2487,11 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) {
expr_ref tail(str().mk_stoi(as.back()), m());
expr_ref head(str().mk_concat(as.size() - 1, as.data(), a->get_sort()), m());
expr_ref stoi_head(str().mk_stoi(head), m());
result = m().mk_ite(m_autil.mk_ge(stoi_head, m_autil.mk_int(0)),
result = m().mk_ite(m_autil.mk_ge(stoi_head, zero()),
m_autil.mk_add(m_autil.mk_mul(m_autil.mk_int(10), stoi_head), tail),
m_autil.mk_int(-1));
minus_one());
result = m().mk_ite(m_autil.mk_ge(tail, m_autil.mk_int(0)),
result = m().mk_ite(m_autil.mk_ge(tail, zero()),
result,
tail);
result = m().mk_ite(str().mk_is_empty(head),
@ -2481,7 +2502,7 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) {
if (str().is_unit(as.get(0), u) && m_util.is_const_char(u, ch) && '0' == ch) {
result = str().mk_concat(as.size() - 1, as.data() + 1, as[0]->get_sort());
result = m().mk_ite(str().mk_is_empty(result),
m_autil.mk_int(0),
zero(),
str().mk_stoi(result));
return BR_REWRITE_FULL;
}
@ -2573,7 +2594,7 @@ bool seq_rewriter::is_sequence(expr* e, expr_ref_vector& seq) {
}
/*
s = head + tail where |head| = 1
s = [head] + tail where head is the first element of s
*/
bool seq_rewriter::get_head_tail(expr* s, expr_ref& head, expr_ref& tail) {
expr* h = nullptr, *t = nullptr;
@ -2670,10 +2691,10 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) {
expr_ref seq_rewriter::is_nullable(expr* r) {
STRACE("seq_verbose", tout << "is_nullable: "
<< mk_pp(r, m()) << std::endl;);
expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m());
expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr, nullptr), m());
if (!result) {
result = is_nullable_rec(r);
m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result);
m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, nullptr, result);
}
STRACE("seq_verbose", tout << "is_nullable result: "
<< result << std::endl;);
@ -2691,7 +2712,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
re().is_intersection(r, r1, r2)) {
m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
}
else if (re().is_union(r, r1, r2)) {
else if (re().is_union(r, r1, r2) || re().is_antimorov_union(r, r1, r2)) {
m_br.mk_or(is_nullable(r1), is_nullable(r2), result);
}
else if (re().is_diff(r, r1, r2)) {
@ -2701,6 +2722,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
else if (re().is_star(r) ||
re().is_opt(r) ||
re().is_full_seq(r) ||
re().is_epsilon(r) ||
(re().is_loop(r, r1, lo) && lo == 0) ||
(re().is_loop(r, r1, lo, hi) && lo == 0)) {
result = m().mk_true();
@ -2724,7 +2746,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) {
result = is_nullable(r1);
}
else if (m().is_ite(r, cond, r1, r2)) {
result = m().mk_ite(cond, is_nullable(r1), is_nullable(r2));
m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result);
}
else if (m_util.is_re(r, seq_sort)) {
result = is_nullable_symbolic_regex(r, seq_sort);
@ -2881,7 +2903,8 @@ br_status seq_rewriter::mk_re_reverse(expr* r, expr_ref& result) {
br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) {
result = mk_derivative(ele, r);
// TBD: we may even declare BR_DONE here and potentially miss some simplifications
return re().is_derivative(result) ? BR_DONE : BR_REWRITE_FULL;
// return re().is_derivative(result) ? BR_DONE : BR_REWRITE_FULL;
return BR_DONE;
}
/*
@ -2976,29 +2999,379 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) {
}
#endif
/*
Memoized, recursive implementation of the symbolic derivative such that
the result is in normal form.
expr_ref seq_rewriter::mk_derivative(expr* r) {
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(r, seq_sort));
VERIFY(m_util.is_seq(seq_sort, ele_sort));
expr_ref v(m().mk_var(0, ele_sort), m());
return mk_antimirov_deriv(v, r, m().mk_true());
}
Functions without _rec are memoized wrappers, which call the _rec
version if lookup fails.
The main logic is in mk_der_op_rec for combining normal forms.
*/
expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) {
STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m())
<< "," << mk_pp(r, m()) << std::endl;);
expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m());
return mk_antimirov_deriv(ele, r, m().mk_true());
}
expr_ref seq_rewriter::mk_antimirov_deriv(expr* e, expr* r, expr* path) {
// Ensure references are owned
expr_ref _e(e, m()), _path(path, m()), _r(r, m());
expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, e, r, path), m());
if (!result) {
result = mk_derivative_rec(ele, r);
m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result);
mk_antimirov_deriv_rec(e, r, path, result);
m_op_cache.insert(OP_RE_DERIVATIVE, e, r, path, result);
STRACE("seq_regex", tout << "D(" << mk_pp(e, m()) << "," << mk_pp(r, m()) << "," << mk_pp(path, m()) << ")" << std::endl;);
STRACE("seq_regex", tout << "= " << mk_pp(result, m()) << std::endl;);
}
STRACE("seq_verbose", tout << "derivative result: "
<< mk_pp(result, m()) << std::endl;);
CASSERT("seq_regex", check_deriv_normal_form(r));
return result;
}
void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result) {
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(r, seq_sort));
VERIFY(m_util.is_seq(seq_sort, ele_sort));
SASSERT(ele_sort == e->get_sort());
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
expr_ref c1(m());
expr_ref c2(m());
auto nothing = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m()); };
auto epsilon = [&]() { return expr_ref(re().mk_epsilon(seq_sort), m()); };
auto dotstar = [&]() { return expr_ref(re().mk_full_seq(r->get_sort()), m()); };
auto dotplus = [&]() { return expr_ref(re().mk_plus(re().mk_full_char(r->get_sort())), m()); };
unsigned lo = 0, hi = 0;
if (re().is_empty(r) || re().is_epsilon(r))
// D(e,[]) = D(e,()) = []
result = nothing();
else if (re().is_full_seq(r) || re().is_dot_plus(r))
// D(e,.*) = D(e,.+) = .*
result = dotstar();
else if (re().is_full_char(r))
// D(e,.) = ()
result = epsilon();
else if (re().is_to_re(r, r1)) {
expr_ref h(m());
expr_ref t(m());
// here r1 is a sequence
if (get_head_tail(r1, h, t)) {
if (eq_char(e, h))
result = re().mk_to_re(t);
else if (neq_char(e, h))
result = nothing();
else
result = re().mk_ite_simplify(m().mk_eq(e, h), re().mk_to_re(t), nothing());
}
else {
// observe that the precondition |r1|>0 is is implied by c1 for use of mk_seq_first
m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(mk_seq_first(r1), e), c1);
m_br.mk_and(path, c1, c2);
if (m().is_false(c2))
result = nothing();
else
// observe that the precondition |r1|>0 is implied by c1 for use of mk_seq_rest
result = m().mk_ite(c1, re().mk_to_re(mk_seq_rest(r1)), nothing());
}
}
else if (re().is_reverse(r, r2))
if (re().is_to_re(r2, r1)) {
// here r1 is a sequence
// observe that the precondition |r1|>0 of mk_seq_last is implied by c1
m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(mk_seq_last(r1), e), c1);
m_br.mk_and(path, c1, c2);
if (m().is_false(c2))
result = nothing();
else
// observe that the precondition |r1|>0 of mk_seq_rest is implied by c1
result = re().mk_ite_simplify(c1, re().mk_reverse(re().mk_to_re(mk_seq_butlast(r1))), nothing());
}
else {
result = mk_regex_reverse(r2);
if (result.get() == r)
//r2 is an uninterpreted regex that is stuck
//for example if r = (re.reverse R) where R is a regex variable then
//here result.get() == r
result = re().mk_derivative(e, result);
else
result = mk_antimirov_deriv(e, result, path);
}
else if (re().is_concat(r, r1, r2)) {
expr_ref r1nullable(is_nullable(r1), m());
c1 = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), r2);
expr_ref r1nullable_and_path(m());
m_br.mk_and(r1nullable, path, r1nullable_and_path);
if (m().is_false(r1nullable_and_path))
// D(e,r1)r2
result = c1;
else
// D(e,r1)r2|(ite (r1nullable) (D(e,r2)) [])
// observe that (mk_ite_simplify(true, D(e,r2), []) = D(e,r2)
result = mk_antimirov_deriv_union(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing()));
}
else if (m().is_ite(r, c, r1, r2)) {
c1 = simplify_path(m().mk_and(c, path));
c2 = simplify_path(m().mk_and(m().mk_not(c), path));
if (m().is_false(c1))
result = mk_antimirov_deriv(e, r2, c2);
else if (m().is_false(c2))
result = mk_antimirov_deriv(e, r1, c1);
else
result = re().mk_ite_simplify(c, mk_antimirov_deriv(e, r1, c1), mk_antimirov_deriv(e, r2, c2));
}
else if (re().is_range(r, r1, r2)) {
expr_ref range(m());
expr_ref psi(m());
if (str().is_unit_string(r1, c1) && str().is_unit_string(r2, c2)) {
SASSERT(u().is_char(c1));
SASSERT(u().is_char(c2));
// range represents c1 <= e <= c2
range = simplify_path(m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2)));
psi = simplify_path(m().mk_and(path, range));
if (m().is_false(psi))
result = nothing();
else
// D(e,c1..c2) = if (c1<=e<=c2) then () else []
result = re().mk_ite_simplify(range, epsilon(), nothing());
}
else
result = nothing();
}
else if (re().is_union(r, r1, r2))
result = mk_antimirov_deriv_union(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path));
else if (re().is_intersection(r, r1, r2))
result = mk_antimirov_deriv_intersection(
mk_antimirov_deriv(e, r1, path),
mk_antimirov_deriv(e, r2, path), m().mk_true());
else if (re().is_star(r, r1) || re().is_plus(r, r1) || (re().is_loop(r, r1, lo) && 0 <= lo && lo <= 1))
result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_star(r1));
else if (re().is_loop(r, r1, lo))
result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_loop(r1, lo - 1));
else if (re().is_loop(r, r1, lo, hi)) {
if (lo == 0 && hi == 0 || hi < lo)
result = nothing();
else
result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_loop(r1, (lo == 0 ? 0 : lo - 1), hi - 1));
}
else if (re().is_opt(r, r1))
result = mk_antimirov_deriv(e, r1, path);
else if (re().is_complement(r, r1))
// D(e,~r1) = ~D(e,r1)
result = mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r1, path));
else if (re().is_diff(r, r1, r2))
result = mk_antimirov_deriv_intersection(
mk_antimirov_deriv(e, r1, path),
mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r2, path)), m().mk_true());
else if (re().is_of_pred(r, r1)) {
array_util array(m());
expr* args[2] = { r1, e };
result = array.mk_select(2, args);
// Use mk_der_cond to normalize
result = mk_der_cond(result, e, seq_sort);
}
else
// stuck cases
result = re().mk_derivative(e, r);
}
expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path) {
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(d1, seq_sort));
VERIFY(m_util.is_seq(seq_sort, ele_sort));
expr_ref result(m());
expr* c, * a, * b;
if (d1 == d2 || re().is_full_seq(d2) || re().is_empty(d1))
result = d1;
else if (re().is_full_seq(d1) || re().is_empty(d2))
result = d2;
else if (m().is_ite(d1, c, a, b)) {
expr_ref path_and_c(simplify_path(m().mk_and(path, c)), m());
expr_ref path_and_notc(simplify_path(m().mk_and(path, m().mk_not(c))), m());
if (m().is_false(path_and_c))
result = mk_antimirov_deriv_intersection(b, d2, path);
else if (m().is_false(path_and_notc))
result = mk_antimirov_deriv_intersection(a, d2, path);
else
result = m().mk_ite(c, mk_antimirov_deriv_intersection(a, d2, path_and_c),
mk_antimirov_deriv_intersection(b, d2, path_and_notc));
}
else if (m().is_ite(d2))
// swap d1 and d2
result = mk_antimirov_deriv_intersection(d2, d1, path);
else if (re().is_union(d1, a, b))
// distribute intersection over the union in d1
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(a, d2, path), mk_antimirov_deriv_intersection(b, d2, path));
else if (re().is_union(d2, a, b))
// distribute intersection over the union in d2
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(d1, a, path), mk_antimirov_deriv_intersection(d1, b, path));
else
// in all other cases create the intersection regex
// TODO: flatten, order and merge d1 and d2 to maintain equality under similarity
result = (d1->get_id() <= d2->get_id() ? re().mk_inter(d1, d2) : re().mk_inter(d2, d1));
return result;
}
expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) {
expr_ref result(m());
// Take reference count of r and d
expr_ref _r(r, m()), _d(d, m());
expr* c, * t, * e;
if (m().is_ite(d, c, t, e))
result = m().mk_ite(c, mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r));
else if (re().is_union(d, t, e))
result = re().mk_union(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r));
else
result = mk_re_append(d, r);
return result;
}
expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* d) {
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(d, seq_sort));
auto nothing = [&]() { return expr_ref(re().mk_empty(d->get_sort()), m()); };
auto epsilon = [&]() { return expr_ref(re().mk_epsilon(seq_sort), m()); };
auto dotstar = [&]() { return expr_ref(re().mk_full_seq(d->get_sort()), m()); };
auto dotplus = [&]() { return expr_ref(re().mk_plus(re().mk_full_char(d->get_sort())), m()); };
expr_ref result(m());
expr* c, * t, * e;
if (re().is_empty(d))
result = dotstar();
else if (re().is_epsilon(d))
result = dotplus();
else if (re().is_full_seq(d))
result = nothing();
else if (re().is_dot_plus(d))
result = epsilon();
else if (m().is_ite(d, c, t, e))
result = m().mk_ite(c, mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
else if (re().is_union(d, t, e))
result = re().mk_inter(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
else if (re().is_intersection(d, t, e))
result = re().mk_union(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
else if (re().is_complement(d, t))
result = t;
else
result = re().mk_complement(d);
return result;
}
expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) {
expr_ref result(m());
if (re().is_empty(d1) || re().is_full_seq(d2))
result = d2;
else if (re().is_empty(d2) || re().is_full_seq(d1))
result = d1;
else if (re().is_dot_plus(d1) && re().get_info(d2).min_length > 0)
result = d1;
else if (re().is_dot_plus(d2) && re().get_info(d1).min_length > 0)
result = d2;
else
// TODO: flatten, order and merge d1 and d2 to maintain equality under similarity
result = (d1->get_id() <= d2->get_id() ? re().mk_union(d1, d2) : re().mk_union(d2, d1));
return result;
}
expr_ref seq_rewriter::mk_regex_reverse(expr* r) {
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
unsigned lo = 0, hi = 0;
expr_ref result(m());
if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) ||
re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r))
result = r;
else if (re().is_to_re(r))
result = re().mk_reverse(r);
else if (re().is_reverse(r, r1))
result = r1;
else if (re().is_concat(r, r1, r2))
result = mk_regex_concat(mk_regex_reverse(r2), mk_regex_reverse(r1));
else if (m().is_ite(r, c, r1, r2))
result = m().mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2));
else if (re().is_union(r, r1, r2))
result = re().mk_union(mk_regex_reverse(r1), mk_regex_reverse(r2));
else if (re().is_intersection(r, r1, r2))
result = re().mk_inter(mk_regex_reverse(r1), mk_regex_reverse(r2));
else if (re().is_diff(r, r1, r2))
result = re().mk_diff(mk_regex_reverse(r1), mk_regex_reverse(r2));
else if (re().is_star(r, r1))
result = re().mk_star(mk_regex_reverse(r1));
else if (re().is_plus(r, r1))
result = re().mk_plus(mk_regex_reverse(r1));
else if (re().is_loop(r, r1, lo))
result = re().mk_loop(mk_regex_reverse(r1), lo);
else if (re().is_loop(r, r1, lo, hi))
result = re().mk_loop(mk_regex_reverse(r1), lo, hi);
else if (re().is_opt(r, r1))
result = re().mk_opt(mk_regex_reverse(r1));
else if (re().is_complement(r, r1))
result = re().mk_complement(mk_regex_reverse(r1));
else
//stuck cases: such as r being a regex variable
//observe that re().mk_reverse(to_re(s)) is not a stuck case
result = re().mk_reverse(r);
return result;
}
expr_ref seq_rewriter::mk_regex_concat(expr* r, expr* s) {
sort* seq_sort = nullptr;
VERIFY(m_util.is_re(r, seq_sort));
SASSERT(r->get_sort() == s->get_sort());
expr_ref result(m());
expr* r1, * r2;
if (re().is_epsilon(r) || re().is_empty(s))
result = s;
else if (re().is_epsilon(s) || re().is_empty(r))
result = r;
else if (re().is_full_seq(r) && re().is_full_seq(s))
result = r;
else if (re().is_concat(r, r1, r2))
//create the resulting concatenation in right-associative form
result = mk_regex_concat(r1, mk_regex_concat(r2, s));
else {
//TODO: perhaps simplifiy some further cases such as .*. = ..* = .*.+ = .+.* = .+
result = re().mk_concat(r, s);
}
return result;
}
expr_ref seq_rewriter::mk_in_antimirov(expr* s, expr* d){
expr_ref result(mk_in_antimirov_rec(s, d), m());
return result;
}
expr_ref seq_rewriter::mk_in_antimirov_rec(expr* s, expr* d) {
expr* c, * d1, * d2;
expr_ref result(m());
if (re().is_full_seq(d) || (str().min_length(s) > 0 && re().is_dot_plus(d)))
// s in .* <==> true, also: s in .+ <==> true when |s|>0
result = m().mk_true();
else if (re().is_empty(d) || (str().min_length(s) > 0 && re().is_epsilon(d)))
// s in [] <==> false, also: s in () <==> false when |s|>0
result = m().mk_false();
else if (m().is_ite(d, c, d1, d2))
result = re().mk_ite_simplify(c, mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2));
else if (re().is_union(d, d1, d2))
m_br.mk_or(mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2), result);
else
result = re().mk_in_re(s, d);
return result;
}
/*
path is typically a conjunction of (negated) character equations or constraints that can potentially be simplified
the first element of each equation is assumed to be the element parameter, for example x = (:var 0),
for example a constraint x='a' & x='b' is simplified to false
*/
expr_ref seq_rewriter::simplify_path(expr* path) {
//TODO: more systematic simplifications
expr_ref result(path, m());
expr* h = nullptr, * t = nullptr, * lhs = nullptr, * rhs = nullptr, * h1 = nullptr;
if (m().is_and(path, h, t)) {
if (m().is_true(h))
result = simplify_path(t);
else if (m().is_true(t))
result = simplify_path(h);
else if (m().is_eq(h, lhs, rhs) || m().is_not(h, h1) && m().is_eq(h1, lhs, rhs))
elim_condition(lhs, result);
}
return result;
}
expr_ref seq_rewriter::mk_der_antimorov_union(expr* r1, expr* r2) {
return mk_der_op(_OP_RE_ANTIMOROV_UNION, r1, r2);
}
@ -3016,7 +3389,7 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) {
}
/*
Utility functions to decide char <, ==, and <=.
Utility functions to decide char <, ==, !=, and <=.
Return true if deduced, false if unknown.
*/
bool seq_rewriter::lt_char(expr* ch1, expr* ch2) {
@ -3027,6 +3400,11 @@ bool seq_rewriter::lt_char(expr* ch1, expr* ch2) {
bool seq_rewriter::eq_char(expr* ch1, expr* ch2) {
return ch1 == ch2;
}
bool seq_rewriter::neq_char(expr* ch1, expr* ch2) {
unsigned u1, u2;
return u().is_const_char(ch1, u1) &&
u().is_const_char(ch2, u2) && (u1 != u2);
}
bool seq_rewriter::le_char(expr* ch1, expr* ch2) {
return eq_char(ch1, ch2) || lt_char(ch1, ch2);
}
@ -3257,10 +3635,10 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
default:
break;
}
result = m_op_cache.find(k, a, b);
result = m_op_cache.find(k, a, b, nullptr);
if (!result) {
result = mk_der_op_rec(k, a, b);
m_op_cache.insert(k, a, b, result);
m_op_cache.insert(k, a, b, nullptr, result);
}
CASSERT("seq_regex", check_deriv_normal_form(result));
return result;
@ -3269,7 +3647,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) {
expr_ref seq_rewriter::mk_der_compl(expr* r) {
STRACE("seq_verbose", tout << "mk_der_compl: " << mk_pp(r, m())
<< std::endl;);
expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m());
expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr, nullptr), m());
if (!result) {
expr* c = nullptr, * r1 = nullptr, * r2 = nullptr;
if (re().is_antimorov_union(r, r1, r2)) {
@ -3285,7 +3663,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) {
}
else if (BR_FAILED == mk_re_complement(r, result))
result = re().mk_complement(r);
m_op_cache.insert(OP_RE_COMPLEMENT, r, nullptr, result);
m_op_cache.insert(OP_RE_COMPLEMENT, r, nullptr, nullptr, result);
}
CASSERT("seq_regex", check_deriv_normal_form(result));
return result;
@ -3509,7 +3887,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
// construct the term (if (r2 != () and (ele = (last r2)) then reverse(to_re (butlast r2)) else []))
// hd = first of reverse(r2) i.e. last of r2
// tl = rest of reverse(r2) i.e. butlast of r2
//hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), m_autil.mk_int(1)));
//hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), one()));
hd = mk_seq_last(r2);
m_br.mk_and(m().mk_not(m().mk_eq(r2, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result);
tl = re().mk_to_re(mk_seq_butlast(r2));
@ -3537,9 +3915,9 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
return mk_empty();
}
}
expr* e1 = nullptr, *e2 = nullptr;
expr* e1 = nullptr, * e2 = nullptr;
if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) {
SASSERT(u().is_char(e1));
SASSERT(u().is_char(e1));
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv range str" << std::endl;);
expr_ref p1(u().mk_le(e1, ele), m());
@ -3760,7 +4138,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
(re().is_union(b, b1, eps) && re().is_epsilon(eps)) ||
(re().is_union(b, eps, b1) && re().is_epsilon(eps)))
{
result = m().mk_ite(m().mk_eq(str().mk_length(a), m_autil.mk_int(0)),
result = m().mk_ite(m().mk_eq(str().mk_length(a), zero()),
m().mk_true(),
re().mk_in_re(a, b1));
return BR_REWRITE_FULL;
@ -3775,8 +4153,10 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
expr_ref hd(m()), tl(m());
if (get_head_tail(a, hd, tl)) {
result = re().mk_in_re(tl, re().mk_derivative(hd, b));
return BR_REWRITE2;
//result = re().mk_in_re(tl, re().mk_derivative(hd, b));
//result = re().mk_in_re(tl, mk_derivative(hd, b));
result = mk_in_antimirov(tl, mk_antimirov_deriv(hd, b, m().mk_true()));
return BR_REWRITE_FULL;
}
if (get_head_tail_reversed(a, hd, tl)) {
@ -3791,7 +4171,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
expr_ref len_a(str().mk_length(a), m());
expr_ref len_tl(m_autil.mk_sub(len_a, len_hd), m());
result = m().mk_and(m_autil.mk_ge(len_a, len_hd),
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
re().mk_in_re(str().mk_substr(a, zero(), len_hd), hd),
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
return BR_REWRITE_FULL;
}
@ -3802,7 +4182,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m());
expr* s = nullptr;
result = m().mk_and(m_autil.mk_ge(len_a, len_tl),
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
re().mk_in_re(str().mk_substr(a, zero(), len_hd), hd),
(re().is_to_re(tl, s) ? m().mk_eq(s, str().mk_substr(a, len_hd, len_tl)) :
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)));
return BR_REWRITE_FULL;
@ -3912,6 +4292,10 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
return BR_REWRITE2;
}
expr* a1 = nullptr, *b1 = nullptr;
if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) {
result = re().mk_to_re(str().mk_concat(a1, b1));
return BR_DONE;
}
if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1) {
result = a;
return BR_DONE;
@ -5151,15 +5535,15 @@ bool seq_rewriter::reduce_eq_empty(expr* l, expr* r, expr_ref& result) {
if (str().is_extract(r, s, offset, len)) {
expr_ref len_s(str().mk_length(s), m());
expr_ref_vector fmls(m());
fmls.push_back(m_autil.mk_lt(offset, m_autil.mk_int(0)));
fmls.push_back(m_autil.mk_lt(offset, zero()));
fmls.push_back(m().mk_eq(s, l));
fmls.push_back(m_autil.mk_le(len, m_autil.mk_int(0)));
fmls.push_back(m_autil.mk_le(len, zero()));
fmls.push_back(m_autil.mk_le(len_s, offset));
result = m().mk_or(fmls);
return true;
}
if (str().is_itos(r, s)) {
result = m_autil.mk_lt(s, m_autil.mk_int(0));
result = m_autil.mk_lt(s, zero());
return true;
}
return false;
@ -5275,19 +5659,20 @@ seq_rewriter::op_cache::op_cache(ast_manager& m):
m_trail(m)
{}
expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) {
op_entry e(op, a, b, nullptr);
expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b, expr* c) {
op_entry e(op, a, b, c, nullptr);
m_table.find(e, e);
return e.r;
}
void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) {
void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* c, expr* r) {
cleanup();
if (a) m_trail.push_back(a);
if (b) m_trail.push_back(b);
if (c) m_trail.push_back(c);
if (r) m_trail.push_back(r);
m_table.insert(op_entry(op, a, b, r));
m_table.insert(op_entry(op, a, b, c, r));
}
void seq_rewriter::op_cache::cleanup() {

View file

@ -117,20 +117,20 @@ class seq_rewriter {
class op_cache {
struct op_entry {
decl_kind k;
expr* a, *b, *r;
op_entry(decl_kind k, expr* a, expr* b, expr* r): k(k), a(a), b(b), r(r) {}
op_entry():k(0), a(nullptr), b(nullptr), r(nullptr) {}
expr* a, *b, *c, *r;
op_entry(decl_kind k, expr* a, expr* b, expr* c, expr* r): k(k), a(a), b(b), c(c), r(r) {}
op_entry():k(0), a(nullptr), b(nullptr), c(nullptr), r(nullptr) {}
};
struct hash_entry {
unsigned operator()(op_entry const& e) const {
return mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0);
return combine_hash(mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0), e.c ? e.c->get_id() : 0);
}
};
struct eq_entry {
bool operator()(op_entry const& a, op_entry const& b) const {
return a.k == b.k && a.a == b.a && a.b == b.b;
bool operator()(op_entry const& a, op_entry const& b) const {
return a.k == b.k && a.a == b.a && a.b == b.b && a.c == b.c;
}
};
@ -143,8 +143,8 @@ class seq_rewriter {
public:
op_cache(ast_manager& m);
expr* find(decl_kind op, expr* a, expr* b);
void insert(decl_kind op, expr* a, expr* b, expr* r);
expr* find(decl_kind op, expr* a, expr* b, expr* c);
void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r);
};
seq_util m_util;
@ -208,8 +208,24 @@ class seq_rewriter {
bool check_deriv_normal_form(expr* r, int level = 3);
#endif
void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result);
expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path);
expr_ref mk_in_antimirov_rec(expr* s, expr* d);
expr_ref mk_in_antimirov(expr* s, expr* d);
expr_ref mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path);
expr_ref mk_antimirov_deriv_concat(expr* d, expr* r);
expr_ref mk_antimirov_deriv_negate(expr* d);
expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2);
expr_ref mk_regex_reverse(expr* r);
expr_ref mk_regex_concat(expr* r1, expr* r2);
expr_ref simplify_path(expr* path);
bool lt_char(expr* ch1, expr* ch2);
bool eq_char(expr* ch1, expr* ch2);
bool neq_char(expr* ch1, expr* ch2);
bool le_char(expr* ch1, expr* ch2);
bool pred_implies(expr* a, expr* b);
bool are_complements(expr* r1, expr* r2) const;
@ -286,6 +302,8 @@ class seq_rewriter {
expr_ref zero() { return expr_ref(m_autil.mk_int(0), m()); }
expr_ref one() { return expr_ref(m_autil.mk_int(1), m()); }
expr_ref minus_one() { return expr_ref(m_autil.mk_int(-1), m()); }
expr_ref mk_sub(expr* a, rational const& n);
expr_ref mk_sub(expr* a, unsigned n) { return mk_sub(a, rational(n)); }
bool is_suffix(expr* s, expr* offset, expr* len);
bool is_prefix(expr* s, expr* offset, expr* len);
@ -379,9 +397,19 @@ public:
void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs);
// Expose derivative and nullability check
/*
create the nullability check for r
*/
expr_ref is_nullable(expr* r);
/*
make the derivative of r wrt the given element ele
*/
expr_ref mk_derivative(expr* ele, expr* r);
/*
make the derivative of r wrt the canonical variable v0 = (:var 0),
for example mk_derivative(a+) = (if (v0 = 'a') then a* else [])
*/
expr_ref mk_derivative(expr* r);
// heuristic elimination of element from condition that comes form a derivative.
// special case optimization for conjunctions of equalities, disequalities and ranges.

View file

@ -839,7 +839,7 @@ bool seq_util::str::is_nth_i(expr const* n, expr*& s, unsigned& idx) const {
return arith_util(m).is_unsigned(i, idx);
}
app* seq_util::str::mk_nth_i(expr* s, unsigned i) const {
app* seq_util::str::mk_nth_c(expr* s, unsigned i) const {
return mk_nth_i(s, arith_util(m).mk_int(i));
}
@ -854,6 +854,48 @@ void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const {
}
}
/*
Returns true if s is an expression of the form (l = |u|) |u|-k or (-k)+|u| or |u|+(-k).
Also returns true and assigns k=0 and l=s if s is |u|.
*/
bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, rational& k) const {
expr* x;
rational v;
arith_util a(m);
if (is_length(s, l)) {
k = 0;
return true;
}
else if (a.is_sub(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonneg()) {
k = v;
return true;
}
else if (a.is_add(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
k = - v;
return true;
}
else if (a.is_add(s, x, l) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
k = - v;
return true;
}
else
return false;
}
bool seq_util::str::is_unit_string(expr const* s, expr_ref& c) const {
zstring z;
expr* ch = nullptr;
if (is_string(s, z) && z.length() == 1) {
c = mk_char(z[0]);
return true;
}
else if (is_unit(s, ch)) {
c = ch;
return true;
}
return false;
}
void seq_util::str::get_concat_units(expr* e, expr_ref_vector& es) const {
expr* e1, *e2;
while (is_concat(e, e1, e2)) {
@ -876,8 +918,6 @@ app* seq_util::str::mk_is_empty(expr* s) const {
return m.mk_eq(s, mk_empty(s->get_sort()));
}
unsigned seq_util::str::min_length(expr* s) const {
SASSERT(u.is_seq(s));
unsigned result = 0;
@ -1065,38 +1105,71 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) {
/*
Produces compact view of concrete concatenations such as (abcd).
*/
std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const {
std::ostream& seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const {
SASSERT(re.u.is_seq(s));
zstring z;
expr* x, * j, * k, * l, * i, * x_;
if (re.u.str.is_empty(s))
out << "()";
else if (re.u.str.is_unit(s))
seq_unit(out, s);
print_unit(out, s);
else if (re.u.str.is_concat(s)) {
expr_ref_vector es(re.m);
re.u.str.get_concat(s, es);
for (expr* e : es)
compact_helper_seq(out, e);
print_seq(out, e);
}
else if (re.u.str.is_string(s, z)) {
for (unsigned i = 0; i < z.length(); i++)
out << (char)z[i];
}
//using braces to indicate 'full' output
//for example an uninterpreted constant X will be printed as {X}
//while a unit sequence "X" will be printed as X
//thus for example (concat "X" "Y" Z "W") where Z is uninterpreted is printed as XY{Z}W
else out << "{" << mk_pp(s, re.m) << "}";
else if (re.u.str.is_extract(s, x, j, k)) {
rational jv, iv;
print(out, x);
if (arith_util(re.m).is_numeral(j, jv)) {
if (arith_util(re.m).is_numeral(k, iv)) {
// output X[j,k]
out << "[" << jv.get_int32() << "," << jv.get_int32() << "]";
}
else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ &&
arith_util(re.m).is_numeral(i, iv) && iv == jv) {
// case X[j,|X|-j] is denoted by X[j..]
out << "[" << jv.get_int32() << "..]";
}
else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) ||
(arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ &&
arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) {
// case X[j,|X|-j] is denoted by X[j..]
out << "[" << jv.get_int32() << "..]";
}
else {
out << "[" << jv.get_int32() << ",";
print(out, k);
out << "]";
}
}
else {
out << "[";
print(out, j);
out << ",";
print(out, k);
out << "]";
}
}
else
out << mk_pp(s, re.m);
return out;
}
/*
Produces output such as [a-z] for a range.
*/
std::ostream& seq_util::rex::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) const {
std::ostream& seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const {
out << "[";
seq_unit(out, s1) << "-";
seq_unit(out, s2) << "]";
print_unit(out, s1);
out << "-";
print_unit(out, s2);
out << "]";
return out;
}
@ -1111,8 +1184,8 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
/*
Specialize output for a unit sequence converting to visible ASCII characters if possible.
*/
std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
expr* e;
std::ostream& seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const {
expr* e, * i;
unsigned n = 0;
if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
char c = (char)n;
@ -1122,22 +1195,21 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
out << "\\r";
else if (c == '\f')
out << "\\f";
else if (c == ' ')
out << "\\s";
else if (c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']' || c == '.' || c == '\\')
out << "\\" << c;
else if (32 < n && n < 127) {
else if (32 <= n && n < 127 && n != '\"' && n != ' '
&& n != '\\' && n != '\'' && n != '?' && n != '.' && n != '(' && n != ')' && n != '[' && n != ']'
&& n != '{' && n != '}' && n != '&') {
if (html_encode) {
if (c == '<')
out << "&lt;";
else if (c == '>')
out << "&gt;";
else if (c == '&')
out << "&amp;";
else if (c == '\"')
out << "&quot;";
//else if (c == '&')
// out << "&amp;";
//else if (c == '\"')
// out << "&quot;";
else
out << "\\x" << std::hex << n;
//out << "\\x" << std::hex << n;
out << c;
}
else
out << c;
@ -1151,92 +1223,188 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
else
out << "\\u" << std::hex << n;
}
else if (re.u.str.is_nth_i(s, e, i)) {
print(out, e);
out << "[" << mk_pp(i, re.m) << "]";
}
else if (re.m.is_value(e))
out << mk_pp(e, re.m);
else if (is_app(e)) {
out << "(" << to_app(e)->get_decl()->get_name().str();
for (expr * arg : *to_app(e))
print(out << " ", arg);
out << ")";
}
else
out << "{" << mk_pp(s, re.m) << "}";
out << mk_pp(s, re.m);
return out;
}
/*
Pretty prints the regex r into the out stream
Pretty prints the regex r into the ostream out
*/
std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
unsigned lo = 0, hi = 0;
rational v;
if (re.u.is_char(e))
return seq_unit(out, e);
print_unit(out, e);
else if (re.u.is_seq(e))
return compact_helper_seq(out, e);
print_seq(out, e);
else if (re.is_full_char(e))
return out << ".";
out << ".";
else if (re.is_full_seq(e))
return out << ".*";
out << ".*";
else if (re.is_to_re(e, s))
return compact_helper_seq(out, s);
else if (re.is_range(e, s, s2))
return compact_helper_range(out, s, s2);
print_seq(out, s);
else if (re.is_range(e, s, s2))
print_range(out, s, s2);
else if (re.is_epsilon(e))
return out << "()";
// &#X03B5; = epsilon
out << (html_encode ? "&#X03B5;" : "()");
else if (re.is_empty(e))
return out << "[]";
else if (re.is_concat(e, r1, r2))
return out << pp(re, r1) << pp(re, r2);
else if (re.is_union(e, r1, r2))
return out << "(" << pp(re, r1) << "|" << pp(re, r2) << ")";
else if (re.is_intersection(e, r1, r2))
return out << "(" << pp(re, r1) << "&amp;" /*(html_encode ? ")&amp;(" : ")&(")*/ << pp(re, r2) << ")";
// &#X2205; = emptyset
out << (html_encode ? "&#X2205;" : "[]");
else if (re.is_concat(e, r1, r2)) {
print(out, r1);
print(out, r2);
}
else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) {
out << "(";
print(out, r1);
out << (html_encode ? "&#X22C3;" : "|");
print(out, r2);
out << ")";
}
else if (re.is_intersection(e, r1, r2))
{
out << "(";
print(out, r1);
out << (html_encode ? "&#X22C2;" : "&");
print(out, r2);
out << ")";
}
else if (re.is_complement(e, r1)) {
out << "~";
if (can_skip_parenth(r1))
return out << "~" << pp(re, r1);
else
return out << "~(" << pp(re, r1) << ")";
print(out, r1);
else {
out << "(";
print(out, r1);
out << ")";
}
}
else if (re.is_plus(e, r1)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "+";
else
return out << "(" << pp(re, r1) << ")+";
if (can_skip_parenth(r1)) {
print(out, r1);
out << "+";
}
else {
out << "(";
print(out, r1);
out << ")+";
}
}
else if (re.is_star(e, r1)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "*";
else
return out << "(" << pp(re, r1) << ")*";
if (can_skip_parenth(r1)) {
print(out, r1);
out << "*";
}
else {
out << "(";
print(out, r1);
out << ")*";
}
}
else if (re.is_loop(e, r1, lo)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "{" << lo << ",}";
else
return out << "(" << pp(re, r1) << "){" << lo << ",}";
if (can_skip_parenth(r1)) {
print(out, r1);
out << "{" << lo << ",}";
}
else
{
out << "(";
print(out, r1);
out << "){" << lo << ",}";
}
}
else if (re.is_loop(e, r1, lo, hi)) {
if (can_skip_parenth(r1)) {
print(out, r1);
if (lo == hi)
return out << pp(re, r1) << "{" << lo << "}";
else
return out << pp(re, r1) << "{" << lo << "," << hi << "}";
out << "{" << lo << "}";
else
out << "{" << lo << "," << hi << "}";
}
else {
out << "(";
print(out, r1);
if (lo == hi)
return out << "(" << pp(re, r1) << "){" << lo << "}";
out << "){" << lo << "}";
else
return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}";
out << "){" << lo << "," << hi << "}";
}
}
else if (re.is_diff(e, r1, r2))
return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")";
else if (re.m.is_ite(e, s, r1, r2))
return out << "if(" << mk_pp(s, re.m) << "," << pp(re, r1) << "," << pp(re, r2) << ")";
else if (re.is_opt(e, r1)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "?";
else
return out << "(" << pp(re, r1) << ")?";
else if (re.is_diff(e, r1, r2)) {
out << "(";
print(out, r1);
out << ")\\(";
print(out, r2);
out << ")";
}
else if (re.m.is_ite(e, s, r1, r2)) {
out << (html_encode ? "(&#X1D422;&#X1D41F; " : "(if ");
print(out, s);
out << (html_encode ? " &#X1D42D;&#X1D5F5;&#X1D41E;&#X1D427; " : " then ");
print(out, r1);
out << (html_encode ? " &#X1D41E;&#X1D425;&#X1D600;&#X1D41E; " : " else ");
print(out, r2);
out << ")";
}
else if (re.is_opt(e, r1)) {
if (can_skip_parenth(r1)) {
print(out, r1);
out << "?";
}
else {
out << "(";
print(out, r1);
out << ")?";
}
}
else if (re.is_reverse(e, r1)) {
out << "(reverse ";
print(out, r1);
out << ")";
}
else if (re.m.is_eq(e, r1, r2)) {
out << "(";
print(out, r1);
out << "=";
print(out, r2);
out << ")";
}
else if (re.m.is_not(e, r1)) {
out << "!";
print(out, r1);
}
else if (re.m.is_value(e))
out << mk_pp(e, re.m);
else if (is_app(e)) {
out << "(" << to_app(e)->get_decl()->get_name().str();
for (expr* arg : *to_app(e))
print(out << " ", arg);
out << ")";
}
else if (re.is_reverse(e, r1))
return out << "reverse(" << pp(re, r1) << ")";
else
// Else: derivative or is_of_pred
return out << "{" << mk_pp(e, re.m) << "}";
// for all remaining cases use the default pretty printer
out << mk_pp(e, re.m);
return out;
}
std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
print(out, ex);
return out;
}
/*
@ -1244,7 +1412,16 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
*/
std::string seq_util::rex::to_str(expr* r) const {
std::ostringstream out;
out << pp(u.re, r);
pp(u.re, r, false).display(out);
return out.str();
}
/*
Pretty prints the regex r into the output string that is htmlencoded
*/
std::string seq_util::rex::to_strh(expr* r) const {
std::ostringstream out;
pp(u.re, r, true).display(out);
return out.str();
}
@ -1290,7 +1467,7 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
else
result = mk_info_rec(to_app(e));
m_infos.setx(e->get_id(), result, invalid_info);
STRACE("re_info", tout << "compute_info(" << pp(u.re, e) << ")=" << result << std::endl;);
STRACE("re_info", tout << "compute_info(" << pp(u.re, e, false) << ")=" << result << std::endl;);
return result;
}

View file

@ -286,7 +286,7 @@ public:
app* mk_at(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_AT, 2, es); }
app* mk_nth(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH, 2, es); }
app* mk_nth_i(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH_I, 2, es); }
app* mk_nth_i(expr* s, unsigned i) const;
app* mk_nth_c(expr* s, unsigned i) const;
app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); }
app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); }
@ -350,6 +350,13 @@ public:
bool is_from_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_FROM_CODE); }
bool is_to_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_TO_CODE); }
bool is_len_sub(expr const* n, expr*& l, expr*& u, rational& k) const;
/*
tests if s is a single character string(c) or a unit (c)
*/
bool is_unit_string(expr const* s, expr_ref& c) const;
bool is_string_term(expr const * n) const {
return u.is_string(n->get_sort());
}
@ -530,7 +537,20 @@ public:
bool is_loop(expr const* n) const { return is_app_of(n, m_fid, OP_RE_LOOP); }
bool is_empty(expr const* n) const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); }
bool is_full_char(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_CHAR_SET); }
bool is_full_seq(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET); }
bool is_full_seq(expr const* n) const {
expr* s;
return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET) || (is_star(n, s) && is_full_char(s));
}
bool is_dot_plus(expr const* n) const {
expr* s, * t;
if (is_plus(n, s) && is_full_char(s))
return true;
if (is_concat(n, s, t)) {
if ((is_full_char(s) && is_full_seq(t)) || (is_full_char(t) && is_full_seq(s)))
return true;
}
return false;
}
bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); }
bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); }
bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); }
@ -559,18 +579,32 @@ public:
app* mk_epsilon(sort* seq_sort);
info get_info(expr* r) const;
std::string to_str(expr* r) const;
std::string to_strh(expr* r) const;
expr_ref mk_ite_simplify(expr* c, expr* t, expr* e)
{
expr_ref result(m);
if (m.is_true(c) || t == e)
result = t;
else if (m.is_false(c))
result = e;
else
result = m.mk_ite(c, t, e);
return result;
}
class pp {
seq_util::rex& re;
expr* e;
expr* ex;
bool html_encode;
bool can_skip_parenth(expr* r) const;
std::ostream& seq_unit(std::ostream& out, expr* s) const;
std::ostream& compact_helper_seq(std::ostream& out, expr* s) const;
std::ostream& compact_helper_range(std::ostream& out, expr* s1, expr* s2) const;
std::ostream& print_unit(std::ostream& out, expr* s) const;
std::ostream& print_seq(std::ostream& out, expr* s) const;
std::ostream& print_range(std::ostream& out, expr* s1, expr* s2) const;
std::ostream& print(std::ostream& out, expr* e) const;
public:
pp(seq_util::rex& r, expr* e, bool html = false) : re(r), e(e), html_encode(html) {}
pp(seq_util::rex& re, expr* ex, bool html) : re(re), ex(ex), html_encode(html) {}
std::ostream& display(std::ostream&) const;
};
};

View file

@ -12,6 +12,7 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2020-5-22
Margus Veanes 2021
--*/
@ -253,10 +254,8 @@ namespace smt {
* (accept s (i + 1) (derivative s[i] r)
*
* Acceptance of a derivative is unfolded into a disjunction over
* all derivatives. Effectively, this implements the following rule,
* but all in one step:
* (accept s i (ite c r1 r2)) =>
* c & (accept s i r1) \/ ~c & (accept s i r2)
* all derivatives. Effectively, this implements the following rule:
* (accept s i (ite c r1 r2)) => (ite c (accept s i r1) (accept s i r2))
*/
void seq_regex::propagate_accept(literal lit) {
SASSERT(!lit.sign());
@ -302,6 +301,7 @@ namespace smt {
unsigned min_len = re().min_length(r);
unsigned min_len_plus_i = u().max_plus(min_len, idx);
literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len_plus_i);
// Acc(s,i,r) ==> |s| >= i + minlength(r)
th.propagate_lit(nullptr, 1, &lit, len_s_ge_min);
// Axiom equivalent to the above: th.add_axiom(~lit, len_s_ge_min);
@ -316,6 +316,8 @@ namespace smt {
STRACE("seq_regex_brief", tout
<< " (Warning: min_length returned 0 for"
<< " non-nullable regex)";);
// since nullable(r) = false:
// Acc(s,i,r) ==> |s|>i
th.propagate_lit(nullptr, 1, &lit, ~len_s_le_i);
}
else if (!m.is_true(is_nullable)) {
@ -327,7 +329,9 @@ namespace smt {
<< " (Warning: is_nullable did not simplify)";);
literal is_nullable_lit = th.mk_literal(is_nullable);
ctx.mark_as_relevant(is_nullable_lit);
// Acc(s,i,r) & |s|<=i ==> nullable(r)
th.add_axiom(~lit, ~len_s_le_i, is_nullable_lit);
//TODO: what if is_nullable contains an in_re
if (str().is_in_re(is_nullable))
th.add_unhandled_expr(is_nullable);
}
@ -335,14 +339,19 @@ namespace smt {
// Rule 3: derivative unfolding
literal_vector accept_next;
expr_ref hd = th.mk_nth(s, i);
expr_ref s_i = th.mk_nth(s, i);
expr_ref deriv(m);
deriv = derivative_wrapper(hd, r);
deriv = mk_derivative_wrapper(s_i, r);
STRACE("seq_regex", tout
<< "mk_derivative_wrapper: " << re().to_str(deriv) << std::endl;);
expr_ref accept_deriv(m);
accept_deriv = mk_deriv_accept(s, idx + 1, deriv);
accept_next.push_back(~lit);
accept_next.push_back(len_s_le_i);
accept_next.push_back(th.mk_literal(accept_deriv));
// Acc(s, i, r) => (|s|<=i or Acc(s, i+1, D(s_i,r)))
// where Acc(s, i+1, ite(c, t, f)) = ite(c, Acc(s, i+1, t), Acc(s, i+1, t))
// and Acc(s, i+1, r U s) = Acc(s, i+1, r) or Acc(s, i+1, s)
th.add_axiom(accept_next);
}
@ -418,20 +427,21 @@ namespace smt {
/*
Wrapper around calls to is_nullable from the seq rewriter.
Note: the nullable wrapper and derivative wrapper actually use
TODO: clean up the following:
Note: the is_nullable_wrapper and mk_derivative_wrapper actually use
different sequence rewriters; these are at:
m_seq_rewrite
(returned by seq_rw())
th.m_rewrite.m_imp->m_cfg.m_seq_rw
(private, can't be accessed directly)
As a result operations are cached separately for the nullable
and derivative calls. TBD if caching them using the same rewriter
makes any difference.
and derivative calls.
*/
expr_ref seq_regex::is_nullable_wrapper(expr* r) {
STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;);
expr_ref result = seq_rw().is_nullable(r);
//TODO: rewrite seems unnecessary here
rewrite(result);
STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;);
@ -442,39 +452,28 @@ namespace smt {
}
/*
Wrapper around the regex symbolic derivative from the seq rewriter.
Ensures that the derivative is written in a normalized BDD form
with optimizations for if-then-else expressions involving the head.
Note: the nullable wrapper and derivative wrapper actually use
different sequence rewriters; these are at:
m_seq_rewrite
(returned by seq_rw())
th.m_rewrite.m_imp->m_cfg.m_seq_rw
(private, can't be accessed directly)
As a result operations are cached separately for the nullable
and derivative calls. TBD if caching them using the same rewriter
makes any difference.
First creates a derivatrive of r wrt x=(:var 0) and then replaces x by ele.
This will create a cached entry for the generic derivative of r that is independent of ele.
*/
expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) {
STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;);
expr_ref seq_regex::mk_derivative_wrapper(expr* ele, expr* r) {
STRACE("seq_regex", tout << "derivative(" << mk_pp(ele, m) << "): " << mk_pp(r, m) << std::endl;);
// Use canonical variable for head
expr_ref hd_canon(m.mk_var(0, hd->get_sort()), m);
expr_ref result(re().mk_derivative(hd_canon, r), m);
rewrite(result);
// Uses canonical variable (:var 0) for the derivative element
expr_ref der(seq_rw().mk_derivative(r), m);
// Substitute with real head
// Substitute (:var 0) with the actual element
var_subst subst(m);
expr_ref_vector sub(m);
sub.push_back(hd);
result = subst(result, sub);
sub.push_back(ele);
der = subst(der, sub);
STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;);
STRACE("seq_regex", tout << "derivative result: " << mk_pp(der, m) << std::endl;);
STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")="
<< state_str(result) << " ";);
<< state_str(der) << " ";);
return result;
//TODO: simplify der further, if ele implies further simplifications
//e.g. if ele='b' then de(ite (x='a') t f) simplifies to t
return der;
}
void seq_regex::propagate_eq(expr* r1, expr* r2) {
@ -557,7 +556,7 @@ namespace smt {
literal null_lit = th.mk_literal(is_nullable);
expr_ref hd = mk_first(r, n);
expr_ref d(m);
d = derivative_wrapper(hd, r);
d = mk_derivative_wrapper(hd, r);
literal_vector lits;
lits.push_back(~lit);
@ -584,11 +583,10 @@ namespace smt {
}
/*
Given a string s, index i, and a derivative regex d, return an
Given a string s, index i, and a derivative r, return an
expression that is equivalent to
accept s i r
but which pushes accept s i r into the leaves (next derivatives to
explore).
but which pushes accept s i r into the leaves
Input r is of type regex; output is of type bool.
@ -600,14 +598,18 @@ namespace smt {
expr_ref seq_regex::mk_deriv_accept(expr* s, unsigned i, expr* r) {
vector<expr*> to_visit;
to_visit.push_back(r);
obj_map<expr, expr*> re_to_bool;
obj_map<expr, expr*> re_to_accept;
expr_ref_vector _temp_bool_owner(m); // temp owner for bools we create
// DFS
bool s_is_longer_than_i = str().min_length(s) > i;
expr* i_int = a().mk_int(i);
_temp_bool_owner.push_back(i_int);
// DFS, avoids duplicating derivative construction that has already been done
while (to_visit.size() > 0) {
expr* e = to_visit.back();
expr* econd = nullptr, *e1 = nullptr, *e2 = nullptr;
if (!re_to_bool.contains(e)) {
if (!re_to_accept.contains(e)) {
// First visit: add children
STRACE("seq_regex_verbose", tout << "1";);
if (m.is_ite(e, econd, e1, e2) ||
@ -616,36 +618,40 @@ namespace smt {
to_visit.push_back(e2);
}
// Mark first visit by adding nullptr to the map
re_to_bool.insert(e, nullptr);
re_to_accept.insert(e, nullptr);
}
else if (re_to_bool.find(e) == nullptr) {
else if (re_to_accept.find(e) == nullptr) {
// Second visit: set value
STRACE("seq_regex_verbose", tout << "2";);
to_visit.pop_back();
if (m.is_ite(e, econd, e1, e2)) {
expr* b1 = re_to_bool.find(e1);
expr* b2 = re_to_bool.find(e2);
expr* b = m.mk_ite(econd, b1, b2);
expr* b1 = re_to_accept.find(e1);
expr* b2 = re_to_accept.find(e2);
expr* b = m.is_true(econd) || b1 == b2 ? b1 : m.is_false(econd) ? b2 : m.mk_ite(econd, b1, b2);
_temp_bool_owner.push_back(b);
re_to_bool.find(e) = b;
re_to_accept.find(e) = b;
}
else if (re().is_empty(e) || (s_is_longer_than_i && re().is_epsilon(e)))
{
// s[i..] in [] <==> false, also: s[i..] in () <==> false when |s|>i
re_to_accept.find(e) = m.mk_false();
}
else if (re().is_full_seq(e) || (s_is_longer_than_i && re().is_dot_plus(e)))
{
// s[i..] in .* <==> true, also: s[i..] in .+ <==> true when |s|>i
re_to_accept.find(e) = m.mk_true();
}
/*
else if (re().is_empty(e))
{
re_to_bool.find(e) = m.mk_false();
}
else if (re().is_epsilon(e))
{
expr* iplus1 = a().mk_int(i);
expr* one = a().mk_int(1);
_temp_bool_owner.push_back(iplus1);
_temp_bool_owner.push_back(one);
//the substring starting after position iplus1 must be empty
expr* s_end = str().mk_substr(s, iplus1, one);
//the substring starting after position i must be empty
expr* s_end = str().mk_substr(s, i_int, one);
expr* s_end_is_epsilon = m.mk_eq(s_end, str().mk_empty(m.get_sort(s)));
_temp_bool_owner.push_back(s_end_is_epsilon);
re_to_bool.find(e) = s_end_is_epsilon;
re_to_accept.find(e) = s_end_is_epsilon;
STRACE("seq_regex_verbose", tout
<< "added empty sequence leaf: "
@ -653,18 +659,16 @@ namespace smt {
}
*/
else if (re().is_union(e, e1, e2)) {
expr* b1 = re_to_bool.find(e1);
expr* b2 = re_to_bool.find(e2);
expr* b = m.mk_or(b1, b2);
expr* b1 = re_to_accept.find(e1);
expr* b2 = re_to_accept.find(e2);
expr* b = m.is_false(b1) || b1 == b2 ? b2 : m.is_false(b2) ? b1 : m.mk_or(b1, b2);
_temp_bool_owner.push_back(b);
re_to_bool.find(e) = b;
re_to_accept.find(e) = b;
}
else {
expr* iplus1 = a().mk_int(i);
_temp_bool_owner.push_back(iplus1);
expr_ref acc_leaf = sk().mk_accept(s, iplus1, e);
expr_ref acc_leaf = sk().mk_accept(s, i_int, e);
_temp_bool_owner.push_back(acc_leaf);
re_to_bool.find(e) = acc_leaf;
re_to_accept.find(e) = acc_leaf;
STRACE("seq_regex_verbose", tout
<< "mk_deriv_accept: added accept leaf: "
@ -680,59 +684,44 @@ namespace smt {
// Finalize
expr_ref result(m);
result = re_to_bool.find(r); // Assigns ownership of all exprs in
// re_to_bool for after this completes
result = re_to_accept.find(r); // Assigns ownership of all exprs in
// re_to_accept for after this completes
rewrite(result);
return result;
}
/*
Return a list of all leaves in the derivative of a regex r,
Return a list of all target regexes in the derivative of a regex r,
ignoring the conditions along each path.
Warning: Although the derivative
normal form tries to eliminate unsat condition paths, one cannot
assume that the path to each leaf is satisfiable in general
(e.g. when regexes are created using re.pred).
So not all results may correspond to satisfiable predicates.
It is OK to rely on the results being satisfiable for completeness,
but not soundness.
The derivative construction uses (:var 0) and tries
to eliminate unsat condition paths but it does not perform
full satisfiability checks and it is not guaranteed
that all targets are actually reachable
*/
void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) {
// Get derivative
sort* seq_sort = nullptr;
VERIFY(u().is_re(r, seq_sort));
expr_ref n(m.mk_fresh_const("re.char", seq_sort), m);
expr_ref hd = mk_first(r, n);
expr_ref d(m);
d = derivative_wrapper(hd, r);
void seq_regex::get_derivative_targets(expr* r, expr_ref_vector& targets) {
// constructs the derivative wrt (:var 0)
expr_ref d(seq_rw().mk_derivative(r), m);
// DFS
vector<expr*> to_visit;
to_visit.push_back(d);
obj_map<expr, bool> visited; // set<expr> (bool is used as a unit type)
while (to_visit.size() > 0) {
expr* e = to_visit.back();
to_visit.pop_back();
if (visited.contains(e)) continue;
visited.insert(e, true);
expr* econd = nullptr, *e1 = nullptr, *e2 = nullptr;
if (m.is_ite(e, econd, e1, e2) ||
re().is_union(e, e1, e2)) {
to_visit.push_back(e1);
to_visit.push_back(e2);
}
else if (!re().is_empty(e)) {
results.push_back(e);
STRACE("seq_regex_verbose", tout
<< "get_all_derivatives: added deriv: "
<< mk_pp(e, m) << std::endl;);
// use DFS to collect all the targets (leaf regexes) in d.
expr* _1 = nullptr, * e1 = nullptr, * e2 = nullptr;
obj_hashtable<expr>::entry* _2 = nullptr;
vector<expr*> workset;
workset.push_back(d);
obj_hashtable<expr> done;
done.insert(d);
while (workset.size() > 0) {
expr* e = workset.back();
workset.pop_back();
if (m.is_ite(e, _1, e1, e2) || re().is_union(e, e1, e2)) {
if (done.insert_if_not_there_core(e1, _2))
workset.push_back(e1);
if (done.insert_if_not_there_core(e2, _2))
workset.push_back(e2);
}
else if (!re().is_empty(e))
targets.push_back(e);
}
STRACE("seq_regex", tout << "Number of derivatives: "
<< results.size() << std::endl;);
STRACE("seq_regex_brief", tout << "#derivs=" << results.size() << " ";);
}
/*
@ -800,7 +789,7 @@ namespace smt {
th.add_axiom(~lit, ~th.mk_literal(is_nullable));
expr_ref hd = mk_first(r, n);
expr_ref d(m);
d = derivative_wrapper(hd, r);
d = mk_derivative_wrapper(hd, r);
literal_vector lits;
expr_ref_pair_vector cofactors(m);
get_cofactors(d, cofactors);
@ -884,7 +873,7 @@ namespace smt {
STRACE("state_graph",
if (!m_state_graph.is_seen(r_id))
tout << std::endl << "state(" << r_id << ") = " << seq_util::rex::pp(re(), r) << std::endl << "info(" << r_id << ") = " << re().get_info(r) << std::endl;);
tout << std::endl << "state(" << r_id << ") = " << re().to_str(r) << std::endl << "info(" << r_id << ") = " << re().get_info(r) << std::endl;);
// Add state
m_state_graph.add_state(r_id);
STRACE("seq_regex", tout << "Updating state graph for regex "
@ -900,14 +889,14 @@ namespace smt {
expr_ref_vector derivatives(m);
STRACE("seq_regex_verbose", tout
<< "getting all derivs: " << r_id << " " << std::endl;);
get_all_derivatives(r, derivatives);
get_derivative_targets(r, derivatives);
for (auto const& dr: derivatives) {
unsigned dr_id = get_state_id(dr);
STRACE("seq_regex_verbose", tout
<< std::endl << " traversing deriv: " << dr_id << " ";);
STRACE("state_graph",
if (!m_state_graph.is_seen(dr_id))
tout << "state(" << dr_id << ") = " << seq_util::rex::pp(re(), dr) << std::endl << "info(" << dr_id << ") = " << re().get_info(dr) << std::endl;);
tout << "state(" << dr_id << ") = " << re().to_str(dr) << std::endl << "info(" << dr_id << ") = " << re().get_info(dr) << std::endl;);
// Add state
m_state_graph.add_state(dr_id);
bool maybecycle = can_be_in_cycle(r, dr);

View file

@ -158,12 +158,12 @@ namespace smt {
expr_ref symmetric_diff(expr* r1, expr* r2);
expr_ref is_nullable_wrapper(expr* r);
expr_ref derivative_wrapper(expr* hd, expr* r);
expr_ref mk_derivative_wrapper(expr* hd, expr* r);
// Various support for unfolding derivative expressions that are
// returned by derivative_wrapper
expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
void get_all_derivatives(expr* r, expr_ref_vector& results);
void get_derivative_targets(expr* r, expr_ref_vector& targets);
void get_cofactors(expr* r, expr_ref_pair_vector& result);
void get_cofactors_rec(expr* r, expr_ref_vector& conds,
expr_ref_pair_vector& result);