3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-06 17:44:08 +00:00

Cleanup regex info and some fixes in Derivative code (#5709)

* removed unused regex info fields

* cleanup of info and fixes in antimirov derivatives

* removed extra qualification on operator
This commit is contained in:
Margus Veanes 2021-12-15 10:59:34 -08:00 committed by GitHub
parent 3b58f548f7
commit 2be93870c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 132 additions and 143 deletions

View file

@ -3115,8 +3115,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
result = mk_antimirov_deriv_union(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing()));
}
else if (m().is_ite(r, c, r1, r2)) {
c1 = simplify_path(m().mk_and(c, path));
c2 = simplify_path(m().mk_and(m().mk_not(c), path));
c1 = simplify_path(e, m().mk_and(c, path));
c2 = simplify_path(e, m().mk_and(m().mk_not(c), path));
if (m().is_false(c1))
result = mk_antimirov_deriv(e, r2, c2);
else if (m().is_false(c2))
@ -3131,8 +3131,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
SASSERT(u().is_char(c1));
SASSERT(u().is_char(c2));
// case: c1 <= e <= c2
range = simplify_path(m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2)));
psi = simplify_path(m().mk_and(path, range));
range = simplify_path(e, m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2)));
psi = simplify_path(e, m().mk_and(path, range));
}
else if (!str().is_string(r1) && str().is_unit_string(r2, c2)) {
SASSERT(u().is_char(c2));
@ -3141,8 +3141,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
expr_ref zero(m_autil.mk_int(0), m());
expr_ref r1_length_eq_one(m().mk_eq(str().mk_length(r1), one), m());
expr_ref r1_0(str().mk_nth_i(r1, zero), m());
range = simplify_path(m().mk_and(r1_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, c2))));
psi = simplify_path(m().mk_and(path, range));
range = simplify_path(e, m().mk_and(r1_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, c2))));
psi = simplify_path(e, m().mk_and(path, range));
}
else if (!str().is_string(r2) && str().is_unit_string(r1, c1)) {
SASSERT(u().is_char(c1));
@ -3151,8 +3151,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
expr_ref zero(m_autil.mk_int(0), m());
expr_ref r2_length_eq_one(m().mk_eq(str().mk_length(r2), one), m());
expr_ref r2_0(str().mk_nth_i(r2, zero), m());
range = simplify_path(m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(c1, e), u().mk_le(e, r2_0))));
psi = simplify_path(m().mk_and(path, range));
range = simplify_path(e, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(c1, e), u().mk_le(e, r2_0))));
psi = simplify_path(e, m().mk_and(path, range));
}
else if (!str().is_string(r1) && !str().is_string(r2)) {
// both r1 and r2 nonground: |r1|=1 & |r2|=1 & r1[0] <= e <= r2[0]
@ -3162,8 +3162,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
expr_ref r1_0(str().mk_nth_i(r1, zero), m());
expr_ref r2_length_eq_one(m().mk_eq(str().mk_length(r2), one), m());
expr_ref r2_0(str().mk_nth_i(r2, zero), m());
range = simplify_path(m().mk_and(r1_length_eq_one, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, r2_0)))));
psi = simplify_path(m().mk_and(path, range));
range = simplify_path(e, m().mk_and(r1_length_eq_one, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, r2_0)))));
psi = simplify_path(e, m().mk_and(path, range));
}
if (m().is_false(psi))
result = nothing();
@ -3173,7 +3173,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
else if (re().is_union(r, r1, r2))
result = mk_antimirov_deriv_union(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path));
else if (re().is_intersection(r, r1, r2))
result = mk_antimirov_deriv_intersection(
result = mk_antimirov_deriv_intersection(e,
mk_antimirov_deriv(e, r1, path),
mk_antimirov_deriv(e, r2, path), m().mk_true());
else if (re().is_star(r, r1) || re().is_plus(r, r1) || (re().is_loop(r, r1, lo) && 0 <= lo && lo <= 1))
@ -3190,11 +3190,11 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
result = mk_antimirov_deriv(e, r1, path);
else if (re().is_complement(r, r1))
// D(e,~r1) = ~D(e,r1)
result = mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r1, path));
result = mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r1, path));
else if (re().is_diff(r, r1, r2))
result = mk_antimirov_deriv_intersection(
result = mk_antimirov_deriv_intersection(e,
mk_antimirov_deriv(e, r1, path),
mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r2, path)), m().mk_true());
mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r2, path)), m().mk_true());
else if (re().is_of_pred(r, r1)) {
array_util array(m());
expr* args[2] = { r1, e };
@ -3207,36 +3207,47 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref
result = re().mk_derivative(e, r);
}
expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path) {
expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* d2, expr* path) {
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(d1, seq_sort));
VERIFY(m_util.is_seq(seq_sort, ele_sort));
expr_ref result(m());
expr* c, * a, * b;
if (d1 == d2 || re().is_full_seq(d2) || re().is_empty(d1))
//if (d1 == d2 || re().is_full_seq(d2) || re().is_empty(d1))
// result = d1;
//else if (re().is_full_seq(d1) || re().is_empty(d2))
// result = d2;
//else
if (re().is_empty(d1))
result = d1;
else if (re().is_full_seq(d1) || re().is_empty(d2))
else if (re().is_empty(d2))
result = d2;
else if (m().is_ite(d1, c, a, b)) {
expr_ref path_and_c(simplify_path(m().mk_and(path, c)), m());
expr_ref path_and_notc(simplify_path(m().mk_and(path, m().mk_not(c))), m());
expr_ref path_and_c(simplify_path(e, m().mk_and(path, c)), m());
expr_ref path_and_notc(simplify_path(e, m().mk_and(path, m().mk_not(c))), m());
if (m().is_false(path_and_c))
result = mk_antimirov_deriv_intersection(b, d2, path);
result = mk_antimirov_deriv_intersection(e, b, d2, path);
else if (m().is_false(path_and_notc))
result = mk_antimirov_deriv_intersection(a, d2, path);
result = mk_antimirov_deriv_intersection(e, a, d2, path);
else
result = m().mk_ite(c, mk_antimirov_deriv_intersection(a, d2, path_and_c),
mk_antimirov_deriv_intersection(b, d2, path_and_notc));
result = m().mk_ite(c, mk_antimirov_deriv_intersection(e, a, d2, path_and_c),
mk_antimirov_deriv_intersection(e, b, d2, path_and_notc));
}
else if (m().is_ite(d2))
// swap d1 and d2
result = mk_antimirov_deriv_intersection(d2, d1, path);
result = mk_antimirov_deriv_intersection(e, d2, d1, path);
else if (d1 == d2 || re().is_full_seq(d2))
result = mk_antimirov_deriv_restrict(e, d1, path);
else if (re().is_full_seq(d1))
result = mk_antimirov_deriv_restrict(e, d2, path);
else if (re().is_union(d1, a, b))
// distribute intersection over the union in d1
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(a, d2, path), mk_antimirov_deriv_intersection(b, d2, path));
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, a, d2, path),
mk_antimirov_deriv_intersection(e, b, d2, path));
else if (re().is_union(d2, a, b))
// distribute intersection over the union in d2
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(d1, a, path), mk_antimirov_deriv_intersection(d1, b, path));
result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, d1, a, path),
mk_antimirov_deriv_intersection(e, d1, b, path));
else
// in all other cases create the intersection regex
// TODO: flatten, order and merge d1 and d2 to maintain equality under similarity
@ -3258,7 +3269,7 @@ expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) {
return result;
}
expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* d) {
expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) {
sort* seq_sort = nullptr;
VERIFY(m_util.is_re(d, seq_sort));
auto nothing = [&]() { return expr_ref(re().mk_empty(d->get_sort()), m()); };
@ -3276,11 +3287,12 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* d) {
else if (re().is_dot_plus(d))
result = epsilon();
else if (m().is_ite(d, c, t, e))
result = m().mk_ite(c, mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
result = m().mk_ite(c, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e));
else if (re().is_union(d, t, e))
result = re().mk_inter(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
//result = re().mk_inter(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
result = mk_antimirov_deriv_intersection(elem, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e), m().mk_true());
else if (re().is_intersection(d, t, e))
result = re().mk_union(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e));
result = re().mk_union(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e));
else if (re().is_complement(d, t))
result = t;
else
@ -3304,6 +3316,45 @@ expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) {
return result;
}
// restrict the guards of all conditionals id d and simplify the resulting derivative
// restrict(if(c, a, b), cond) = if(c, restrict(a, cond & c), restrict(b, cond & ~c))
// restrict(a U b, cond) = restrict(a, cond) U restrict(b, cond)
// where {} U X = X, X U X = X
// restrict(R, cond) = R
//
// restrict(d, false) = []
//
// it is already assumed that the restriction takes place witin a branch
// so the condition is not added explicitly but propagated down in order to eliminate
// infeasible cases
expr_ref seq_rewriter::mk_antimirov_deriv_restrict(expr* e, expr* d, expr* cond) {
expr_ref result(d, m());
expr_ref _cond(cond, m());
expr* c, * a, * b;
if (m().is_false(cond))
result = re().mk_empty(d->get_sort());
else if (re().is_empty(d) || m().is_true(cond))
result = d;
else if (m().is_ite(d, c, a, b)) {
expr_ref path_and_c(simplify_path(e, m().mk_and(cond, c)), m());
expr_ref path_and_notc(simplify_path(e, m().mk_and(cond, m().mk_not(c))), m());
result = re().mk_ite_simplify(c, mk_antimirov_deriv_restrict(e, a, path_and_c),
mk_antimirov_deriv_restrict(e, b, path_and_notc));
}
else if (re().is_union(d, a, b)) {
expr_ref a1(mk_antimirov_deriv_restrict(e, a, cond), m());
expr_ref b1(mk_antimirov_deriv_restrict(e, b, cond), m());
if (a1 == b1 || re().is_empty(b1) || re().is_full_seq(a1))
result = a1;
else if (re().is_empty(a1) || re().is_full_seq(b1))
result = b1;
else
//TODO: merge
result = (a1->get_id() <= b1->get_id() ? re().mk_union(a1, b1) : re().mk_union(b1, a1));
}
return result;
}
expr_ref seq_rewriter::mk_regex_reverse(expr* r) {
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
unsigned lo = 0, hi = 0;
@ -3390,21 +3441,18 @@ expr_ref seq_rewriter::mk_in_antimirov_rec(expr* s, expr* d) {
}
/*
path is typically a conjunction of (negated) character equations or constraints that can potentially be simplified
the first element of each equation is assumed to be the element parameter, for example x = (:var 0),
for example a constraint x='a' & x='b' is simplified to false
* calls elim_condition
*/
expr_ref seq_rewriter::simplify_path(expr* path) {
//TODO: more systematic simplifications
expr_ref seq_rewriter::simplify_path(expr* elem, expr* path) {
expr_ref result(path, m());
expr* h = nullptr, * t = nullptr, * lhs = nullptr, * rhs = nullptr, * h1 = nullptr;
expr* h = nullptr, * t = nullptr;
if (m().is_and(path, h, t)) {
if (m().is_true(h))
result = simplify_path(t);
result = simplify_path(elem, t);
else if (m().is_true(t))
result = simplify_path(h);
else if (m().is_eq(h, lhs, rhs) || (m().is_not(h, h1) && m().is_eq(h1, lhs, rhs)))
elim_condition(lhs, result);
result = simplify_path(elem, h);
else
elim_condition(elem, result);
}
return result;
}

View file

@ -214,14 +214,15 @@ class seq_rewriter {
expr_ref mk_in_antimirov_rec(expr* s, expr* d);
expr_ref mk_in_antimirov(expr* s, expr* d);
expr_ref mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path);
expr_ref mk_antimirov_deriv_intersection(expr* elem, expr* d1, expr* d2, expr* path);
expr_ref mk_antimirov_deriv_concat(expr* d, expr* r);
expr_ref mk_antimirov_deriv_negate(expr* d);
expr_ref mk_antimirov_deriv_negate(expr* elem, expr* d);
expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2);
expr_ref mk_antimirov_deriv_restrict(expr* elem, expr* d1, expr* cond);
expr_ref mk_regex_reverse(expr* r);
expr_ref mk_regex_concat(expr* r1, expr* r2);
expr_ref simplify_path(expr* path);
expr_ref simplify_path(expr* elem, expr* path);
bool lt_char(expr* ch1, expr* ch2);
bool eq_char(expr* ch1, expr* ch2);

View file

@ -1503,9 +1503,9 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
if (e->get_family_id() == u.get_family_id()) {
switch (e->get_decl()->get_decl_kind()) {
case OP_RE_EMPTY_SET:
return info(true, true, true, true, true, true, false, l_false, UINT_MAX, 0);
return info(true, l_false, UINT_MAX);
case OP_RE_FULL_SEQ_SET:
return info(true, true, true, true, true, true, false, l_true, 0, 1);
return info(true, l_true, 0);
case OP_RE_STAR:
i1 = get_info_rec(e->get_arg(0));
return i1.star();
@ -1517,7 +1517,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
case OP_RE_OF_PRED:
//TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
//TBD: check if the range is unsat
return info(true, true, true, true, true, true, true, l_false, 1, 0);
return info(true, l_false, 1);
case OP_RE_CONCAT:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
@ -1534,7 +1534,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
min_length = u.str.min_length(e->get_arg(0));
is_value = m.is_value(e->get_arg(0));
nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef));
return info(true, true, is_value, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), nullable, min_length, 0);
return info(is_value, nullable, min_length);
case OP_RE_REVERSE:
return get_info_rec(e->get_arg(0));
case OP_RE_PLUS:
@ -1570,14 +1570,7 @@ std::ostream& seq_util::rex::info::display(std::ostream& out) const {
if (is_known()) {
out << "info("
<< "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
<< "classical=" << (classical ? "T" : "F") << ", "
<< "standard=" << (standard ? "T" : "F") << ", "
<< "nonbranching=" << (nonbranching ? "T" : "F") << ", "
<< "normalized=" << (normalized ? "T" : "F") << ", "
<< "monadic=" << (monadic ? "T" : "F") << ", "
<< "singleton=" << (singleton ? "T" : "F") << ", "
<< "min_length=" << min_length << ", "
<< "star_height=" << star_height << ")";
<< "min_length=" << min_length << ")";
}
else if (is_valid())
out << "UNKNOWN";
@ -1597,13 +1590,13 @@ std::string seq_util::rex::info::str() const {
seq_util::rex::info seq_util::rex::info::star() const {
//if is_known() is false then all mentioned properties will remain false
return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height + 1);
return seq_util::rex::info(interpreted, l_true, 0);
}
seq_util::rex::info seq_util::rex::info::plus() const {
if (is_known()) {
//plus never occurs in a normalized regex
return info(classical, classical, interpreted, nonbranching, false, monadic, false, nullable, min_length, star_height + 1);
return info(interpreted, nullable, min_length);
}
else
return *this;
@ -1612,14 +1605,14 @@ seq_util::rex::info seq_util::rex::info::plus() const {
seq_util::rex::info seq_util::rex::info::opt() const {
// if is_known() is false then all mentioned properties will remain false
// optional construct never occurs in a normalized regex
return seq_util::rex::info(classical, classical, interpreted, nonbranching, false, monadic, false, l_true, 0, star_height);
return seq_util::rex::info(interpreted, l_true, 0);
}
seq_util::rex::info seq_util::rex::info::complement() const {
if (is_known()) {
lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef));
unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0);
return info(false, standard, interpreted, nonbranching, normalized, monadic, false, compl_nullable, compl_min_length, star_height);
return info(interpreted, compl_nullable, compl_min_length);
}
else
return *this;
@ -1631,16 +1624,9 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs,
unsigned m = min_length + rhs.min_length;
if (m < min_length || m < rhs.min_length)
m = UINT_MAX;
return info(classical & rhs.classical,
classical && rhs.classical, //both args of concat must be classical for it to be standard
interpreted && rhs.interpreted,
nonbranching && rhs.nonbranching,
(normalized && !lhs_is_concat && rhs.normalized),
monadic && rhs.monadic,
false,
return info(interpreted && rhs.interpreted,
((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)),
m,
std::max(star_height, rhs.star_height));
m);
}
else
return rhs;
@ -1652,16 +1638,9 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs,
seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const {
if (is_known() || rhs.is_known()) {
//works correctly if one of the arguments is unknown
return info(classical & rhs.classical,
standard && rhs.standard,
interpreted && rhs.interpreted,
nonbranching && rhs.nonbranching,
normalized && rhs.normalized,
monadic && rhs.monadic,
singleton && rhs.singleton,
return info(interpreted && rhs.interpreted,
((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)),
std::min(min_length, rhs.min_length),
std::max(star_height, rhs.star_height));
std::min(min_length, rhs.min_length));
}
else
return rhs;
@ -1670,16 +1649,9 @@ seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) co
seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const {
if (is_known()) {
if (rhs.is_known()) {
return info(false,
standard && rhs.standard,
interpreted && rhs.interpreted,
nonbranching && rhs.nonbranching,
normalized && rhs.normalized,
monadic && rhs.monadic,
singleton && rhs.singleton,
return info(interpreted && rhs.interpreted,
((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
std::max(min_length, rhs.min_length),
std::max(star_height, rhs.star_height));
std::max(min_length, rhs.min_length));
}
else
return rhs;
@ -1691,16 +1663,9 @@ seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) co
seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const {
if (is_known()) {
if (rhs.is_known()) {
return info(false,
standard & rhs.standard,
interpreted & rhs.interpreted,
nonbranching & rhs.nonbranching,
normalized & rhs.normalized,
monadic & rhs.monadic,
false,
return info(interpreted & rhs.interpreted,
((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
std::max(min_length, rhs.min_length),
std::max(star_height, rhs.star_height));
std::max(min_length, rhs.min_length));
}
else
return rhs;
@ -1715,13 +1680,9 @@ seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) co
// unsigned ite_min_length = std::min(min_length, i.min_length);
// lbool ite_nullable = (nullable == i.nullable ? nullable : l_undef);
// TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
return info(false, false, false, false,
normalized && i.normalized,
monadic && i.monadic,
singleton && i.singleton,
return info(false,
((nullable == l_true && i.nullable == l_true) ? l_true : ((nullable == l_false && i.nullable == l_false) ? l_false : l_undef)),
std::min(min_length, i.min_length),
std::max(star_height, i.star_height));
std::min(min_length, i.min_length));
}
else
return i;
@ -1737,24 +1698,22 @@ seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) co
if (m > 0 && (m < min_length || m < lower))
m = UINT_MAX;
lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
if (upper == UINT_MAX) {
// this means the loop is r{lower,*} and is therefore not normalized
// normalized regex would be r{lower,lower}r* and would in particular not use r{0,} for r*
return info(classical, classical, interpreted, nonbranching, false, singleton, false, loop_nullable, m, star_height + 1);
}
else {
bool loop_normalized = normalized;
// r{lower,upper} is not normalized if r is nullable but lower > 0
// r{0,1} is not normalized: it should be ()|r
// r{1,1} is not normalized: it should be r
// r{lower,upper} is not normalized if lower > upper it should then be [] (empty)
if ((nullable == l_true && lower > 0) || upper == 1 || lower > upper)
loop_normalized = false;
return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
}
return info(interpreted, loop_nullable, m);
}
else
return *this;
}
seq_util::rex::info& seq_util::rex::info::operator=(info const& other) {
if (this == &other) {
return *this;
}
known = other.known;
interpreted = other.interpreted;
nullable = other.nullable;
min_length = other.min_length;
return *this;
}

View file

@ -411,26 +411,11 @@ public:
struct info {
/* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/
lbool known { l_undef };
/* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */
bool classical { false };
/* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */
bool standard { false };
/* There are no uninterpreted symbols. */
bool interpreted { false };
/* No if-then-else is used. */
bool nonbranching { false };
/* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */
bool normalized { false };
/* All bounded loops have a body that is a singleton. */
bool monadic { false };
/* Positive Boolean combination of ranges or predicates or singleton sequences. */
bool singleton { false };
/* If l_true then empty word is accepted, if l_false then empty word is not accepted. */
lbool nullable { l_undef };
/* Lower bound on the length of all accepted words. */
unsigned min_length { 0 };
/* Maximum nesting depth of Kleene stars. */
unsigned star_height { 0 };
/*
Default constructor of invalid info.
@ -445,19 +430,13 @@ public:
/*
General info constructor.
*/
info(bool is_classical,
bool is_standard,
bool is_interpreted,
bool is_nonbranching,
bool is_normalized,
bool is_monadic,
bool is_singleton,
info(bool is_interpreted,
lbool is_nullable,
unsigned min_l,
unsigned star_h) :
known(l_true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching),
normalized(is_normalized), monadic(is_monadic), singleton(is_singleton), nullable(is_nullable),
min_length(min_l), star_height(star_h) {}
unsigned min_l) :
known(l_true),
interpreted(is_interpreted),
nullable(is_nullable),
min_length(min_l) {}
/*
Appends a string representation of the info into the stream.
@ -483,6 +462,8 @@ public:
info diff(info const& rhs) const;
info orelse(info const& rhs) const;
info loop(unsigned lower, unsigned upper) const;
info& operator=(info const& other);
};
private:
seq_util& u;