3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-12 12:08:18 +00:00

re info extension (#4659)

* made loop info calculation more accurate

* made loop info calculation more accurate

* updated formattig and added const declarations
This commit is contained in:
Margus Veanes 2020-08-22 15:59:53 -07:00 committed by GitHub
parent a58b8ceced
commit 5e5ef50dbc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 50 additions and 35 deletions

View file

@ -1639,7 +1639,7 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const { seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
info i1, i2; info i1, i2;
lbool nullable(l_false); lbool nullable(l_false);
unsigned min_length(0), lower_bound(0); unsigned min_length(0), lower_bound(0), upper_bound(UINT_MAX);
bool is_value(false); bool is_value(false);
bool normalized(false); bool normalized(false);
if (e->get_family_id() == u.get_family_id()) { if (e->get_family_id() == u.get_family_id()) {
@ -1688,8 +1688,11 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
return i1.complement(); return i1.complement();
case OP_RE_LOOP: case OP_RE_LOOP:
i1 = get_info_rec(e->get_arg(0)); i1 = get_info_rec(e->get_arg(0));
lower_bound = e->get_decl()->get_parameter(0).get_int(); if (e->get_decl()->get_num_parameters() >= 1)
return i1.loop(lower_bound); lower_bound = e->get_decl()->get_parameter(0).get_int();
if (e->get_decl()->get_num_parameters() == 2)
upper_bound = e->get_decl()->get_parameter(1).get_int();
return i1.loop(lower_bound, upper_bound);
case OP_RE_DIFF: case OP_RE_DIFF:
i1 = get_info_rec(e->get_arg(0)); i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1)); i2 = get_info_rec(e->get_arg(1));
@ -1742,8 +1745,8 @@ seq_util::rex::info seq_util::rex::info::star() const {
seq_util::rex::info seq_util::rex::info::plus() const { seq_util::rex::info seq_util::rex::info::plus() const {
if (is_known()) { if (is_known()) {
//r+ is not normalized if r is nullable, the normalized form would be r* //plus never occurs in a normalized regex
return info(classical, classical, interpreted, nonbranching, (nullable == l_false ? normalized : false), monadic, false, nullable, min_length, star_height + 1); return info(classical, classical, interpreted, nonbranching, false, monadic, false, nullable, min_length, star_height + 1);
} }
else else
return *this; return *this;
@ -1751,7 +1754,8 @@ seq_util::rex::info seq_util::rex::info::plus() const {
seq_util::rex::info seq_util::rex::info::opt() const { seq_util::rex::info seq_util::rex::info::opt() const {
//if is_known() is false then all mentioned properties will remain false //if is_known() is false then all mentioned properties will remain false
return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height); //optional construct never occurs in a normalized regex
return seq_util::rex::info(classical, classical, interpreted, nonbranching, false, monadic, false, l_true, 0, star_height);
} }
seq_util::rex::info seq_util::rex::info::complement() const { seq_util::rex::info seq_util::rex::info::complement() const {
@ -1764,7 +1768,7 @@ seq_util::rex::info seq_util::rex::info::complement() const {
return *this; return *this;
} }
seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool lhs_is_concat) const { seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, bool lhs_is_concat) const {
if (is_known()) { if (is_known()) {
if (rhs.is_known()) { if (rhs.is_known()) {
unsigned m = min_length + rhs.min_length; unsigned m = min_length + rhs.min_length;
@ -1788,7 +1792,7 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool
return *this; return *this;
} }
seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const { seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const {
if (is_known() || rhs.is_known()) { if (is_known() || rhs.is_known()) {
//works correctly if one of the arguments is unknown //works correctly if one of the arguments is unknown
return info(classical & rhs.classical, return info(classical & rhs.classical,
@ -1806,7 +1810,7 @@ seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const {
return rhs; return rhs;
} }
seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const { seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const {
if (is_known()) { if (is_known()) {
if (rhs.is_known()) { if (rhs.is_known()) {
return info(false, return info(false,
@ -1827,7 +1831,7 @@ seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const {
return *this; return *this;
} }
seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const { seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const {
if (is_known()) { if (is_known()) {
if (rhs.is_known()) { if (rhs.is_known()) {
return info(false, return info(false,
@ -1848,7 +1852,7 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const {
return *this; return *this;
} }
seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const { seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
if (is_known()) { if (is_known()) {
if (i.is_known()) { if (i.is_known()) {
unsigned ite_min_length = std::min(min_length, i.min_length); unsigned ite_min_length = std::min(min_length, i.min_length);
@ -1863,16 +1867,27 @@ seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const {
return *this; return *this;
} }
seq_util::rex::info seq_util::rex::info::loop(unsigned lower) const { seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) const {
if (is_known()) { if (is_known()) {
//r{l,m} is not normalized if r is nullable but l > 0
unsigned m = min_length * lower; unsigned m = min_length * lower;
if (m > 0 && (m < min_length || m < lower)) if (m > 0 && (m < min_length || m < lower))
m = UINT_MAX; m = UINT_MAX;
bool loop_normalized = (nullable == l_false && lower > 0 ? false : normalized);
lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable); lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
//TBD: pass also upper bound and if upper bound is UINT_MAX then this is * and the loop is thus not normalized if (upper == UINT_MAX) {
return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height); //this means the loop is r{lower,*} and is therefore not normalized
//normalized regex would be r{lower,lower}r* and would in particular not use r{0,} for r*
return info(classical, classical, interpreted, nonbranching, false, singleton, false, loop_nullable, m, star_height + 1);
}
else {
bool loop_normalized = normalized;
//r{lower,upper} is not normalized if r is nullable but lower > 0
//r{0,1} is not normalized: it should be ()|r
//r{1,1} is not normalized: it should be r
//r{lower,upper} is not normalized if lower > upper it should then be [] (empty)
if ((nullable == l_true && lower > 0) || upper == 1 || lower > upper)
loop_normalized = false;
return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
}
} }
else else
return *this; return *this;

View file

@ -413,27 +413,27 @@ public:
public: public:
struct info { struct info {
/* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/ /* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/
lbool known{ l_undef }; lbool known { l_undef };
/* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */ /* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */
bool classical{ false }; bool classical { false };
/* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */ /* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */
bool standard{ false }; bool standard { false };
/* There are no uninterpreted symbols. */ /* There are no uninterpreted symbols. */
bool interpreted{ false }; bool interpreted { false };
/* No if-then-else is used. */ /* No if-then-else is used. */
bool nonbranching{ false }; bool nonbranching { false };
/* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */ /* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */
bool normalized{ false }; bool normalized { false };
/* All bounded loops have a body that is a singleton. */ /* All bounded loops have a body that is a singleton. */
bool monadic{ false }; bool monadic { false };
/* Positive Boolean combination of ranges or predicates or singleton sequences. */ /* Positive Boolean combination of ranges or predicates or singleton sequences. */
bool singleton{ false }; bool singleton { false };
/* If l_true then empty word is accepted, if l_false then empty word is not accepted. */ /* If l_true then empty word is accepted, if l_false then empty word is not accepted. */
lbool nullable{ l_undef }; lbool nullable { l_undef };
/* Lower bound on the length of all accepted words. */ /* Lower bound on the length of all accepted words. */
unsigned min_length{ 0 }; unsigned min_length { 0 };
/* Maximum nesting depth of Kleene stars. */ /* Maximum nesting depth of Kleene stars. */
unsigned star_height{ 0 }; unsigned star_height { 0 };
/* /*
Default constructor of invalid info. Default constructor of invalid info.
@ -480,12 +480,12 @@ public:
info plus() const; info plus() const;
info opt() const; info opt() const;
info complement() const; info complement() const;
info concat(info & rhs, bool lhs_is_concat) const; info concat(info const& rhs, bool lhs_is_concat) const;
info disj(info& rhs) const; info disj(info const& rhs) const;
info conj(info& rhs) const; info conj(info const& rhs) const;
info diff(info& rhs) const; info diff(info const& rhs) const;
info orelse(info& rhs) const; info orelse(info const& rhs) const;
info loop(unsigned lower) const; info loop(unsigned lower, unsigned upper) const;
}; };
private: private:
seq_util& u; seq_util& u;
@ -493,8 +493,8 @@ public:
family_id m_fid; family_id m_fid;
vector<info> mutable m_infos; vector<info> mutable m_infos;
expr_ref_vector mutable m_info_pinned; expr_ref_vector mutable m_info_pinned;
info invalid_info{ info(l_undef) }; info invalid_info { info(l_undef) };
info unknown_info{ info(l_false) }; info unknown_info { info(l_false) };
bool has_valid_info(expr* r) const; bool has_valid_info(expr* r) const;
info get_info_rec(expr* r) const; info get_info_rec(expr* r) const;