From 5e5ef50dbc02a2496bbb4611917f4a6f35db63b5 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Sat, 22 Aug 2020 15:59:53 -0700 Subject: [PATCH] re info extension (#4659) * made loop info calculation more accurate * made loop info calculation more accurate * updated formattig and added const declarations --- src/ast/seq_decl_plugin.cpp | 47 ++++++++++++++++++++++++------------- src/ast/seq_decl_plugin.h | 38 +++++++++++++++--------------- 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 95f242685..df11184a4 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1639,7 +1639,7 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const { seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const { info i1, i2; lbool nullable(l_false); - unsigned min_length(0), lower_bound(0); + unsigned min_length(0), lower_bound(0), upper_bound(UINT_MAX); bool is_value(false); bool normalized(false); if (e->get_family_id() == u.get_family_id()) { @@ -1688,8 +1688,11 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const { return i1.complement(); case OP_RE_LOOP: i1 = get_info_rec(e->get_arg(0)); - lower_bound = e->get_decl()->get_parameter(0).get_int(); - return i1.loop(lower_bound); + if (e->get_decl()->get_num_parameters() >= 1) + lower_bound = e->get_decl()->get_parameter(0).get_int(); + if (e->get_decl()->get_num_parameters() == 2) + upper_bound = e->get_decl()->get_parameter(1).get_int(); + return i1.loop(lower_bound, upper_bound); case OP_RE_DIFF: i1 = get_info_rec(e->get_arg(0)); i2 = get_info_rec(e->get_arg(1)); @@ -1742,8 +1745,8 @@ seq_util::rex::info seq_util::rex::info::star() const { seq_util::rex::info seq_util::rex::info::plus() const { if (is_known()) { - //r+ is not normalized if r is nullable, the normalized form would be r* - return info(classical, classical, interpreted, nonbranching, (nullable == l_false ? normalized : false), monadic, false, nullable, min_length, star_height + 1); + //plus never occurs in a normalized regex + return info(classical, classical, interpreted, nonbranching, false, monadic, false, nullable, min_length, star_height + 1); } else return *this; @@ -1751,7 +1754,8 @@ seq_util::rex::info seq_util::rex::info::plus() const { seq_util::rex::info seq_util::rex::info::opt() const { //if is_known() is false then all mentioned properties will remain false - return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height); + //optional construct never occurs in a normalized regex + return seq_util::rex::info(classical, classical, interpreted, nonbranching, false, monadic, false, l_true, 0, star_height); } seq_util::rex::info seq_util::rex::info::complement() const { @@ -1764,7 +1768,7 @@ seq_util::rex::info seq_util::rex::info::complement() const { return *this; } -seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool lhs_is_concat) const { +seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, bool lhs_is_concat) const { if (is_known()) { if (rhs.is_known()) { unsigned m = min_length + rhs.min_length; @@ -1788,7 +1792,7 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool return *this; } -seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const { +seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const { if (is_known() || rhs.is_known()) { //works correctly if one of the arguments is unknown return info(classical & rhs.classical, @@ -1806,7 +1810,7 @@ seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const { return rhs; } -seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const { +seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const { if (is_known()) { if (rhs.is_known()) { return info(false, @@ -1827,7 +1831,7 @@ seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const { return *this; } -seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const { +seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const { if (is_known()) { if (rhs.is_known()) { return info(false, @@ -1848,7 +1852,7 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const { return *this; } -seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const { +seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const { if (is_known()) { if (i.is_known()) { unsigned ite_min_length = std::min(min_length, i.min_length); @@ -1863,16 +1867,27 @@ seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const { return *this; } -seq_util::rex::info seq_util::rex::info::loop(unsigned lower) const { +seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) const { if (is_known()) { - //r{l,m} is not normalized if r is nullable but l > 0 unsigned m = min_length * lower; if (m > 0 && (m < min_length || m < lower)) m = UINT_MAX; - bool loop_normalized = (nullable == l_false && lower > 0 ? false : normalized); lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable); - //TBD: pass also upper bound and if upper bound is UINT_MAX then this is * and the loop is thus not normalized - return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height); + if (upper == UINT_MAX) { + //this means the loop is r{lower,*} and is therefore not normalized + //normalized regex would be r{lower,lower}r* and would in particular not use r{0,} for r* + return info(classical, classical, interpreted, nonbranching, false, singleton, false, loop_nullable, m, star_height + 1); + } + else { + bool loop_normalized = normalized; + //r{lower,upper} is not normalized if r is nullable but lower > 0 + //r{0,1} is not normalized: it should be ()|r + //r{1,1} is not normalized: it should be r + //r{lower,upper} is not normalized if lower > upper it should then be [] (empty) + if ((nullable == l_true && lower > 0) || upper == 1 || lower > upper) + loop_normalized = false; + return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height); + } } else return *this; diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index d94b1ec6f..0894d12f6 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -413,27 +413,27 @@ public: public: struct info { /* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/ - lbool known{ l_undef }; + lbool known { l_undef }; /* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */ - bool classical{ false }; + bool classical { false }; /* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */ - bool standard{ false }; + bool standard { false }; /* There are no uninterpreted symbols. */ - bool interpreted{ false }; + bool interpreted { false }; /* No if-then-else is used. */ - bool nonbranching{ false }; + bool nonbranching { false }; /* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */ - bool normalized{ false }; + bool normalized { false }; /* All bounded loops have a body that is a singleton. */ - bool monadic{ false }; + bool monadic { false }; /* Positive Boolean combination of ranges or predicates or singleton sequences. */ - bool singleton{ false }; + bool singleton { false }; /* If l_true then empty word is accepted, if l_false then empty word is not accepted. */ - lbool nullable{ l_undef }; + lbool nullable { l_undef }; /* Lower bound on the length of all accepted words. */ - unsigned min_length{ 0 }; + unsigned min_length { 0 }; /* Maximum nesting depth of Kleene stars. */ - unsigned star_height{ 0 }; + unsigned star_height { 0 }; /* Default constructor of invalid info. @@ -480,12 +480,12 @@ public: info plus() const; info opt() const; info complement() const; - info concat(info & rhs, bool lhs_is_concat) const; - info disj(info& rhs) const; - info conj(info& rhs) const; - info diff(info& rhs) const; - info orelse(info& rhs) const; - info loop(unsigned lower) const; + info concat(info const& rhs, bool lhs_is_concat) const; + info disj(info const& rhs) const; + info conj(info const& rhs) const; + info diff(info const& rhs) const; + info orelse(info const& rhs) const; + info loop(unsigned lower, unsigned upper) const; }; private: seq_util& u; @@ -493,8 +493,8 @@ public: family_id m_fid; vector mutable m_infos; expr_ref_vector mutable m_info_pinned; - info invalid_info{ info(l_undef) }; - info unknown_info{ info(l_false) }; + info invalid_info { info(l_undef) }; + info unknown_info { info(l_false) }; bool has_valid_info(expr* r) const; info get_info_rec(expr* r) const;