mirror of
https://github.com/Z3Prover/z3
synced 2025-04-06 17:44:08 +00:00
extended calculation of info for regexes (#4656)
* extended calculation of info for regexes, updated tracing of state_graph with regex info * took care of PR comments and fixed some info calculation bugs * fix rlimit for clang-10 (#4658) * merge Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> * extended calculation of info for regexes, updated tracing of state_graph with regex info * took care of PR comments and fixed some info calculation bugs * added missing return statements, reordered def of compl to match declaration order of methods * fixed loop lower bound bug in loop info and default nullable value in invalid_info * fixed type bug: bool to lbool * trying to remove invisible control characters * renamed compl method (compl is a reserved c++ keyword) to complement Co-authored-by: Arie Gurfinkel <arie.gurfinkel@uwaterloo.ca> Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
commit
db65381f33
|
@ -1624,8 +1624,9 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
|
|||
if (result.is_valid())
|
||||
return result;
|
||||
if (!is_app(e))
|
||||
return invalid_info;
|
||||
result = mk_info_rec(to_app(e));
|
||||
result = unknown_info;
|
||||
else
|
||||
result = mk_info_rec(to_app(e));
|
||||
m_infos.setx(e->get_id(), result, invalid_info);
|
||||
STRACE("re_info", tout << "compute_info(" << pp(u.re, e) << ")=" << result << std::endl;);
|
||||
return result;
|
||||
|
@ -1637,67 +1638,244 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
|
|||
*/
|
||||
seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
|
||||
info i1, i2;
|
||||
lbool nullable(l_false);
|
||||
unsigned min_length(0), lower_bound(0);
|
||||
bool is_value(false);
|
||||
bool normalized(false);
|
||||
if (e->get_family_id() == u.get_family_id()) {
|
||||
switch (e->get_decl()->get_decl_kind())
|
||||
{
|
||||
case OP_RE_EMPTY_SET:
|
||||
return info(UINT_MAX);
|
||||
return info(true, true, true, true, true, true, false, l_false, UINT_MAX, 0);
|
||||
case OP_RE_FULL_SEQ_SET:
|
||||
return info(true, true, true, true, true, true, false, l_true, 0, 1);
|
||||
case OP_RE_STAR:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
return i1.star();
|
||||
case OP_RE_OPTION:
|
||||
return info(0);
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
return i1.opt();
|
||||
case OP_RE_RANGE:
|
||||
case OP_RE_FULL_CHAR_SET:
|
||||
case OP_RE_OF_PRED:
|
||||
return info(1);
|
||||
//TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
|
||||
//TBD: check if the range is unsat
|
||||
return info(true, true, true, true, true, true, true, l_false, 1, 0);
|
||||
case OP_RE_CONCAT:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
i2 = get_info_rec(e->get_arg(1));
|
||||
return info(u.max_plus(i1.min_length, i2.min_length));
|
||||
return i1.concat(i2, u.re.is_concat(e->get_arg(0)));
|
||||
case OP_RE_UNION:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
i2 = get_info_rec(e->get_arg(1));
|
||||
return info(std::min(i1.min_length, i2.min_length));
|
||||
return i1.disj(i2);
|
||||
case OP_RE_INTERSECT:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
i2 = get_info_rec(e->get_arg(1));
|
||||
return info(std::max(i1.min_length, i2.min_length));
|
||||
return i1.conj(i2);
|
||||
case OP_SEQ_TO_RE:
|
||||
return info(u.str.min_length(e->get_arg(0)));
|
||||
min_length = u.str.min_length(e->get_arg(0));
|
||||
is_value = m.is_value(e->get_arg(0));
|
||||
nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef));
|
||||
return info(true, true, is_value, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), nullable, min_length, 0);
|
||||
case OP_RE_REVERSE:
|
||||
case OP_RE_PLUS:
|
||||
return get_info_rec(e->get_arg(0));
|
||||
case OP_RE_PLUS:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
return i1.plus();
|
||||
case OP_RE_COMPLEMENT:
|
||||
return info(0);
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
return i1.complement();
|
||||
case OP_RE_LOOP:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
return info(u.max_mul(i1.min_length, e->get_decl()->get_parameter(0).get_int()));
|
||||
lower_bound = e->get_decl()->get_parameter(0).get_int();
|
||||
return i1.loop(lower_bound);
|
||||
case OP_RE_DIFF:
|
||||
i1 = get_info_rec(e->get_arg(0));
|
||||
i2 = get_info_rec(e->get_arg(1));
|
||||
return info(std::max(i1.min_length, i2.min_length > 0 ? 0 : UINT_MAX));
|
||||
return i1.diff(i2);
|
||||
}
|
||||
return invalid_info;
|
||||
return unknown_info;
|
||||
}
|
||||
expr* c, * t, * f;
|
||||
if (u.m.is_ite(e, c, t, f)) {
|
||||
i1 = get_info_rec(t);
|
||||
i2 = get_info_rec(f);
|
||||
return info(std::min(i1.min_length, i2.min_length));
|
||||
return i1.orelse(i2);
|
||||
}
|
||||
return invalid_info;
|
||||
return unknown_info;
|
||||
}
|
||||
|
||||
std::ostream& seq_util::rex::info::display(std::ostream& out) const {
|
||||
if (valid)
|
||||
out << "info(" << "min_length=" << min_length << ")";
|
||||
if (is_known()) {
|
||||
out << "info("
|
||||
<< "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
|
||||
<< "classical=" << (classical ? "T" : "F") << ", "
|
||||
<< "standard=" << (standard ? "T" : "F") << ", "
|
||||
<< "nonbranching=" << (nonbranching ? "T" : "F") << ", "
|
||||
<< "normalized=" << (normalized ? "T" : "F") << ", "
|
||||
<< "monadic=" << (monadic ? "T" : "F") << ", "
|
||||
<< "singleton=" << (singleton ? "T" : "F") << ", "
|
||||
<< "min_length=" << min_length << ", "
|
||||
<< "star_height=" << star_height << ")";
|
||||
}
|
||||
else if (is_valid())
|
||||
out << "UNKNOWN";
|
||||
else
|
||||
out << "invalid_info";
|
||||
out << "INVALID";
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
String representation of the info.
|
||||
*/
|
||||
std::string seq_util::rex::info::str() const {
|
||||
std::ostringstream out;
|
||||
display(out);
|
||||
return out.str();
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::star() const {
|
||||
//if is_known() is false then all mentioned properties will remain false
|
||||
return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height + 1);
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::plus() const {
|
||||
if (is_known()) {
|
||||
//r+ is not normalized if r is nullable, the normalized form would be r*
|
||||
return info(classical, classical, interpreted, nonbranching, (nullable == l_false ? normalized : false), monadic, false, nullable, min_length, star_height + 1);
|
||||
}
|
||||
else
|
||||
return *this;
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::opt() const {
|
||||
//if is_known() is false then all mentioned properties will remain false
|
||||
return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height);
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::complement() const {
|
||||
if (is_known()) {
|
||||
lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef));
|
||||
unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0);
|
||||
return info(false, standard, interpreted, nonbranching, normalized, monadic, false, compl_nullable, compl_min_length, star_height);
|
||||
}
|
||||
else
|
||||
return *this;
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info & rhs, bool lhs_is_concat) const {
|
||||
if (is_known()) {
|
||||
if (rhs.is_known()) {
|
||||
unsigned m = min_length + rhs.min_length;
|
||||
if (m < min_length || m < rhs.min_length)
|
||||
m = UINT_MAX;
|
||||
return info(classical & rhs.classical,
|
||||
classical && rhs.classical, //both args of concat must be classical for it to be standard
|
||||
interpreted && rhs.interpreted,
|
||||
nonbranching && rhs.nonbranching,
|
||||
(normalized && !lhs_is_concat && rhs.normalized),
|
||||
monadic && rhs.monadic,
|
||||
false,
|
||||
((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)),
|
||||
m,
|
||||
std::max(star_height, rhs.star_height));
|
||||
}
|
||||
else
|
||||
return rhs;
|
||||
}
|
||||
else
|
||||
return *this;
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info& rhs) const {
|
||||
if (is_known() || rhs.is_known()) {
|
||||
//works correctly if one of the arguments is unknown
|
||||
return info(classical & rhs.classical,
|
||||
standard && rhs.standard,
|
||||
interpreted && rhs.interpreted,
|
||||
nonbranching && rhs.nonbranching,
|
||||
normalized && rhs.normalized,
|
||||
monadic && rhs.monadic,
|
||||
singleton && rhs.singleton,
|
||||
((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)),
|
||||
std::min(min_length, rhs.min_length),
|
||||
std::max(star_height, rhs.star_height));
|
||||
}
|
||||
else
|
||||
return rhs;
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info& rhs) const {
|
||||
if (is_known()) {
|
||||
if (rhs.is_known()) {
|
||||
return info(false,
|
||||
standard && rhs.standard,
|
||||
interpreted && rhs.interpreted,
|
||||
nonbranching && rhs.nonbranching,
|
||||
normalized && rhs.normalized,
|
||||
monadic && rhs.monadic,
|
||||
singleton && rhs.singleton,
|
||||
((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
|
||||
std::max(min_length, rhs.min_length),
|
||||
std::max(star_height, rhs.star_height));
|
||||
}
|
||||
else
|
||||
return rhs;
|
||||
}
|
||||
else
|
||||
return *this;
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info& rhs) const {
|
||||
if (is_known()) {
|
||||
if (rhs.is_known()) {
|
||||
return info(false,
|
||||
standard & rhs.standard,
|
||||
interpreted & rhs.interpreted,
|
||||
nonbranching & rhs.nonbranching,
|
||||
normalized & rhs.normalized,
|
||||
monadic & rhs.monadic,
|
||||
false,
|
||||
((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
|
||||
std::max(min_length, rhs.min_length),
|
||||
std::max(star_height, rhs.star_height));
|
||||
}
|
||||
else
|
||||
return rhs;
|
||||
}
|
||||
else
|
||||
return *this;
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info& i) const {
|
||||
if (is_known()) {
|
||||
if (i.is_known()) {
|
||||
unsigned ite_min_length = std::min(min_length, i.min_length);
|
||||
lbool ite_nullable = (nullable == i.nullable ? nullable : l_undef);
|
||||
//TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
|
||||
return info(false, false, false, false, normalized && i.normalized, monadic && i.monadic, singleton && i.singleton, nullable, min_length, std::max(star_height, i.star_height));
|
||||
}
|
||||
else
|
||||
return i;
|
||||
}
|
||||
else
|
||||
return *this;
|
||||
}
|
||||
|
||||
seq_util::rex::info seq_util::rex::info::loop(unsigned lower) const {
|
||||
if (is_known()) {
|
||||
//r{l,m} is not normalized if r is nullable but l > 0
|
||||
unsigned m = min_length * lower;
|
||||
if (m > 0 && (m < min_length || m < lower))
|
||||
m = UINT_MAX;
|
||||
bool loop_normalized = (nullable == l_false && lower > 0 ? false : normalized);
|
||||
lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
|
||||
//TBD: pass also upper bound and if upper bound is UINT_MAX then this is * and the loop is thus not normalized
|
||||
return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height);
|
||||
}
|
||||
else
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ Revision History:
|
|||
#include "ast/ast.h"
|
||||
#include "ast/bv_decl_plugin.h"
|
||||
#include <string>
|
||||
#include "util/lbool.h"
|
||||
|
||||
#define Z3_USE_UNICODE 0
|
||||
|
||||
|
@ -411,13 +412,80 @@ public:
|
|||
class rex {
|
||||
public:
|
||||
struct info {
|
||||
bool valid;
|
||||
unsigned min_length;
|
||||
info() : valid(false), min_length(0) {}
|
||||
info(unsigned k) : valid(true), min_length(k) {}
|
||||
bool is_valid() { return valid; }
|
||||
/* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/
|
||||
lbool known{ l_undef };
|
||||
/* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */
|
||||
bool classical{ false };
|
||||
/* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */
|
||||
bool standard{ false };
|
||||
/* There are no uninterpreted symbols. */
|
||||
bool interpreted{ false };
|
||||
/* No if-then-else is used. */
|
||||
bool nonbranching{ false };
|
||||
/* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */
|
||||
bool normalized{ false };
|
||||
/* All bounded loops have a body that is a singleton. */
|
||||
bool monadic{ false };
|
||||
/* Positive Boolean combination of ranges or predicates or singleton sequences. */
|
||||
bool singleton{ false };
|
||||
/* If l_true then empty word is accepted, if l_false then empty word is not accepted. */
|
||||
lbool nullable{ l_undef };
|
||||
/* Lower bound on the length of all accepted words. */
|
||||
unsigned min_length{ 0 };
|
||||
/* Maximum nesting depth of Kleene stars. */
|
||||
unsigned star_height{ 0 };
|
||||
|
||||
/*
|
||||
Default constructor of invalid info.
|
||||
*/
|
||||
info() {}
|
||||
|
||||
/*
|
||||
Used for constructing either an invalid info that is only used to indicate uninitialzed entry, or valid but unknown info value.
|
||||
*/
|
||||
info(lbool is_known) : known(is_known) {}
|
||||
|
||||
/*
|
||||
General info constructor.
|
||||
*/
|
||||
info(bool is_classical,
|
||||
bool is_standard,
|
||||
bool is_interpreted,
|
||||
bool is_nonbranching,
|
||||
bool is_normalized,
|
||||
bool is_monadic,
|
||||
bool is_singleton,
|
||||
lbool is_nullable,
|
||||
unsigned min_l,
|
||||
unsigned star_h) :
|
||||
known(l_true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching),
|
||||
normalized(is_normalized), monadic(is_monadic), singleton(is_singleton), nullable(is_nullable),
|
||||
min_length(min_l), star_height(star_h) {}
|
||||
|
||||
/*
|
||||
Appends a string representation of the info into the stream.
|
||||
*/
|
||||
std::ostream& display(std::ostream&) const;
|
||||
|
||||
/*
|
||||
Returns a string representation of the info.
|
||||
*/
|
||||
std::string str() const;
|
||||
|
||||
bool is_valid() const { return known != l_undef; }
|
||||
|
||||
bool is_known() const { return known == l_true; }
|
||||
|
||||
info star() const;
|
||||
info plus() const;
|
||||
info opt() const;
|
||||
info complement() const;
|
||||
info concat(info & rhs, bool lhs_is_concat) const;
|
||||
info disj(info& rhs) const;
|
||||
info conj(info& rhs) const;
|
||||
info diff(info& rhs) const;
|
||||
info orelse(info& rhs) const;
|
||||
info loop(unsigned lower) const;
|
||||
};
|
||||
private:
|
||||
seq_util& u;
|
||||
|
@ -425,7 +493,8 @@ public:
|
|||
family_id m_fid;
|
||||
vector<info> mutable m_infos;
|
||||
expr_ref_vector mutable m_info_pinned;
|
||||
info invalid_info;
|
||||
info invalid_info{ info(l_undef) };
|
||||
info unknown_info{ info(l_false) };
|
||||
|
||||
bool has_valid_info(expr* r) const;
|
||||
info get_info_rec(expr* r) const;
|
||||
|
@ -536,3 +605,4 @@ public:
|
|||
inline std::ostream& operator<<(std::ostream& out, seq_util::rex::pp const & p) { return p.display(out); }
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, seq_util::rex::info const& p) { return p.display(out); }
|
||||
|
||||
|
|
|
@ -793,11 +793,12 @@ namespace smt {
|
|||
}
|
||||
STRACE("seq_regex", tout << "Updating state graph for regex "
|
||||
<< mk_pp(r, m) << ") ";);
|
||||
if (!m_state_graph.is_seen(r_id))
|
||||
STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << seq_util::rex::pp(re(), r) << std::endl;);
|
||||
|
||||
STRACE("state_graph",
|
||||
if (!m_state_graph.is_seen(r_id))
|
||||
tout << std::endl << "state(" << r_id << ") = " << seq_util::rex::pp(re(), r) << std::endl << "info(" << r_id << ") = " << re().get_info(r) << std::endl;);
|
||||
// Add state
|
||||
m_state_graph.add_state(r_id);
|
||||
STRACE("state_graph", tout << "regex(" << r_id << ") = " << mk_pp(r, m) << std::endl;);
|
||||
STRACE("seq_regex", tout << "Updating state graph for regex "
|
||||
<< mk_pp(r, m) << ") " << std::endl;);
|
||||
STRACE("seq_regex_brief", tout << std::endl << "USG("
|
||||
|
@ -815,12 +816,12 @@ namespace smt {
|
|||
for (auto const& dr: derivatives) {
|
||||
unsigned dr_id = get_state_id(dr);
|
||||
STRACE("seq_regex_verbose", tout
|
||||
<< std::endl << " traversing deriv: " << dr_id << " ";);
|
||||
if (!m_state_graph.is_seen(dr_id))
|
||||
STRACE("state_graph", tout << "state(" << dr_id << ") = " << seq_util::rex::pp(re(), dr) << std::endl;);
|
||||
<< std::endl << " traversing deriv: " << dr_id << " ";);
|
||||
STRACE("state_graph",
|
||||
if (!m_state_graph.is_seen(dr_id))
|
||||
tout << "state(" << dr_id << ") = " << seq_util::rex::pp(re(), dr) << std::endl << "info(" << dr_id << ") = " << re().get_info(dr) << std::endl;);
|
||||
// Add state
|
||||
m_state_graph.add_state(dr_id);
|
||||
STRACE("state_graph", tout << "regex(" << dr_id << ") = " << mk_pp(dr, m) << std::endl;);
|
||||
bool maybecycle = can_be_in_cycle(r, dr);
|
||||
m_state_graph.add_edge(r_id, dr_id, maybecycle);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue