mirror of
https://github.com/Z3Prover/z3
synced 2025-04-06 17:44:08 +00:00
pp support for regex expressions in more-or-less standard syntax (#4638)
* pp support for regex expressions is more-or-less standard syntax * Regex solver updates (#4636) * std::cout debugging statements * comment out std::cout debugging as this is now a shared fork * convert std::cout to TRACE statements for seq_rewriter and seq_regex * add cases to min_length and max_length for regexes * bug fix * update min_length and max_length functions for REs * initial pass on simplifying derivative normal forms by eliminating redundant predicates locally * add seq_regex_brief trace statements * working on debugging ref count issue * fix ref count bug and convert trace statements to seq_regex_brief * add compact tracing for cache hits/misses * seq_regex fix cache hit/miss tracing and wrapper around is_nullable * minor * label and disable more experimental changes for testing * minor documentation / tracing * a few more @EXP annotations * dead state elimination skeleton code * progress on dead state elimination * more progress on dead state elimination * refactor dead state class to separate self-contained state_graph class * finish factoring state_graph to only work with unsigned values, and implement separate functionality for expr* logic * implement get_all_derivatives, add debug tracing * trace statements for debugging is_nullable loop bug * fix is_nullable loop bug * comment out local nullable change and mark experimental * pretty printing for state_graph * rewrite state graph to remove the fragile assumption that all edges from a state are added at a time * start of general cycle detection check + fix some comments * implement full cycle detection procedure * normalize derivative conditions to form 'ele <= a' * order derivative conditions by character code * fix confusing names m_to and m_from * assign increasing state IDs from 1 instead of using get_id on AST node * remove elim_condition call in get_dall_derivatives * use u_map instead of uint_map to avoid memory leak * remove unnecessary call to is_ground * debugging * small improvements to seq_regex_brief tracing * fix bug on evil2 example * save work * new propagate code * work in progress on using same seq sort for deriv calls * avoid re-computing derivatives: use same head var for every derivative call * use min_length on regexes to prune search * simple implementation of can_be_in_cycle using rank function idea * add a disabled experimental change * minor cleanup comments, etc. * seq_rewriter cleanup for PR * typo noticed by Nikolaj * move state graph to util/state_graph * re-add accidentally removed line * clean up seq_regex code removing obsolete functions and comments * a few more cleanup items * oops, missed merge change to fix compilation * disabled change to lift unions to the top level and treat them seperately in seq_regex solver * added get_overapprox_regex to over-approximate regex membership constraints * replace calls to is_epsilon with a centrally available method in seq_decl_plugin * simplifications and modifications in get_overapprox_regex and related * added approximation support for sequence expressions that use ite * removed is_app check that was redundant * tweak differences with upstream * rewrite derivative leaves * enable Antimorov-style derivatives via lifting unions in the solver * TODO placeholders for outputting state graph * change order in seq_regex propagate_in_re * implement a more restricted form of Antimorov derivatives via a special op code to indicate lifting unions * minor * new Antimorov optimizations based on BDD compatibility checking * seq regex tracing for # of derivatives * fix get_cofactors (currently this fix is buggy) * partially revert get_cofactors buggy change * re-implement get_cofactors to more efficiently explore nodes in the derivative expression * dgml generation for state graph * fix release build * improved dgml output * bug fixes in dgml generation * dot output support for state_graph and moved dgml and dot output under CASSERT * updated tracing of what regex corresponds to what state id with /tr:state_graph * clean up & document Antimorov derivative support * remove op cache tracing * remove re_rank experimental idea * small fix * fix Antimorov derivative (important change for the good performance) * remove unused and unnecessary code * implemented simpler efficient get_cofactors alternative mk_deriv_accept * simplifications in propagate_accept, and trace unusual cases * document the various seq_regex tracing & debugging command-line options * fix debug build (broken tracing) * guard eager Antimorov lifting for possible disabling * fix bug in propagate_accept Rule 1 * disable eager version of Antimorov lifting for performance reasons * remove some remaining obsolete comments Co-authored-by: calebstanford-msr <t-casta@microsoft.com> Co-authored-by: Margus Veanes <margus@microsoft.com> * typo Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> * took care of comments for related PR * #4637 Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> * build Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> * further PR comment fixes * updated detection of when parenthesis can be omitted to cover empty and epsilon * always reduce macro expansions in model evaluation #4588 Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> * fixed bug in seq_unit * pp support for regex expressions is more-or-less standard syntax * took care of comments for related PR * further PR comment fixes * updated detection of when parenthesis can be omitted to cover empty and epsilon * fixed bug in seq_unit Co-authored-by: Caleb Stanford <caleb.pirsquared@gmail.com> Co-authored-by: calebstanford-msr <t-casta@microsoft.com> Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
commit
363b416473
|
@ -1465,3 +1465,147 @@ bool seq_util::re::is_epsilon(expr* r) const {
|
|||
app* seq_util::re::mk_epsilon(sort* seq_sort) {
|
||||
return mk_to_re(u.str.mk_empty(seq_sort));
|
||||
}
|
||||
|
||||
/*
|
||||
Produces compact view of concrete concatenations such as (abcd).
|
||||
*/
|
||||
void seq_util::re::pp::compact_helper_seq(std::ostream& out, expr* s) const {
|
||||
SASSERT(re.u.is_seq(s));
|
||||
if (re.m.is_value(s)) {
|
||||
SASSERT(s->get_kind() == ast_kind::AST_APP);
|
||||
if (re.u.str.is_concat(s)) {
|
||||
expr_ref_vector es(re.m);
|
||||
re.u.str.get_concat(s, es);
|
||||
for (unsigned i = 0; i < es.size(); i++)
|
||||
if (re.u.str.is_unit(es.get(i)))
|
||||
seq_unit(out, es.get(i));
|
||||
else
|
||||
out << mk_pp(es.get(i), re.m);
|
||||
}
|
||||
else if (re.u.str.is_empty(s))
|
||||
out << "()";
|
||||
else
|
||||
seq_unit(out, s);
|
||||
}
|
||||
else
|
||||
out << mk_pp(s, re.m);
|
||||
}
|
||||
|
||||
/*
|
||||
Produces output such as [a-z] for a range.
|
||||
*/
|
||||
void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) const {
|
||||
out << "[";
|
||||
if (re.u.str.is_unit(s1))
|
||||
seq_unit(out, s1);
|
||||
else
|
||||
out << mk_pp(s1, re.m);
|
||||
out << "-";
|
||||
if (re.u.str.is_unit(s2))
|
||||
seq_unit(out, s2);
|
||||
else
|
||||
out << mk_pp(s1, re.m);
|
||||
out << "]";
|
||||
}
|
||||
|
||||
/*
|
||||
Checks if parenthesis can be omitted in some cases in a loop body or in complement.
|
||||
*/
|
||||
bool seq_util::re::pp::can_skip_parenth(expr* r) const {
|
||||
expr* s;
|
||||
return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r));
|
||||
}
|
||||
|
||||
/*
|
||||
Specialize output for a unit sequence converting to visible ASCII characters if possible.
|
||||
*/
|
||||
void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) const {
|
||||
expr* e;
|
||||
if (re.u.str.is_unit(s, e)) {
|
||||
rational r;
|
||||
unsigned sz;
|
||||
if (re.u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) {
|
||||
unsigned n = r.get_unsigned();
|
||||
if (32 < n && n < 127)
|
||||
out << (char)n;
|
||||
else if (n < 16)
|
||||
out << "\\x0" << std::hex << n;
|
||||
else
|
||||
out << "\\x" << std::hex << n;
|
||||
}
|
||||
else
|
||||
out << mk_pp(s, re.m);
|
||||
}
|
||||
else
|
||||
out << mk_pp(s, re.m);
|
||||
}
|
||||
|
||||
/*
|
||||
Pretty prints the regex r into the out stream
|
||||
*/
|
||||
std::ostream& seq_util::re::pp::display(std::ostream& out) const {
|
||||
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
|
||||
unsigned lo = 0, hi = 0;
|
||||
if (re.is_full_char(e))
|
||||
return out << ".";
|
||||
else if (re.is_full_seq(e))
|
||||
return out << ".*";
|
||||
else if (re.is_to_re(e, s))
|
||||
{
|
||||
compact_helper_seq(out, s);
|
||||
return out;
|
||||
}
|
||||
else if (re.is_range(e, s, s2)) {
|
||||
compact_helper_range(out, s, s2);
|
||||
return out;
|
||||
}
|
||||
else if (re.is_epsilon(e))
|
||||
return out << "()";
|
||||
else if (re.is_empty(e))
|
||||
return out << "[]";
|
||||
else if (re.is_concat(e, r1, r2))
|
||||
return out << pp(re, r1) << pp(re, r2);
|
||||
else if (re.is_union(e, r1, r2))
|
||||
return out << pp(re, r1) << "|" << pp(re, r2);
|
||||
else if (re.is_intersection(e, r1, r2))
|
||||
return out << "(" << pp(re, r1) << ")&(" << pp(re, r2) << ")";
|
||||
else if (re.is_complement(e, r1))
|
||||
if (can_skip_parenth(r1))
|
||||
return out << "~" << pp(re, r1);
|
||||
else
|
||||
return out << "~(" << pp(re, r1) << ")";
|
||||
else if (re.is_plus(e, r1))
|
||||
if (can_skip_parenth(r1))
|
||||
return out << pp(re, r1) << "+";
|
||||
else
|
||||
return out << "(" << pp(re, r1) << ")+";
|
||||
else if (re.is_star(e, r1))
|
||||
if (can_skip_parenth(r1))
|
||||
return out << pp(re, r1) << "*";
|
||||
else
|
||||
return out << "(" << pp(re, r1) << ")*";
|
||||
else if (re.is_loop(e, r1, lo))
|
||||
if (can_skip_parenth(r1))
|
||||
return out << pp(re, r1) << "{" << lo << ",}";
|
||||
else
|
||||
return out << "(" << pp(re, r1) << "){" << lo << ",}";
|
||||
else if (re.is_loop(e, r1, lo, hi))
|
||||
if (can_skip_parenth(r1))
|
||||
return out << pp(re, r1) << "{" << lo << "," << hi << "}";
|
||||
else
|
||||
return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}";
|
||||
else if (re.is_diff(e, r1, r2))
|
||||
return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")";
|
||||
else if (re.m.is_ite(e, s, r1, r2))
|
||||
return out << "if(" << mk_pp(s, re.m) << "," << pp(re, r1) << "," << pp(re, r2) << ")";
|
||||
else if (re.is_opt(e, r1))
|
||||
if (can_skip_parenth(r1))
|
||||
return out << pp(re, r1) << "?";
|
||||
else
|
||||
return out << "(" << pp(re, r1) << ")?";
|
||||
else if (re.is_reverse(e, r1))
|
||||
return out << "reverse(" << pp(re, r1) << ")";
|
||||
else
|
||||
// Else: derivative or is_of_pred
|
||||
return out << mk_pp(e, re.m);
|
||||
}
|
||||
|
|
|
@ -412,6 +412,7 @@ public:
|
|||
seq_util& u;
|
||||
ast_manager& m;
|
||||
family_id m_fid;
|
||||
|
||||
public:
|
||||
re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {}
|
||||
|
||||
|
@ -481,6 +482,20 @@ public:
|
|||
unsigned max_length(expr* r) const;
|
||||
bool is_epsilon(expr* r) const;
|
||||
app* mk_epsilon(sort* seq_sort);
|
||||
|
||||
class pp {
|
||||
seq_util::re& re;
|
||||
expr* e;
|
||||
bool can_skip_parenth(expr* r) const;
|
||||
void seq_unit(std::ostream& out, expr* s) const;
|
||||
void compact_helper_seq(std::ostream& out, expr* s) const;
|
||||
void compact_helper_range(std::ostream& out, expr* s1, expr* s2) const;
|
||||
|
||||
public:
|
||||
pp(seq_util::re& r, expr* e) : re(r), e(e) {}
|
||||
std::ostream& display(std::ostream&) const;
|
||||
};
|
||||
|
||||
};
|
||||
str str;
|
||||
re re;
|
||||
|
@ -499,4 +514,8 @@ public:
|
|||
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, seq_util::re::pp const & p) { return p.display(out); }
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -791,6 +791,10 @@ namespace smt {
|
|||
STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";);
|
||||
return false;
|
||||
}
|
||||
STRACE("seq_regex", tout << "Updating state graph for regex "
|
||||
<< mk_pp(r, m) << ") ";);
|
||||
if (!m_state_graph.is_seen(r_id))
|
||||
STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << seq_util::re::pp(re(), r) << std::endl;);
|
||||
// Add state
|
||||
m_state_graph.add_state(r_id);
|
||||
STRACE("state_graph", tout << "regex(" << r_id << ") = " << mk_pp(r, m) << std::endl;);
|
||||
|
@ -811,7 +815,10 @@ namespace smt {
|
|||
for (auto const& dr: derivatives) {
|
||||
unsigned dr_id = get_state_id(dr);
|
||||
STRACE("seq_regex_verbose", tout
|
||||
<< " traversing deriv: " << dr_id << " " << std::endl;);
|
||||
<< std::endl << " traversing deriv: " << dr_id << " ";);
|
||||
if (!m_state_graph.is_seen(dr_id))
|
||||
STRACE("state_graph", tout << "state(" << dr_id << ") = " << seq_util::re::pp(re(), dr) << std::endl;);
|
||||
// Add state
|
||||
m_state_graph.add_state(dr_id);
|
||||
STRACE("state_graph", tout << "regex(" << dr_id << ") = " << mk_pp(dr, m) << std::endl;);
|
||||
bool maybecycle = can_be_in_cycle(r, dr);
|
||||
|
|
Loading…
Reference in a new issue