3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-07-20 11:22:04 +00:00

Updated regex derivative engine (#5567)

* updated derivative engine

* some edit

* further improvements in derivative code

* more deriv code edits and re::to_str update

* optimized mk_deriv_accept

* fixed PR comments

* small syntax fix

* updated some simplifications

* bugfix: forgot to_re before reverse

* fixed PR comments

* more PR comment fixes

* more PR comment fixes

* forgot to delete

* deleting unused definition

* fixes

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fixes

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Margus Veanes 2021-10-08 13:04:49 -07:00 committed by GitHub
parent c0c3e685e7
commit 146f4621c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 893 additions and 280 deletions

View file

@ -839,7 +839,7 @@ bool seq_util::str::is_nth_i(expr const* n, expr*& s, unsigned& idx) const {
return arith_util(m).is_unsigned(i, idx);
}
/*
   Builds the nth_i term over s for the constant index i:
   the index is turned into an integer numeral and passed to mk_nth_i.
*/
app* seq_util::str::mk_nth_c(expr* s, unsigned i) const {
    return mk_nth_i(s, arith_util(m).mk_int(i));
}
@ -854,6 +854,48 @@ void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const {
}
}
/*
   Returns true if s is a length term minus a nonnegative numeral k, i.e.
   s has one of the forms |u|, |u| - k, (-k) + |u| or |u| + (-k).

   On success:
     l is the length term |u|,
     u is the argument of that length term,
     k is the nonnegative offset (k = 0 and l = s when s is |u| itself).

   On failure the output arguments must not be relied upon; they may have
   been assigned while matching was attempted.
*/
bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, rational& k) const {
    expr* x = nullptr;
    rational v;
    arith_util a(m);
    if (is_length(s, u)) {
        // s is the length term itself: report l = s, consistent with the
        // other cases where l is |u| and u is the measured sequence.
        l = const_cast<expr*>(s);
        k = 0;
        return true;
    }
    // |u| - k
    if (a.is_sub(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonneg()) {
        k = v;
        return true;
    }
    // |u| + (-k)
    if (a.is_add(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
        k = -v;
        return true;
    }
    // (-k) + |u|
    if (a.is_add(s, x, l) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
        k = -v;
        return true;
    }
    return false;
}
/*
   Returns true if s is either a string literal of length one or a unit sequence.
   In the first case c is set to the character term built from the literal's only
   element, in the second case c is set to the argument of the unit.
*/
bool seq_util::str::is_unit_string(expr const* s, expr_ref& c) const {
    zstring literal;
    expr* elem = nullptr;
    // a one-character string literal
    if (is_string(s, literal) && literal.length() == 1) {
        c = mk_char(literal[0]);
        return true;
    }
    // otherwise only a unit sequence qualifies
    if (!is_unit(s, elem))
        return false;
    c = elem;
    return true;
}
void seq_util::str::get_concat_units(expr* e, expr_ref_vector& es) const {
expr* e1, *e2;
while (is_concat(e, e1, e2)) {
@ -876,8 +918,6 @@ app* seq_util::str::mk_is_empty(expr* s) const {
return m.mk_eq(s, mk_empty(s->get_sort()));
}
unsigned seq_util::str::min_length(expr* s) const {
SASSERT(u.is_seq(s));
unsigned result = 0;
@ -1065,38 +1105,71 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) {
/*
Produces compact view of concrete concatenations such as (abcd).
*/
std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const {
std::ostream& seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const {
SASSERT(re.u.is_seq(s));
zstring z;
expr* x, * j, * k, * l, * i, * x_;
if (re.u.str.is_empty(s))
out << "()";
else if (re.u.str.is_unit(s))
seq_unit(out, s);
print_unit(out, s);
else if (re.u.str.is_concat(s)) {
expr_ref_vector es(re.m);
re.u.str.get_concat(s, es);
for (expr* e : es)
compact_helper_seq(out, e);
print_seq(out, e);
}
else if (re.u.str.is_string(s, z)) {
for (unsigned i = 0; i < z.length(); i++)
out << (char)z[i];
}
//using braces to indicate 'full' output
//for example an uninterpreted constant X will be printed as {X}
//while a unit sequence "X" will be printed as X
//thus for example (concat "X" "Y" Z "W") where Z is uninterpreted is printed as XY{Z}W
else out << "{" << mk_pp(s, re.m) << "}";
else if (re.u.str.is_extract(s, x, j, k)) {
rational jv, iv;
print(out, x);
if (arith_util(re.m).is_numeral(j, jv)) {
if (arith_util(re.m).is_numeral(k, iv)) {
// output X[j,k]
out << "[" << jv.get_int32() << "," << jv.get_int32() << "]";
}
else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ &&
arith_util(re.m).is_numeral(i, iv) && iv == jv) {
// case X[j,|X|-j] is denoted by X[j..]
out << "[" << jv.get_int32() << "..]";
}
else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) ||
(arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ &&
arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) {
// case X[j,|X|-j] is denoted by X[j..]
out << "[" << jv.get_int32() << "..]";
}
else {
out << "[" << jv.get_int32() << ",";
print(out, k);
out << "]";
}
}
else {
out << "[";
print(out, j);
out << ",";
print(out, k);
out << "]";
}
}
else
out << mk_pp(s, re.m);
return out;
}
/*
Produces output such as [a-z] for a range.
*/
std::ostream& seq_util::rex::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) const {
std::ostream& seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const {
out << "[";
seq_unit(out, s1) << "-";
seq_unit(out, s2) << "]";
print_unit(out, s1);
out << "-";
print_unit(out, s2);
out << "]";
return out;
}
@ -1111,8 +1184,8 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
/*
Specialize output for a unit sequence converting to visible ASCII characters if possible.
*/
std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
expr* e;
std::ostream& seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const {
expr* e, * i;
unsigned n = 0;
if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
char c = (char)n;
@ -1122,22 +1195,21 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
out << "\\r";
else if (c == '\f')
out << "\\f";
else if (c == ' ')
out << "\\s";
else if (c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']' || c == '.' || c == '\\')
out << "\\" << c;
else if (32 < n && n < 127) {
else if (32 <= n && n < 127 && n != '\"' && n != ' '
&& n != '\\' && n != '\'' && n != '?' && n != '.' && n != '(' && n != ')' && n != '[' && n != ']'
&& n != '{' && n != '}' && n != '&') {
if (html_encode) {
if (c == '<')
out << "&lt;";
else if (c == '>')
out << "&gt;";
else if (c == '&')
out << "&amp;";
else if (c == '\"')
out << "&quot;";
//else if (c == '&')
// out << "&amp;";
//else if (c == '\"')
// out << "&quot;";
else
out << "\\x" << std::hex << n;
//out << "\\x" << std::hex << n;
out << c;
}
else
out << c;
@ -1151,92 +1223,188 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
else
out << "\\u" << std::hex << n;
}
else if (re.u.str.is_nth_i(s, e, i)) {
print(out, e);
out << "[" << mk_pp(i, re.m) << "]";
}
else if (re.m.is_value(e))
out << mk_pp(e, re.m);
else if (is_app(e)) {
out << "(" << to_app(e)->get_decl()->get_name().str();
for (expr * arg : *to_app(e))
print(out << " ", arg);
out << ")";
}
else
out << "{" << mk_pp(s, re.m) << "}";
out << mk_pp(s, re.m);
return out;
}
/*
Pretty prints the regex r into the out stream
Pretty prints the regex r into the ostream out
*/
std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
unsigned lo = 0, hi = 0;
rational v;
if (re.u.is_char(e))
return seq_unit(out, e);
print_unit(out, e);
else if (re.u.is_seq(e))
return compact_helper_seq(out, e);
print_seq(out, e);
else if (re.is_full_char(e))
return out << ".";
out << ".";
else if (re.is_full_seq(e))
return out << ".*";
out << ".*";
else if (re.is_to_re(e, s))
return compact_helper_seq(out, s);
else if (re.is_range(e, s, s2))
return compact_helper_range(out, s, s2);
print_seq(out, s);
else if (re.is_range(e, s, s2))
print_range(out, s, s2);
else if (re.is_epsilon(e))
return out << "()";
// &#X03B5; = epsilon
out << (html_encode ? "&#X03B5;" : "()");
else if (re.is_empty(e))
return out << "[]";
else if (re.is_concat(e, r1, r2))
return out << pp(re, r1) << pp(re, r2);
else if (re.is_union(e, r1, r2))
return out << "(" << pp(re, r1) << "|" << pp(re, r2) << ")";
else if (re.is_intersection(e, r1, r2))
return out << "(" << pp(re, r1) << "&amp;" /*(html_encode ? ")&amp;(" : ")&(")*/ << pp(re, r2) << ")";
// &#X2205; = emptyset
out << (html_encode ? "&#X2205;" : "[]");
else if (re.is_concat(e, r1, r2)) {
print(out, r1);
print(out, r2);
}
else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) {
out << "(";
print(out, r1);
out << (html_encode ? "&#X22C3;" : "|");
print(out, r2);
out << ")";
}
else if (re.is_intersection(e, r1, r2))
{
out << "(";
print(out, r1);
out << (html_encode ? "&#X22C2;" : "&");
print(out, r2);
out << ")";
}
else if (re.is_complement(e, r1)) {
out << "~";
if (can_skip_parenth(r1))
return out << "~" << pp(re, r1);
else
return out << "~(" << pp(re, r1) << ")";
print(out, r1);
else {
out << "(";
print(out, r1);
out << ")";
}
}
else if (re.is_plus(e, r1)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "+";
else
return out << "(" << pp(re, r1) << ")+";
if (can_skip_parenth(r1)) {
print(out, r1);
out << "+";
}
else {
out << "(";
print(out, r1);
out << ")+";
}
}
else if (re.is_star(e, r1)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "*";
else
return out << "(" << pp(re, r1) << ")*";
if (can_skip_parenth(r1)) {
print(out, r1);
out << "*";
}
else {
out << "(";
print(out, r1);
out << ")*";
}
}
else if (re.is_loop(e, r1, lo)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "{" << lo << ",}";
else
return out << "(" << pp(re, r1) << "){" << lo << ",}";
if (can_skip_parenth(r1)) {
print(out, r1);
out << "{" << lo << ",}";
}
else
{
out << "(";
print(out, r1);
out << "){" << lo << ",}";
}
}
else if (re.is_loop(e, r1, lo, hi)) {
if (can_skip_parenth(r1)) {
print(out, r1);
if (lo == hi)
return out << pp(re, r1) << "{" << lo << "}";
else
return out << pp(re, r1) << "{" << lo << "," << hi << "}";
out << "{" << lo << "}";
else
out << "{" << lo << "," << hi << "}";
}
else {
out << "(";
print(out, r1);
if (lo == hi)
return out << "(" << pp(re, r1) << "){" << lo << "}";
out << "){" << lo << "}";
else
return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}";
out << "){" << lo << "," << hi << "}";
}
}
else if (re.is_diff(e, r1, r2))
return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")";
else if (re.m.is_ite(e, s, r1, r2))
return out << "if(" << mk_pp(s, re.m) << "," << pp(re, r1) << "," << pp(re, r2) << ")";
else if (re.is_opt(e, r1)) {
if (can_skip_parenth(r1))
return out << pp(re, r1) << "?";
else
return out << "(" << pp(re, r1) << ")?";
else if (re.is_diff(e, r1, r2)) {
out << "(";
print(out, r1);
out << ")\\(";
print(out, r2);
out << ")";
}
else if (re.m.is_ite(e, s, r1, r2)) {
out << (html_encode ? "(&#X1D422;&#X1D41F; " : "(if ");
print(out, s);
out << (html_encode ? " &#X1D42D;&#X1D5F5;&#X1D41E;&#X1D427; " : " then ");
print(out, r1);
out << (html_encode ? " &#X1D41E;&#X1D425;&#X1D600;&#X1D41E; " : " else ");
print(out, r2);
out << ")";
}
else if (re.is_opt(e, r1)) {
if (can_skip_parenth(r1)) {
print(out, r1);
out << "?";
}
else {
out << "(";
print(out, r1);
out << ")?";
}
}
else if (re.is_reverse(e, r1)) {
out << "(reverse ";
print(out, r1);
out << ")";
}
else if (re.m.is_eq(e, r1, r2)) {
out << "(";
print(out, r1);
out << "=";
print(out, r2);
out << ")";
}
else if (re.m.is_not(e, r1)) {
out << "!";
print(out, r1);
}
else if (re.m.is_value(e))
out << mk_pp(e, re.m);
else if (is_app(e)) {
out << "(" << to_app(e)->get_decl()->get_name().str();
for (expr* arg : *to_app(e))
print(out << " ", arg);
out << ")";
}
else if (re.is_reverse(e, r1))
return out << "reverse(" << pp(re, r1) << ")";
else
// Else: derivative or is_of_pred
return out << "{" << mk_pp(e, re.m) << "}";
// for all remaining cases use the default pretty printer
out << mk_pp(e, re.m);
return out;
}
/*
   Prints the expression ex held by this printer into out and returns out.
*/
std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
    // print hands back the stream it was given
    return print(out, ex);
}
/*
@ -1244,7 +1412,16 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
*/
std::string seq_util::rex::to_str(expr* r) const {
    std::ostringstream out;
    // third argument false: plain output, as opposed to to_strh which passes true
    pp(u.re, r, false).display(out);
    return out.str();
}
/*
   Pretty prints the regex r into a string, with HTML encoding of the output enabled
*/
std::string seq_util::rex::to_strh(expr* r) const {
    std::ostringstream buffer;
    pp printer(u.re, r, true);
    printer.display(buffer);
    return buffer.str();
}
@ -1290,7 +1467,7 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
else
result = mk_info_rec(to_app(e));
m_infos.setx(e->get_id(), result, invalid_info);
STRACE("re_info", tout << "compute_info(" << pp(u.re, e) << ")=" << result << std::endl;);
STRACE("re_info", tout << "compute_info(" << pp(u.re, e, false) << ")=" << result << std::endl;);
return result;
}