From 3ab75bdf3bb937be8d3309c4cbf1060fa6f53972 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 12:40:35 -0700 Subject: [PATCH 01/11] pp support for regex expressions is more-or-less standard syntax --- src/ast/seq_decl_plugin.cpp | 188 ++++++++++++++++++++++++++++++++++++ src/ast/seq_decl_plugin.h | 7 ++ src/smt/seq_regex.cpp | 5 + 3 files changed, 200 insertions(+) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index d359f010c..f5d708071 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1463,3 +1463,191 @@ bool seq_util::re::is_epsilon(expr* r) const { app* seq_util::re::mk_epsilon(sort* seq_sort) { return mk_to_re(u.str.mk_empty(seq_sort)); } + +/* + Provides a standard pretty printed view of the regex r when possible. +*/ +std::string seq_util::re::pp(expr* r) { + SASSERT(u.is_re(r)); + std::ostringstream buffer; + pp_compact_to_buffer(buffer, r); + return buffer.str(); +} + +void seq_util::re::pp_compact_to_buffer(std::ostringstream& buffer, expr* r) { + SASSERT(u.is_re(r)); + expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; + unsigned lo = 0, hi = 0; + if (is_full_char(r)) + buffer << "."; + else if (is_full_seq(r)) + buffer << ".*"; + else if (is_to_re(r, s)) + pp_compact_helper_seq(buffer, s); + else if (is_range(r, s, s2)) + pp_compact_helper_range(buffer, s, s2); + else if (is_epsilon(r)) + buffer << "()"; + else if (is_empty(r)) + buffer << "[]"; + else if (is_concat(r, r1, r2)) { + pp_compact_to_buffer(buffer, r1); + pp_compact_to_buffer(buffer, r2); + } + else if (is_union(r, r1, r2)) { + pp_compact_to_buffer(buffer, r1); + buffer << "|"; + pp_compact_to_buffer(buffer, r2); + } + else if (is_intersection(r, r1, r2)) { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")&("; + pp_compact_to_buffer(buffer, r2); + buffer << ")"; + } + else if (is_complement(r, r1)) { + buffer << "~"; + if (pp_can_skip_parenth(r1)) + pp_compact_to_buffer(buffer, r1); + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")"; + } + } + else if (is_plus(r, r1)) + if (pp_can_skip_parenth(r1)) { + pp_compact_to_buffer(buffer, r1); + buffer << "+"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")+"; + } + else if (is_star(r, r1)) + if (pp_can_skip_parenth(r1)) { + pp_compact_to_buffer(buffer, r1); + buffer << "*"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")*"; + } + else if (is_loop(r, r1, lo)) + if (pp_can_skip_parenth(r1)) + { + pp_compact_to_buffer(buffer, r1); + buffer << "{" << std::to_string(lo) << ",}"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << "){" << std::to_string(lo) << ",}"; + } + else if (is_loop(r, r1, lo, hi)) + if (pp_can_skip_parenth(r1)) + { + pp_compact_to_buffer(buffer, r1); + buffer << "{" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << "){" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + } + else if (is_diff(r, r1, r2)) { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")\\("; + pp_compact_to_buffer(buffer, r2); + buffer << ")"; + } + else if (m.is_ite(r, s, r1, r2)) { + buffer << "if(" << mk_pp(s, m) << ","; + pp_compact_to_buffer(buffer, r1); + buffer << ","; + pp_compact_to_buffer(buffer, r2); + buffer << ")"; + } + else if (is_opt(r, r1)) + if (pp_can_skip_parenth(r1)) { + pp_compact_to_buffer(buffer, r1); + buffer << "?"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")?"; + } + else if (is_reverse(r, r1)) { + buffer << "reverse("; + pp_compact_to_buffer(buffer, r1); + buffer << ")"; + } + else + // Else: derivative or is_of_pred + buffer << mk_pp(r, m); +} + +void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s) { + SASSERT(u.is_seq(s)); + if (m.is_value(s)) { + SASSERT(s->get_kind() == ast_kind::AST_APP); + if (u.str.is_concat(s)) { + expr_ref_vector es(m); + u.str.get_concat(s, es); + for (unsigned i = 0; i < es.size(); i++) + if (u.str.is_unit(es.get(i))) + pp_seq_unit(buffer, es.get(i)); + else + buffer << mk_pp(es.get(i), m); + } + else + pp_seq_unit(buffer, s); + } + else + buffer << mk_pp(s, m); +} + +void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2) { + buffer << "["; + if (u.str.is_unit(s1)) + pp_seq_unit(buffer, s1); + else + buffer << mk_pp(s1, m); + buffer << "-"; + if (u.str.is_unit(s2)) + pp_seq_unit(buffer, s2); + else + buffer << mk_pp(s1, m); + buffer << "]"; +} + +bool seq_util::re::pp_can_skip_parenth(expr* r) { + expr* s; + return ((is_to_re(r, s) && u.str.is_unit(s)) || is_range(r)); +} + +void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s) { + expr* e; + if (u.str.is_unit(s, e)) { + rational r; + unsigned sz; + if (u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { + unsigned n = r.get_unsigned(); + if (32 < n && n < 127) + buffer << (char)n; + else if (n < 10) + buffer << "\\x0" << std::hex << n; + else + buffer << "\\x" << std::hex << n; + } + else + buffer << mk_pp(s, m); + } + else + buffer << mk_pp(s, m); +} diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 78286fac0..74fcfe5f4 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -410,6 +410,12 @@ public: seq_util& u; ast_manager& m; family_id m_fid; + void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s); + void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2); + bool seq_util::re::pp_can_skip_parenth(expr* r); + void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s); + void pp_compact_to_buffer(std::ostringstream& buffer, expr* r); + public: re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} @@ -476,6 +482,7 @@ public: unsigned max_length(expr* r) const; bool is_epsilon(expr* r) const; app* mk_epsilon(sort* seq_sort); + std::string pp(expr* r); }; str str; re re; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 1803f5537..346acf811 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -651,6 +651,8 @@ namespace smt { } STRACE("seq_regex", tout << "Updating state graph for regex " << mk_pp(r, m) << ") ";); + if (!m_state_graph.is_seen(r_id)) + STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << re().pp(r) << std::endl;); // Add state m_state_graph.add_state(r_id); STRACE("seq_regex_brief", tout << std::endl << "USG(" @@ -669,6 +671,9 @@ namespace smt { unsigned dr_id = get_state_id(dr); STRACE("seq_regex_verbose", tout << std::endl << " traversing deriv: " << dr_id << " ";); + if (!m_state_graph.is_seen(dr_id)) + STRACE("state_graph", tout << "state(" << dr_id << ") = " << re().pp(dr) << std::endl;); + // Add state m_state_graph.add_state(dr_id); bool maybecycle = can_be_in_cycle(r, dr); m_state_graph.add_edge(r_id, dr_id, maybecycle); From 024ccf1b53d50b3804ef9d5fe38031d782af83a0 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 13:14:00 -0700 Subject: [PATCH 02/11] took care of comments for related PR --- src/ast/seq_decl_plugin.cpp | 169 ++++++++++++++++++------------------ src/ast/seq_decl_plugin.h | 8 +- 2 files changed, 90 insertions(+), 87 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index f5d708071..512af8d27 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1469,130 +1469,133 @@ app* seq_util::re::mk_epsilon(sort* seq_sort) { */ std::string seq_util::re::pp(expr* r) { SASSERT(u.is_re(r)); - std::ostringstream buffer; - pp_compact_to_buffer(buffer, r); - return buffer.str(); + std::ostringstream out; + pp(out, r); + return out.str(); } -void seq_util::re::pp_compact_to_buffer(std::ostringstream& buffer, expr* r) { +/* + Pretty prints a standard pretty printed view of the regex r into the out stream +*/ +void seq_util::re::pp(std::ostream& out, expr* r) { SASSERT(u.is_re(r)); expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; unsigned lo = 0, hi = 0; if (is_full_char(r)) - buffer << "."; + out << "."; else if (is_full_seq(r)) - buffer << ".*"; + out << ".*"; else if (is_to_re(r, s)) - pp_compact_helper_seq(buffer, s); + pp_compact_helper_seq(out, s); else if (is_range(r, s, s2)) - pp_compact_helper_range(buffer, s, s2); + pp_compact_helper_range(out, s, s2); else if (is_epsilon(r)) - buffer << "()"; + out << "()"; else if (is_empty(r)) - buffer << "[]"; + out << "[]"; else if (is_concat(r, r1, r2)) { - pp_compact_to_buffer(buffer, r1); - pp_compact_to_buffer(buffer, r2); + pp(out, r1); + pp(out, r2); } else if (is_union(r, r1, r2)) { - pp_compact_to_buffer(buffer, r1); - buffer << "|"; - pp_compact_to_buffer(buffer, r2); + pp(out, r1); + out << "|"; + pp(out, r2); } else if (is_intersection(r, r1, r2)) { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")&("; - pp_compact_to_buffer(buffer, r2); - buffer << ")"; + out << "("; + pp(out, r1); + out << ")&("; + pp(out, r2); + out << ")"; } else if (is_complement(r, r1)) { - buffer << "~"; + out << "~"; if (pp_can_skip_parenth(r1)) - pp_compact_to_buffer(buffer, r1); + pp(out, r1); else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")"; + out << "("; + pp(out, r1); + out << ")"; } } else if (is_plus(r, r1)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "+"; + pp(out, r1); + out << "+"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")+"; + out << "("; + pp(out, r1); + out << ")+"; } else if (is_star(r, r1)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "*"; + pp(out, r1); + out << "*"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")*"; + out << "("; + pp(out, r1); + out << ")*"; } else if (is_loop(r, r1, lo)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "{" << std::to_string(lo) << ",}"; + pp(out, r1); + out << "{" << lo << ",}"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << "){" << std::to_string(lo) << ",}"; + out << "("; + pp(out, r1); + out << "){" << lo << ",}"; } else if (is_loop(r, r1, lo, hi)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "{" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + pp(out, r1); + out << "{" << lo << "," << hi << "}"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << "){" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + out << "("; + pp(out, r1); + out << "){" << lo << "," << hi << "}"; } else if (is_diff(r, r1, r2)) { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")\\("; - pp_compact_to_buffer(buffer, r2); - buffer << ")"; + out << "("; + pp(out, r1); + out << ")\\("; + pp(out, r2); + out << ")"; } else if (m.is_ite(r, s, r1, r2)) { - buffer << "if(" << mk_pp(s, m) << ","; - pp_compact_to_buffer(buffer, r1); - buffer << ","; - pp_compact_to_buffer(buffer, r2); - buffer << ")"; + out << "if(" << mk_pp(s, m) << ","; + pp(out, r1); + out << ","; + pp(out, r2); + out << ")"; } else if (is_opt(r, r1)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "?"; + pp(out, r1); + out << "?"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")?"; + out << "("; + pp(out, r1); + out << ")?"; } else if (is_reverse(r, r1)) { - buffer << "reverse("; - pp_compact_to_buffer(buffer, r1); - buffer << ")"; + out << "reverse("; + pp(out, r1); + out << ")"; } else // Else: derivative or is_of_pred - buffer << mk_pp(r, m); + out << mk_pp(r, m); } -void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s) { +void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s) { SASSERT(u.is_seq(s)); if (m.is_value(s)) { SASSERT(s->get_kind() == ast_kind::AST_APP); @@ -1601,29 +1604,29 @@ void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s) { u.str.get_concat(s, es); for (unsigned i = 0; i < es.size(); i++) if (u.str.is_unit(es.get(i))) - pp_seq_unit(buffer, es.get(i)); + pp_seq_unit(out, es.get(i)); else - buffer << mk_pp(es.get(i), m); + out << mk_pp(es.get(i), m); } else - pp_seq_unit(buffer, s); + pp_seq_unit(out, s); } else - buffer << mk_pp(s, m); + out << mk_pp(s, m); } -void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2) { - buffer << "["; +void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2) { + out << "["; if (u.str.is_unit(s1)) - pp_seq_unit(buffer, s1); + pp_seq_unit(out, s1); else - buffer << mk_pp(s1, m); - buffer << "-"; + out << mk_pp(s1, m); + out << "-"; if (u.str.is_unit(s2)) - pp_seq_unit(buffer, s2); + pp_seq_unit(out, s2); else - buffer << mk_pp(s1, m); - buffer << "]"; + out << mk_pp(s1, m); + out << "]"; } bool seq_util::re::pp_can_skip_parenth(expr* r) { @@ -1631,7 +1634,7 @@ bool seq_util::re::pp_can_skip_parenth(expr* r) { return ((is_to_re(r, s) && u.str.is_unit(s)) || is_range(r)); } -void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s) { +void seq_util::re::pp_seq_unit(std::ostream& out, expr* s) { expr* e; if (u.str.is_unit(s, e)) { rational r; @@ -1639,15 +1642,15 @@ void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s) { if (u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { unsigned n = r.get_unsigned(); if (32 < n && n < 127) - buffer << (char)n; + out << (char)n; else if (n < 10) - buffer << "\\x0" << std::hex << n; + out << "\\x0" << std::hex << n; else - buffer << "\\x" << std::hex << n; + out << "\\x" << std::hex << n; } else - buffer << mk_pp(s, m); + out << mk_pp(s, m); } else - buffer << mk_pp(s, m); + out << mk_pp(s, m); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 74fcfe5f4..6c9e879ce 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -410,11 +410,10 @@ public: seq_util& u; ast_manager& m; family_id m_fid; - void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s); - void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2); + void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s); + void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2); bool seq_util::re::pp_can_skip_parenth(expr* r); - void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s); - void pp_compact_to_buffer(std::ostringstream& buffer, expr* r); + void seq_util::re::pp_seq_unit(std::ostream& out, expr* s); public: re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} @@ -483,6 +482,7 @@ public: bool is_epsilon(expr* r) const; app* mk_epsilon(sort* seq_sort); std::string pp(expr* r); + void pp(std::ostream& out, expr* r); }; str str; re re; From be6f7bb4d7641d0feb12e0ed42b38ec6be380332 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 16:44:13 -0700 Subject: [PATCH 03/11] further PR comment fixes --- src/ast/seq_decl_plugin.cpp | 271 +++++++++++++++--------------------- src/ast/seq_decl_plugin.h | 24 +++- src/smt/seq_regex.cpp | 4 +- 3 files changed, 132 insertions(+), 167 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 512af8d27..edaee87c4 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1465,181 +1465,64 @@ app* seq_util::re::mk_epsilon(sort* seq_sort) { } /* - Provides a standard pretty printed view of the regex r when possible. + Produces compact view of concrete concatenations such as (abcd). */ -std::string seq_util::re::pp(expr* r) { - SASSERT(u.is_re(r)); - std::ostringstream out; - pp(out, r); - return out.str(); +void seq_util::re::pp::compact_helper_seq(std::ostream& out, expr* s) { + SASSERT(re.u.is_seq(s)); + if (re.m.is_value(s)) { + SASSERT(s->get_kind() == ast_kind::AST_APP); + if (re.u.str.is_concat(s)) { + expr_ref_vector es(re.m); + re.u.str.get_concat(s, es); + for (unsigned i = 0; i < es.size(); i++) + if (re.u.str.is_unit(es.get(i))) + seq_unit(out, es.get(i)); + else + out << mk_pp(es.get(i), re.m); + } + else if (re.u.str.is_empty(s)) + out << "()"; + else + seq_unit(out, s); + } + else + out << mk_pp(s, re.m); } /* - Pretty prints a standard pretty printed view of the regex r into the out stream + Produces output such as [a-z] for a range. */ -void seq_util::re::pp(std::ostream& out, expr* r) { - SASSERT(u.is_re(r)); - expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; - unsigned lo = 0, hi = 0; - if (is_full_char(r)) - out << "."; - else if (is_full_seq(r)) - out << ".*"; - else if (is_to_re(r, s)) - pp_compact_helper_seq(out, s); - else if (is_range(r, s, s2)) - pp_compact_helper_range(out, s, s2); - else if (is_epsilon(r)) - out << "()"; - else if (is_empty(r)) - out << "[]"; - else if (is_concat(r, r1, r2)) { - pp(out, r1); - pp(out, r2); - } - else if (is_union(r, r1, r2)) { - pp(out, r1); - out << "|"; - pp(out, r2); - } - else if (is_intersection(r, r1, r2)) { - out << "("; - pp(out, r1); - out << ")&("; - pp(out, r2); - out << ")"; - } - else if (is_complement(r, r1)) { - out << "~"; - if (pp_can_skip_parenth(r1)) - pp(out, r1); - else { - out << "("; - pp(out, r1); - out << ")"; - } - } - else if (is_plus(r, r1)) - if (pp_can_skip_parenth(r1)) { - pp(out, r1); - out << "+"; - } - else { - out << "("; - pp(out, r1); - out << ")+"; - } - else if (is_star(r, r1)) - if (pp_can_skip_parenth(r1)) { - pp(out, r1); - out << "*"; - } - else { - out << "("; - pp(out, r1); - out << ")*"; - } - else if (is_loop(r, r1, lo)) - if (pp_can_skip_parenth(r1)) - { - pp(out, r1); - out << "{" << lo << ",}"; - } - else { - out << "("; - pp(out, r1); - out << "){" << lo << ",}"; - } - else if (is_loop(r, r1, lo, hi)) - if (pp_can_skip_parenth(r1)) - { - pp(out, r1); - out << "{" << lo << "," << hi << "}"; - } - else { - out << "("; - pp(out, r1); - out << "){" << lo << "," << hi << "}"; - } - else if (is_diff(r, r1, r2)) { - out << "("; - pp(out, r1); - out << ")\\("; - pp(out, r2); - out << ")"; - } - else if (m.is_ite(r, s, r1, r2)) { - out << "if(" << mk_pp(s, m) << ","; - pp(out, r1); - out << ","; - pp(out, r2); - out << ")"; - } - else if (is_opt(r, r1)) - if (pp_can_skip_parenth(r1)) { - pp(out, r1); - out << "?"; - } - else { - out << "("; - pp(out, r1); - out << ")?"; - } - else if (is_reverse(r, r1)) { - out << "reverse("; - pp(out, r1); - out << ")"; - } - else - // Else: derivative or is_of_pred - out << mk_pp(r, m); -} - -void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s) { - SASSERT(u.is_seq(s)); - if (m.is_value(s)) { - SASSERT(s->get_kind() == ast_kind::AST_APP); - if (u.str.is_concat(s)) { - expr_ref_vector es(m); - u.str.get_concat(s, es); - for (unsigned i = 0; i < es.size(); i++) - if (u.str.is_unit(es.get(i))) - pp_seq_unit(out, es.get(i)); - else - out << mk_pp(es.get(i), m); - } - else - pp_seq_unit(out, s); - } - else - out << mk_pp(s, m); -} - -void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2) { +void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) { out << "["; - if (u.str.is_unit(s1)) - pp_seq_unit(out, s1); + if (re.u.str.is_unit(s1)) + seq_unit(out, s1); else - out << mk_pp(s1, m); + out << mk_pp(s1, re.m); out << "-"; - if (u.str.is_unit(s2)) - pp_seq_unit(out, s2); + if (re.u.str.is_unit(s2)) + seq_unit(out, s2); else - out << mk_pp(s1, m); + out << mk_pp(s1, re.m); out << "]"; } -bool seq_util::re::pp_can_skip_parenth(expr* r) { +/* + Checks if parenthesis can be omitted in some cases in a loop body or in complement. +*/ +bool seq_util::re::pp::can_skip_parenth(expr* r) { expr* s; - return ((is_to_re(r, s) && u.str.is_unit(s)) || is_range(r)); + return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r)); } -void seq_util::re::pp_seq_unit(std::ostream& out, expr* s) { +/* + Specialize output for a unit sequence converting to visible ASCII characters if possible. +*/ +void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) { expr* e; - if (u.str.is_unit(s, e)) { + if (re.u.str.is_unit(s, e)) { rational r; unsigned sz; - if (u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { + if (re.u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { unsigned n = r.get_unsigned(); if (32 < n && n < 127) out << (char)n; @@ -1649,8 +1532,78 @@ void seq_util::re::pp_seq_unit(std::ostream& out, expr* s) { out << "\\x" << std::hex << n; } else - out << mk_pp(s, m); + out << mk_pp(s, re.m); } else - out << mk_pp(s, m); + out << mk_pp(s, re.m); +} + +/* + Pretty prints the regex r into the out stream +*/ +std::ostream& seq_util::re::pp::display(std::ostream& out) { + expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; + unsigned lo = 0, hi = 0; + if (re.is_full_char(e)) + return out << "."; + else if (re.is_full_seq(e)) + return out << ".*"; + else if (re.is_to_re(e, s)) + { + compact_helper_seq(out, s); + return out; + } + else if (re.is_range(e, s, s2)) { + compact_helper_range(out, s, s2); + return out; + } + else if (re.is_epsilon(e)) + return out << "()"; + else if (re.is_empty(e)) + return out << "[]"; + else if (re.is_concat(e, r1, r2)) + return out << pp(re, r1) << pp(re, r2); + else if (re.is_union(e, r1, r2)) + return out << pp(re, r1) << "|" << pp(re, r2); + else if (re.is_intersection(e, r1, r2)) + return out << "(" << pp(re, r1) << ")&(" << pp(re, r2) << ")"; + else if (re.is_complement(e, r1)) + if (can_skip_parenth(r1)) + return out << "~" << pp(re, r1); + else + return out << "~(" << pp(re, r1) << ")"; + else if (re.is_plus(e, r1)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "+"; + else + return out << "(" << pp(re, r1) << ")+"; + else if (re.is_star(e, r1)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "*"; + else + return out << "(" << pp(re, r1) << ")*"; + else if (re.is_loop(e, r1, lo)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "{" << lo << ",}"; + else + return out << "(" << pp(re, r1) << "){" << lo << ",}"; + else if (re.is_loop(e, r1, lo, hi)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "{" << lo << "," << hi << "}"; + else + return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}"; + else if (re.is_diff(e, r1, r2)) + return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")"; + else if (re.m.is_ite(e, s, r1, r2)) + return out << "if(" << mk_pp(s, re.m) << "," << pp(re, r1) << "," << pp(re, r2) << ")"; + else if (re.is_opt(e, r1)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "?"; + else + return out << "(" << pp(re, r1) << ")?"; + else if (re.is_reverse(e, r1)) + return out << "reverse(" << pp(re, r1) << ")"; + else + // Else: derivative or is_of_pred + return out << mk_pp(e, re.m); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 6c9e879ce..6ab017cec 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -410,10 +410,6 @@ public: seq_util& u; ast_manager& m; family_id m_fid; - void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s); - void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2); - bool seq_util::re::pp_can_skip_parenth(expr* r); - void seq_util::re::pp_seq_unit(std::ostream& out, expr* s); public: re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} @@ -481,8 +477,20 @@ public: unsigned max_length(expr* r) const; bool is_epsilon(expr* r) const; app* mk_epsilon(sort* seq_sort); - std::string pp(expr* r); - void pp(std::ostream& out, expr* r); + + class pp { + seq_util::re& re; + expr* e; + bool can_skip_parenth(expr* r); + void seq_unit(std::ostream& out, expr* s); + void compact_helper_seq(std::ostream& out, expr* s); + void compact_helper_range(std::ostream& out, expr* s1, expr* s2); + + public: + pp(seq_util::re& r, expr* e) : re(r), e(e) {} + std::ostream& display(std::ostream&); + }; + }; str str; re re; @@ -501,4 +509,8 @@ public: }; +inline std::ostream& operator<<(std::ostream& out, seq_util::re::pp& p) { return p.display(out); } + + + diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 346acf811..8c07ffb10 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -652,7 +652,7 @@ namespace smt { STRACE("seq_regex", tout << "Updating state graph for regex " << mk_pp(r, m) << ") ";); if (!m_state_graph.is_seen(r_id)) - STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << re().pp(r) << std::endl;); + STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << seq_util::re::pp(re(), r) << std::endl;); // Add state m_state_graph.add_state(r_id); STRACE("seq_regex_brief", tout << std::endl << "USG(" @@ -672,7 +672,7 @@ namespace smt { STRACE("seq_regex_verbose", tout << std::endl << " traversing deriv: " << dr_id << " ";); if (!m_state_graph.is_seen(dr_id)) - STRACE("state_graph", tout << "state(" << dr_id << ") = " << re().pp(dr) << std::endl;); + STRACE("state_graph", tout << "state(" << dr_id << ") = " << seq_util::re::pp(re(), dr) << std::endl;); // Add state m_state_graph.add_state(dr_id); bool maybecycle = can_be_in_cycle(r, dr); From 5b663aad7086168da889c0f35e209510f1a7ad97 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 17:12:59 -0700 Subject: [PATCH 04/11] updated detection of when parenthesis can be omitted to cover empty and epsilon --- src/ast/seq_decl_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index edaee87c4..ddaa87d6d 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1511,7 +1511,7 @@ void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s */ bool seq_util::re::pp::can_skip_parenth(expr* r) { expr* s; - return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r)); + return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r)); } /* From bfae1c420586b545be8058927cacfe8266a51bea Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 17:45:35 -0700 Subject: [PATCH 05/11] fixed bug in seq_unit --- src/ast/seq_decl_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index ddaa87d6d..6f85eff7c 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1526,7 +1526,7 @@ void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) { unsigned n = r.get_unsigned(); if (32 < n && n < 127) out << (char)n; - else if (n < 10) + else if (n < 16) out << "\\x0" << std::hex << n; else out << "\\x" << std::hex << n; From 2c33bd6fafeded0c2c9dbfc8b76e1b872cc44ad1 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 12:40:35 -0700 Subject: [PATCH 06/11] pp support for regex expressions is more-or-less standard syntax --- src/ast/seq_decl_plugin.cpp | 188 ++++++++++++++++++++++++++++++++++++ src/ast/seq_decl_plugin.h | 7 ++ src/smt/seq_regex.cpp | 9 +- 3 files changed, 203 insertions(+), 1 deletion(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index afa3a023f..bf55b4154 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1465,3 +1465,191 @@ bool seq_util::re::is_epsilon(expr* r) const { app* seq_util::re::mk_epsilon(sort* seq_sort) { return mk_to_re(u.str.mk_empty(seq_sort)); } + +/* + Provides a standard pretty printed view of the regex r when possible. +*/ +std::string seq_util::re::pp(expr* r) { + SASSERT(u.is_re(r)); + std::ostringstream buffer; + pp_compact_to_buffer(buffer, r); + return buffer.str(); +} + +void seq_util::re::pp_compact_to_buffer(std::ostringstream& buffer, expr* r) { + SASSERT(u.is_re(r)); + expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; + unsigned lo = 0, hi = 0; + if (is_full_char(r)) + buffer << "."; + else if (is_full_seq(r)) + buffer << ".*"; + else if (is_to_re(r, s)) + pp_compact_helper_seq(buffer, s); + else if (is_range(r, s, s2)) + pp_compact_helper_range(buffer, s, s2); + else if (is_epsilon(r)) + buffer << "()"; + else if (is_empty(r)) + buffer << "[]"; + else if (is_concat(r, r1, r2)) { + pp_compact_to_buffer(buffer, r1); + pp_compact_to_buffer(buffer, r2); + } + else if (is_union(r, r1, r2)) { + pp_compact_to_buffer(buffer, r1); + buffer << "|"; + pp_compact_to_buffer(buffer, r2); + } + else if (is_intersection(r, r1, r2)) { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")&("; + pp_compact_to_buffer(buffer, r2); + buffer << ")"; + } + else if (is_complement(r, r1)) { + buffer << "~"; + if (pp_can_skip_parenth(r1)) + pp_compact_to_buffer(buffer, r1); + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")"; + } + } + else if (is_plus(r, r1)) + if (pp_can_skip_parenth(r1)) { + pp_compact_to_buffer(buffer, r1); + buffer << "+"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")+"; + } + else if (is_star(r, r1)) + if (pp_can_skip_parenth(r1)) { + pp_compact_to_buffer(buffer, r1); + buffer << "*"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")*"; + } + else if (is_loop(r, r1, lo)) + if (pp_can_skip_parenth(r1)) + { + pp_compact_to_buffer(buffer, r1); + buffer << "{" << std::to_string(lo) << ",}"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << "){" << std::to_string(lo) << ",}"; + } + else if (is_loop(r, r1, lo, hi)) + if (pp_can_skip_parenth(r1)) + { + pp_compact_to_buffer(buffer, r1); + buffer << "{" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << "){" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + } + else if (is_diff(r, r1, r2)) { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")\\("; + pp_compact_to_buffer(buffer, r2); + buffer << ")"; + } + else if (m.is_ite(r, s, r1, r2)) { + buffer << "if(" << mk_pp(s, m) << ","; + pp_compact_to_buffer(buffer, r1); + buffer << ","; + pp_compact_to_buffer(buffer, r2); + buffer << ")"; + } + else if (is_opt(r, r1)) + if (pp_can_skip_parenth(r1)) { + pp_compact_to_buffer(buffer, r1); + buffer << "?"; + } + else { + buffer << "("; + pp_compact_to_buffer(buffer, r1); + buffer << ")?"; + } + else if (is_reverse(r, r1)) { + buffer << "reverse("; + pp_compact_to_buffer(buffer, r1); + buffer << ")"; + } + else + // Else: derivative or is_of_pred + buffer << mk_pp(r, m); +} + +void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s) { + SASSERT(u.is_seq(s)); + if (m.is_value(s)) { + SASSERT(s->get_kind() == ast_kind::AST_APP); + if (u.str.is_concat(s)) { + expr_ref_vector es(m); + u.str.get_concat(s, es); + for (unsigned i = 0; i < es.size(); i++) + if (u.str.is_unit(es.get(i))) + pp_seq_unit(buffer, es.get(i)); + else + buffer << mk_pp(es.get(i), m); + } + else + pp_seq_unit(buffer, s); + } + else + buffer << mk_pp(s, m); +} + +void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2) { + buffer << "["; + if (u.str.is_unit(s1)) + pp_seq_unit(buffer, s1); + else + buffer << mk_pp(s1, m); + buffer << "-"; + if (u.str.is_unit(s2)) + pp_seq_unit(buffer, s2); + else + buffer << mk_pp(s1, m); + buffer << "]"; +} + +bool seq_util::re::pp_can_skip_parenth(expr* r) { + expr* s; + return ((is_to_re(r, s) && u.str.is_unit(s)) || is_range(r)); +} + +void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s) { + expr* e; + if (u.str.is_unit(s, e)) { + rational r; + unsigned sz; + if (u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { + unsigned n = r.get_unsigned(); + if (32 < n && n < 127) + buffer << (char)n; + else if (n < 10) + buffer << "\\x0" << std::hex << n; + else + buffer << "\\x" << std::hex << n; + } + else + buffer << mk_pp(s, m); + } + else + buffer << mk_pp(s, m); +} diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index eb2feba7a..e4f55a7af 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -412,6 +412,12 @@ public: seq_util& u; ast_manager& m; family_id m_fid; + void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s); + void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2); + bool seq_util::re::pp_can_skip_parenth(expr* r); + void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s); + void pp_compact_to_buffer(std::ostringstream& buffer, expr* r); + public: re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} @@ -481,6 +487,7 @@ public: unsigned max_length(expr* r) const; bool is_epsilon(expr* r) const; app* mk_epsilon(sort* seq_sort); + std::string pp(expr* r); }; str str; re re; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 06b5e7f89..99ddfbc5b 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -791,6 +791,10 @@ namespace smt { STRACE("seq_regex_brief", tout << "(MAX SIZE REACHED) ";); return false; } + STRACE("seq_regex", tout << "Updating state graph for regex " + << mk_pp(r, m) << ") ";); + if (!m_state_graph.is_seen(r_id)) + STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << re().pp(r) << std::endl;); // Add state m_state_graph.add_state(r_id); STRACE("state_graph", tout << "regex(" << r_id << ") = " << mk_pp(r, m) << std::endl;); @@ -811,7 +815,10 @@ namespace smt { for (auto const& dr: derivatives) { unsigned dr_id = get_state_id(dr); STRACE("seq_regex_verbose", tout - << " traversing deriv: " << dr_id << " " << std::endl;); + << std::endl << " traversing deriv: " << dr_id << " ";); + if (!m_state_graph.is_seen(dr_id)) + STRACE("state_graph", tout << "state(" << dr_id << ") = " << re().pp(dr) << std::endl;); + // Add state m_state_graph.add_state(dr_id); STRACE("state_graph", tout << "regex(" << dr_id << ") = " << mk_pp(dr, m) << std::endl;); bool maybecycle = can_be_in_cycle(r, dr); From 5f9a3269105601a91627efc560f8752d2e8803fc Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 13:14:00 -0700 Subject: [PATCH 07/11] took care of comments for related PR --- src/ast/seq_decl_plugin.cpp | 169 ++++++++++++++++++------------------ src/ast/seq_decl_plugin.h | 8 +- 2 files changed, 90 insertions(+), 87 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index bf55b4154..cbee3abef 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1471,130 +1471,133 @@ app* seq_util::re::mk_epsilon(sort* seq_sort) { */ std::string seq_util::re::pp(expr* r) { SASSERT(u.is_re(r)); - std::ostringstream buffer; - pp_compact_to_buffer(buffer, r); - return buffer.str(); + std::ostringstream out; + pp(out, r); + return out.str(); } -void seq_util::re::pp_compact_to_buffer(std::ostringstream& buffer, expr* r) { +/* + Pretty prints a standard pretty printed view of the regex r into the out stream +*/ +void seq_util::re::pp(std::ostream& out, expr* r) { SASSERT(u.is_re(r)); expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; unsigned lo = 0, hi = 0; if (is_full_char(r)) - buffer << "."; + out << "."; else if (is_full_seq(r)) - buffer << ".*"; + out << ".*"; else if (is_to_re(r, s)) - pp_compact_helper_seq(buffer, s); + pp_compact_helper_seq(out, s); else if (is_range(r, s, s2)) - pp_compact_helper_range(buffer, s, s2); + pp_compact_helper_range(out, s, s2); else if (is_epsilon(r)) - buffer << "()"; + out << "()"; else if (is_empty(r)) - buffer << "[]"; + out << "[]"; else if (is_concat(r, r1, r2)) { - pp_compact_to_buffer(buffer, r1); - pp_compact_to_buffer(buffer, r2); + pp(out, r1); + pp(out, r2); } else if (is_union(r, r1, r2)) { - pp_compact_to_buffer(buffer, r1); - buffer << "|"; - pp_compact_to_buffer(buffer, r2); + pp(out, r1); + out << "|"; + pp(out, r2); } else if (is_intersection(r, r1, r2)) { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")&("; - pp_compact_to_buffer(buffer, r2); - buffer << ")"; + out << "("; + pp(out, r1); + out << ")&("; + pp(out, r2); + out << ")"; } else if (is_complement(r, r1)) { - buffer << "~"; + out << "~"; if (pp_can_skip_parenth(r1)) - pp_compact_to_buffer(buffer, r1); + pp(out, r1); else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")"; + out << "("; + pp(out, r1); + out << ")"; } } else if (is_plus(r, r1)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "+"; + pp(out, r1); + out << "+"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")+"; + out << "("; + pp(out, r1); + out << ")+"; } else if (is_star(r, r1)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "*"; + pp(out, r1); + out << "*"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")*"; + out << "("; + pp(out, r1); + out << ")*"; } else if (is_loop(r, r1, lo)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "{" << std::to_string(lo) << ",}"; + pp(out, r1); + out << "{" << lo << ",}"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << "){" << std::to_string(lo) << ",}"; + out << "("; + pp(out, r1); + out << "){" << lo << ",}"; } else if (is_loop(r, r1, lo, hi)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "{" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + pp(out, r1); + out << "{" << lo << "," << hi << "}"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << "){" << std::to_string(lo) << "," << std::to_string(hi) << "}"; + out << "("; + pp(out, r1); + out << "){" << lo << "," << hi << "}"; } else if (is_diff(r, r1, r2)) { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")\\("; - pp_compact_to_buffer(buffer, r2); - buffer << ")"; + out << "("; + pp(out, r1); + out << ")\\("; + pp(out, r2); + out << ")"; } else if (m.is_ite(r, s, r1, r2)) { - buffer << "if(" << mk_pp(s, m) << ","; - pp_compact_to_buffer(buffer, r1); - buffer << ","; - pp_compact_to_buffer(buffer, r2); - buffer << ")"; + out << "if(" << mk_pp(s, m) << ","; + pp(out, r1); + out << ","; + pp(out, r2); + out << ")"; } else if (is_opt(r, r1)) if (pp_can_skip_parenth(r1)) { - pp_compact_to_buffer(buffer, r1); - buffer << "?"; + pp(out, r1); + out << "?"; } else { - buffer << "("; - pp_compact_to_buffer(buffer, r1); - buffer << ")?"; + out << "("; + pp(out, r1); + out << ")?"; } else if (is_reverse(r, r1)) { - buffer << "reverse("; - pp_compact_to_buffer(buffer, r1); - buffer << ")"; + out << "reverse("; + pp(out, r1); + out << ")"; } else // Else: derivative or is_of_pred - buffer << mk_pp(r, m); + out << mk_pp(r, m); } -void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s) { +void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s) { SASSERT(u.is_seq(s)); if (m.is_value(s)) { SASSERT(s->get_kind() == ast_kind::AST_APP); @@ -1603,29 +1606,29 @@ void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s) { u.str.get_concat(s, es); for (unsigned i = 0; i < es.size(); i++) if (u.str.is_unit(es.get(i))) - pp_seq_unit(buffer, es.get(i)); + pp_seq_unit(out, es.get(i)); else - buffer << mk_pp(es.get(i), m); + out << mk_pp(es.get(i), m); } else - pp_seq_unit(buffer, s); + pp_seq_unit(out, s); } else - buffer << mk_pp(s, m); + out << mk_pp(s, m); } -void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2) { - buffer << "["; +void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2) { + out << "["; if (u.str.is_unit(s1)) - pp_seq_unit(buffer, s1); + pp_seq_unit(out, s1); else - buffer << mk_pp(s1, m); - buffer << "-"; + out << mk_pp(s1, m); + out << "-"; if (u.str.is_unit(s2)) - pp_seq_unit(buffer, s2); + pp_seq_unit(out, s2); else - buffer << mk_pp(s1, m); - buffer << "]"; + out << mk_pp(s1, m); + out << "]"; } bool seq_util::re::pp_can_skip_parenth(expr* r) { @@ -1633,7 +1636,7 @@ bool seq_util::re::pp_can_skip_parenth(expr* r) { return ((is_to_re(r, s) && u.str.is_unit(s)) || is_range(r)); } -void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s) { +void seq_util::re::pp_seq_unit(std::ostream& out, expr* s) { expr* e; if (u.str.is_unit(s, e)) { rational r; @@ -1641,15 +1644,15 @@ void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s) { if (u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { unsigned n = r.get_unsigned(); if (32 < n && n < 127) - buffer << (char)n; + out << (char)n; else if (n < 10) - buffer << "\\x0" << std::hex << n; + out << "\\x0" << std::hex << n; else - buffer << "\\x" << std::hex << n; + out << "\\x" << std::hex << n; } else - buffer << mk_pp(s, m); + out << mk_pp(s, m); } else - buffer << mk_pp(s, m); + out << mk_pp(s, m); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index e4f55a7af..772618fd8 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -412,11 +412,10 @@ public: seq_util& u; ast_manager& m; family_id m_fid; - void seq_util::re::pp_compact_helper_seq(std::ostringstream& buffer, expr* s); - void seq_util::re::pp_compact_helper_range(std::ostringstream& buffer, expr* s1, expr* s2); + void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s); + void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2); bool seq_util::re::pp_can_skip_parenth(expr* r); - void seq_util::re::pp_seq_unit(std::ostringstream& buffer, expr* s); - void pp_compact_to_buffer(std::ostringstream& buffer, expr* r); + void seq_util::re::pp_seq_unit(std::ostream& out, expr* s); public: re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} @@ -488,6 +487,7 @@ public: bool is_epsilon(expr* r) const; app* mk_epsilon(sort* seq_sort); std::string pp(expr* r); + void pp(std::ostream& out, expr* r); }; str str; re re; From ae413365e9f98af9a2e3559ff80b07056fe22581 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 16:44:13 -0700 Subject: [PATCH 08/11] further PR comment fixes --- src/ast/seq_decl_plugin.cpp | 271 +++++++++++++++--------------------- src/ast/seq_decl_plugin.h | 24 +++- src/smt/seq_regex.cpp | 4 +- 3 files changed, 132 insertions(+), 167 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index cbee3abef..2f2222b62 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1467,181 +1467,64 @@ app* seq_util::re::mk_epsilon(sort* seq_sort) { } /* - Provides a standard pretty printed view of the regex r when possible. + Produces compact view of concrete concatenations such as (abcd). */ -std::string seq_util::re::pp(expr* r) { - SASSERT(u.is_re(r)); - std::ostringstream out; - pp(out, r); - return out.str(); +void seq_util::re::pp::compact_helper_seq(std::ostream& out, expr* s) { + SASSERT(re.u.is_seq(s)); + if (re.m.is_value(s)) { + SASSERT(s->get_kind() == ast_kind::AST_APP); + if (re.u.str.is_concat(s)) { + expr_ref_vector es(re.m); + re.u.str.get_concat(s, es); + for (unsigned i = 0; i < es.size(); i++) + if (re.u.str.is_unit(es.get(i))) + seq_unit(out, es.get(i)); + else + out << mk_pp(es.get(i), re.m); + } + else if (re.u.str.is_empty(s)) + out << "()"; + else + seq_unit(out, s); + } + else + out << mk_pp(s, re.m); } /* - Pretty prints a standard pretty printed view of the regex r into the out stream + Produces output such as [a-z] for a range. */ -void seq_util::re::pp(std::ostream& out, expr* r) { - SASSERT(u.is_re(r)); - expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; - unsigned lo = 0, hi = 0; - if (is_full_char(r)) - out << "."; - else if (is_full_seq(r)) - out << ".*"; - else if (is_to_re(r, s)) - pp_compact_helper_seq(out, s); - else if (is_range(r, s, s2)) - pp_compact_helper_range(out, s, s2); - else if (is_epsilon(r)) - out << "()"; - else if (is_empty(r)) - out << "[]"; - else if (is_concat(r, r1, r2)) { - pp(out, r1); - pp(out, r2); - } - else if (is_union(r, r1, r2)) { - pp(out, r1); - out << "|"; - pp(out, r2); - } - else if (is_intersection(r, r1, r2)) { - out << "("; - pp(out, r1); - out << ")&("; - pp(out, r2); - out << ")"; - } - else if (is_complement(r, r1)) { - out << "~"; - if (pp_can_skip_parenth(r1)) - pp(out, r1); - else { - out << "("; - pp(out, r1); - out << ")"; - } - } - else if (is_plus(r, r1)) - if (pp_can_skip_parenth(r1)) { - pp(out, r1); - out << "+"; - } - else { - out << "("; - pp(out, r1); - out << ")+"; - } - else if (is_star(r, r1)) - if (pp_can_skip_parenth(r1)) { - pp(out, r1); - out << "*"; - } - else { - out << "("; - pp(out, r1); - out << ")*"; - } - else if (is_loop(r, r1, lo)) - if (pp_can_skip_parenth(r1)) - { - pp(out, r1); - out << "{" << lo << ",}"; - } - else { - out << "("; - pp(out, r1); - out << "){" << lo << ",}"; - } - else if (is_loop(r, r1, lo, hi)) - if (pp_can_skip_parenth(r1)) - { - pp(out, r1); - out << "{" << lo << "," << hi << "}"; - } - else { - out << "("; - pp(out, r1); - out << "){" << lo << "," << hi << "}"; - } - else if (is_diff(r, r1, r2)) { - out << "("; - pp(out, r1); - out << ")\\("; - pp(out, r2); - out << ")"; - } - else if (m.is_ite(r, s, r1, r2)) { - out << "if(" << mk_pp(s, m) << ","; - pp(out, r1); - out << ","; - pp(out, r2); - out << ")"; - } - else if (is_opt(r, r1)) - if (pp_can_skip_parenth(r1)) { - pp(out, r1); - out << "?"; - } - else { - out << "("; - pp(out, r1); - out << ")?"; - } - else if (is_reverse(r, r1)) { - out << "reverse("; - pp(out, r1); - out << ")"; - } - else - // Else: derivative or is_of_pred - out << mk_pp(r, m); -} - -void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s) { - SASSERT(u.is_seq(s)); - if (m.is_value(s)) { - SASSERT(s->get_kind() == ast_kind::AST_APP); - if (u.str.is_concat(s)) { - expr_ref_vector es(m); - u.str.get_concat(s, es); - for (unsigned i = 0; i < es.size(); i++) - if (u.str.is_unit(es.get(i))) - pp_seq_unit(out, es.get(i)); - else - out << mk_pp(es.get(i), m); - } - else - pp_seq_unit(out, s); - } - else - out << mk_pp(s, m); -} - -void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2) { +void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) { out << "["; - if (u.str.is_unit(s1)) - pp_seq_unit(out, s1); + if (re.u.str.is_unit(s1)) + seq_unit(out, s1); else - out << mk_pp(s1, m); + out << mk_pp(s1, re.m); out << "-"; - if (u.str.is_unit(s2)) - pp_seq_unit(out, s2); + if (re.u.str.is_unit(s2)) + seq_unit(out, s2); else - out << mk_pp(s1, m); + out << mk_pp(s1, re.m); out << "]"; } -bool seq_util::re::pp_can_skip_parenth(expr* r) { +/* + Checks if parenthesis can be omitted in some cases in a loop body or in complement. +*/ +bool seq_util::re::pp::can_skip_parenth(expr* r) { expr* s; - return ((is_to_re(r, s) && u.str.is_unit(s)) || is_range(r)); + return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r)); } -void seq_util::re::pp_seq_unit(std::ostream& out, expr* s) { +/* + Specialize output for a unit sequence converting to visible ASCII characters if possible. +*/ +void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) { expr* e; - if (u.str.is_unit(s, e)) { + if (re.u.str.is_unit(s, e)) { rational r; unsigned sz; - if (u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { + if (re.u.bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned()) { unsigned n = r.get_unsigned(); if (32 < n && n < 127) out << (char)n; @@ -1651,8 +1534,78 @@ void seq_util::re::pp_seq_unit(std::ostream& out, expr* s) { out << "\\x" << std::hex << n; } else - out << mk_pp(s, m); + out << mk_pp(s, re.m); } else - out << mk_pp(s, m); + out << mk_pp(s, re.m); +} + +/* + Pretty prints the regex r into the out stream +*/ +std::ostream& seq_util::re::pp::display(std::ostream& out) { + expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; + unsigned lo = 0, hi = 0; + if (re.is_full_char(e)) + return out << "."; + else if (re.is_full_seq(e)) + return out << ".*"; + else if (re.is_to_re(e, s)) + { + compact_helper_seq(out, s); + return out; + } + else if (re.is_range(e, s, s2)) { + compact_helper_range(out, s, s2); + return out; + } + else if (re.is_epsilon(e)) + return out << "()"; + else if (re.is_empty(e)) + return out << "[]"; + else if (re.is_concat(e, r1, r2)) + return out << pp(re, r1) << pp(re, r2); + else if (re.is_union(e, r1, r2)) + return out << pp(re, r1) << "|" << pp(re, r2); + else if (re.is_intersection(e, r1, r2)) + return out << "(" << pp(re, r1) << ")&(" << pp(re, r2) << ")"; + else if (re.is_complement(e, r1)) + if (can_skip_parenth(r1)) + return out << "~" << pp(re, r1); + else + return out << "~(" << pp(re, r1) << ")"; + else if (re.is_plus(e, r1)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "+"; + else + return out << "(" << pp(re, r1) << ")+"; + else if (re.is_star(e, r1)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "*"; + else + return out << "(" << pp(re, r1) << ")*"; + else if (re.is_loop(e, r1, lo)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "{" << lo << ",}"; + else + return out << "(" << pp(re, r1) << "){" << lo << ",}"; + else if (re.is_loop(e, r1, lo, hi)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "{" << lo << "," << hi << "}"; + else + return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}"; + else if (re.is_diff(e, r1, r2)) + return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")"; + else if (re.m.is_ite(e, s, r1, r2)) + return out << "if(" << mk_pp(s, re.m) << "," << pp(re, r1) << "," << pp(re, r2) << ")"; + else if (re.is_opt(e, r1)) + if (can_skip_parenth(r1)) + return out << pp(re, r1) << "?"; + else + return out << "(" << pp(re, r1) << ")?"; + else if (re.is_reverse(e, r1)) + return out << "reverse(" << pp(re, r1) << ")"; + else + // Else: derivative or is_of_pred + return out << mk_pp(e, re.m); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 772618fd8..25472015e 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -412,10 +412,6 @@ public: seq_util& u; ast_manager& m; family_id m_fid; - void seq_util::re::pp_compact_helper_seq(std::ostream& out, expr* s); - void seq_util::re::pp_compact_helper_range(std::ostream& out, expr* s1, expr* s2); - bool seq_util::re::pp_can_skip_parenth(expr* r); - void seq_util::re::pp_seq_unit(std::ostream& out, expr* s); public: re(seq_util& u): u(u), m(u.m), m_fid(u.m_fid) {} @@ -486,8 +482,20 @@ public: unsigned max_length(expr* r) const; bool is_epsilon(expr* r) const; app* mk_epsilon(sort* seq_sort); - std::string pp(expr* r); - void pp(std::ostream& out, expr* r); + + class pp { + seq_util::re& re; + expr* e; + bool can_skip_parenth(expr* r); + void seq_unit(std::ostream& out, expr* s); + void compact_helper_seq(std::ostream& out, expr* s); + void compact_helper_range(std::ostream& out, expr* s1, expr* s2); + + public: + pp(seq_util::re& r, expr* e) : re(r), e(e) {} + std::ostream& display(std::ostream&); + }; + }; str str; re re; @@ -506,4 +514,8 @@ public: }; +inline std::ostream& operator<<(std::ostream& out, seq_util::re::pp& p) { return p.display(out); } + + + diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 99ddfbc5b..d025c2eee 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -794,7 +794,7 @@ namespace smt { STRACE("seq_regex", tout << "Updating state graph for regex " << mk_pp(r, m) << ") ";); if (!m_state_graph.is_seen(r_id)) - STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << re().pp(r) << std::endl;); + STRACE("state_graph", tout << std::endl << "state(" << r_id << ") = " << seq_util::re::pp(re(), r) << std::endl;); // Add state m_state_graph.add_state(r_id); STRACE("state_graph", tout << "regex(" << r_id << ") = " << mk_pp(r, m) << std::endl;); @@ -817,7 +817,7 @@ namespace smt { STRACE("seq_regex_verbose", tout << std::endl << " traversing deriv: " << dr_id << " ";); if (!m_state_graph.is_seen(dr_id)) - STRACE("state_graph", tout << "state(" << dr_id << ") = " << re().pp(dr) << std::endl;); + STRACE("state_graph", tout << "state(" << dr_id << ") = " << seq_util::re::pp(re(), dr) << std::endl;); // Add state m_state_graph.add_state(dr_id); STRACE("state_graph", tout << "regex(" << dr_id << ") = " << mk_pp(dr, m) << std::endl;); From e80b143e71fba78e2cd2e940c5de65be906c56c0 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 17:12:59 -0700 Subject: [PATCH 09/11] updated detection of when parenthesis can be omitted to cover empty and epsilon --- src/ast/seq_decl_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 2f2222b62..7c884fbc5 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1513,7 +1513,7 @@ void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s */ bool seq_util::re::pp::can_skip_parenth(expr* r) { expr* s; - return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r)); + return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r)); } /* From 1567587b979b445cc2091ce32a4711a23efa54d7 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 17:45:35 -0700 Subject: [PATCH 10/11] fixed bug in seq_unit --- src/ast/seq_decl_plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 7c884fbc5..d69a0bc85 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1528,7 +1528,7 @@ void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) { unsigned n = r.get_unsigned(); if (32 < n && n < 127) out << (char)n; - else if (n < 10) + else if (n < 16) out << "\\x0" << std::hex << n; else out << "\\x" << std::hex << n; From 1233cb4621939cbc86af6b575102c6d12c496573 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Thu, 13 Aug 2020 20:04:35 -0700 Subject: [PATCH 11/11] added missing const declarations that caused build failure on some platforms --- src/ast/seq_decl_plugin.cpp | 10 +++++----- src/ast/seq_decl_plugin.h | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index d69a0bc85..008f2b76c 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1469,7 +1469,7 @@ app* seq_util::re::mk_epsilon(sort* seq_sort) { /* Produces compact view of concrete concatenations such as (abcd). */ -void seq_util::re::pp::compact_helper_seq(std::ostream& out, expr* s) { +void seq_util::re::pp::compact_helper_seq(std::ostream& out, expr* s) const { SASSERT(re.u.is_seq(s)); if (re.m.is_value(s)) { SASSERT(s->get_kind() == ast_kind::AST_APP); @@ -1494,7 +1494,7 @@ void seq_util::re::pp::compact_helper_seq(std::ostream& out, expr* s) { /* Produces output such as [a-z] for a range. */ -void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) { +void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) const { out << "["; if (re.u.str.is_unit(s1)) seq_unit(out, s1); @@ -1511,7 +1511,7 @@ void seq_util::re::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s /* Checks if parenthesis can be omitted in some cases in a loop body or in complement. */ -bool seq_util::re::pp::can_skip_parenth(expr* r) { +bool seq_util::re::pp::can_skip_parenth(expr* r) const { expr* s; return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r)); } @@ -1519,7 +1519,7 @@ bool seq_util::re::pp::can_skip_parenth(expr* r) { /* Specialize output for a unit sequence converting to visible ASCII characters if possible. */ -void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) { +void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) const { expr* e; if (re.u.str.is_unit(s, e)) { rational r; @@ -1543,7 +1543,7 @@ void seq_util::re::pp::seq_unit(std::ostream& out, expr* s) { /* Pretty prints the regex r into the out stream */ -std::ostream& seq_util::re::pp::display(std::ostream& out) { +std::ostream& seq_util::re::pp::display(std::ostream& out) const { expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; unsigned lo = 0, hi = 0; if (re.is_full_char(e)) diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 25472015e..9e1639729 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -486,14 +486,14 @@ public: class pp { seq_util::re& re; expr* e; - bool can_skip_parenth(expr* r); - void seq_unit(std::ostream& out, expr* s); - void compact_helper_seq(std::ostream& out, expr* s); - void compact_helper_range(std::ostream& out, expr* s1, expr* s2); + bool can_skip_parenth(expr* r) const; + void seq_unit(std::ostream& out, expr* s) const; + void compact_helper_seq(std::ostream& out, expr* s) const; + void compact_helper_range(std::ostream& out, expr* s1, expr* s2) const; public: pp(seq_util::re& r, expr* e) : re(r), e(e) {} - std::ostream& display(std::ostream&); + std::ostream& display(std::ostream&) const; }; }; @@ -514,7 +514,7 @@ public: }; -inline std::ostream& operator<<(std::ostream& out, seq_util::re::pp& p) { return p.display(out); } +inline std::ostream& operator<<(std::ostream& out, seq_util::re::pp const & p) { return p.display(out); }