mirror of
https://github.com/Z3Prover/z3
synced 2025-04-23 09:05:31 +00:00
add range / loop handling for re. Fix regression reading mixed numerals reported by Trentin
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
9a6fe93e6c
commit
9909c056f0
8 changed files with 200 additions and 58 deletions
|
@ -24,18 +24,35 @@ Notes:
|
|||
#include"uint_set.h"
|
||||
#include"automaton.h"
|
||||
#include"well_sorted.h"
|
||||
#include"var_subst.h"
|
||||
|
||||
// Build the condition under which this symbol accepts the term `e`.
//  - character symbol: the equality  e = m_t
//  - predicate symbol: m_t with `e` supplied as the single substitution
//    argument to var_subst
expr_ref sym_expr::accept(expr* e) {
    ast_manager& mgr = m_t.get_manager();
    expr_ref cond(mgr);
    if (!m_is_pred) {
        cond = mgr.mk_eq(e, m_t);
        return cond;
    }
    var_subst instantiate(mgr);
    instantiate(m_t, 1, &e, cond);
    return cond;
}
|
||||
|
||||
std::ostream& sym_expr::display(std::ostream& out) const {
|
||||
return out << m_t;
|
||||
}
|
||||
|
||||
// Display helper: stores an ast_manager reference and exposes display()
// member functions that write an element to an output stream.
struct display_expr1 {
|
||||
// Manager passed to mk_pp by the expr* overload below.
ast_manager& m;
|
||||
display_expr1(ast_manager& m): m(m) {}
|
||||
// NOTE(review): this expr* overload has no closing brace in this view and is
// followed directly by a sym_expr* overload; presumably it is the pre-change
// version retained by the diff rendering - confirm against the real file.
std::ostream& display(std::ostream& out, expr* e) const {
|
||||
return out << mk_pp(e, m);
|
||||
// Delegates printing to sym_expr::display.
std::ostream& display(std::ostream& out, sym_expr* e) const {
|
||||
return e->display(out);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
re2automaton::re2automaton(ast_manager& m): m(m), u(m) {}
|
||||
re2automaton::re2automaton(ast_manager& m): m(m), u(m), bv(m) {}
|
||||
|
||||
eautomaton* re2automaton::operator()(expr* e) {
|
||||
eautomaton* r = re2aut(e);
|
||||
|
@ -53,6 +70,7 @@ eautomaton* re2automaton::re2aut(expr* e) {
|
|||
expr* e1, *e2;
|
||||
scoped_ptr<eautomaton> a, b;
|
||||
unsigned lo, hi;
|
||||
zstring s1, s2;
|
||||
if (u.re.is_to_re(e, e1)) {
|
||||
return seq2aut(e1);
|
||||
}
|
||||
|
@ -76,12 +94,27 @@ eautomaton* re2automaton::re2aut(expr* e) {
|
|||
a = eautomaton::mk_opt(*a);
|
||||
return a.detach();
|
||||
}
|
||||
else if (u.re.is_range(e)) {
|
||||
// TBD
|
||||
else if (u.re.is_range(e, e1, e2)) {
|
||||
if (u.str.is_string(e1, s1) && u.str.is_string(e2, s2) &&
|
||||
s1.length() == 1 && s2.length() == 1) {
|
||||
unsigned start = s1[0];
|
||||
unsigned stop = s2[0];
|
||||
unsigned nb = s1.num_bits();
|
||||
sort_ref s(bv.mk_sort(nb), m);
|
||||
expr_ref v(m.mk_var(0, s), m);
|
||||
expr_ref _start(bv.mk_numeral(start, nb), m);
|
||||
expr_ref _stop(bv.mk_numeral(stop, nb), m);
|
||||
expr_ref cond(m.mk_and(bv.mk_ule(_start, v), bv.mk_ule(v, _stop)), m);
|
||||
a = alloc(eautomaton, sm, sym_expr::mk_pred(cond));
|
||||
return a.detach();
|
||||
}
|
||||
else {
|
||||
TRACE("seq", tout << "Range expression is not handled: " << mk_pp(e, m) << "\n";);
|
||||
}
|
||||
}
|
||||
else if (u.re.is_loop(e, e1, lo, hi) && (a = re2aut(e1))) {
|
||||
scoped_ptr<eautomaton> eps = eautomaton::mk_epsilon(m);
|
||||
b = eautomaton::mk_epsilon(m);
|
||||
scoped_ptr<eautomaton> eps = eautomaton::mk_epsilon(sm);
|
||||
b = eautomaton::mk_epsilon(sm);
|
||||
while (hi > lo) {
|
||||
scoped_ptr<eautomaton> c = eautomaton::mk_concat(*a, *b);
|
||||
b = eautomaton::mk_union(*eps, *c);
|
||||
|
@ -94,10 +127,12 @@ eautomaton* re2automaton::re2aut(expr* e) {
|
|||
return b.detach();
|
||||
}
|
||||
#if 0
|
||||
else if (u.re.is_intersect(e, e1, e2)) {
|
||||
|
||||
}
|
||||
else if (u.re.is_empty(e)) {
|
||||
return alloc(eautomaton, m);
|
||||
}
|
||||
else if (u.re.is_full(e)) {
|
||||
}
|
||||
else if (u.re.is_intersect(e, e1, e2)) {
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -114,10 +149,10 @@ eautomaton* re2automaton::seq2aut(expr* e) {
|
|||
return eautomaton::mk_concat(*a, *b);
|
||||
}
|
||||
else if (u.str.is_unit(e, e1)) {
|
||||
return alloc(eautomaton, m, e1);
|
||||
return alloc(eautomaton, sm, sym_expr::mk_char(m, e1));
|
||||
}
|
||||
else if (u.str.is_empty(e)) {
|
||||
return eautomaton::mk_epsilon(m);
|
||||
return eautomaton::mk_epsilon(sm);
|
||||
}
|
||||
else if (u.str.is_string(e, s)) {
|
||||
unsigned init = 0;
|
||||
|
@ -126,9 +161,9 @@ eautomaton* re2automaton::seq2aut(expr* e) {
|
|||
final.push_back(s.length());
|
||||
for (unsigned k = 0; k < s.length(); ++k) {
|
||||
// reference count?
|
||||
mvs.push_back(eautomaton::move(m, k, k+1, u.str.mk_char(s, k)));
|
||||
mvs.push_back(eautomaton::move(sm, k, k+1, sym_expr::mk_char(m, u.str.mk_char(s, k))));
|
||||
}
|
||||
return alloc(eautomaton, m, init, final, mvs);
|
||||
return alloc(eautomaton, sm, init, final, mvs);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -679,11 +714,11 @@ bool seq_rewriter::is_sequence(eautomaton& aut, expr_ref_vector& seq) {
|
|||
return false;
|
||||
}
|
||||
visited.insert(state);
|
||||
expr* t = mvs[0].t();
|
||||
if (!t) {
|
||||
sym_expr* t = mvs[0].t();
|
||||
if (!t || !t->is_char()) {
|
||||
return false;
|
||||
}
|
||||
seq.push_back(m_util.str.mk_unit(t));
|
||||
seq.push_back(m_util.str.mk_unit(t->get_char()));
|
||||
state = mvs[0].dst();
|
||||
mvs.reset();
|
||||
aut.get_moves_from(state, mvs, true);
|
||||
|
@ -727,7 +762,7 @@ bool seq_rewriter::is_sequence(expr* e, expr_ref_vector& seq) {
|
|||
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
|
||||
scoped_ptr<eautomaton> aut;
|
||||
expr_ref_vector seq(m());
|
||||
if (!(aut = re2automaton(m())(b))) {
|
||||
if (!(aut = m_re2aut(b))) {
|
||||
return BR_FAILED;
|
||||
}
|
||||
|
||||
|
@ -769,8 +804,8 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
|
|||
aut->get_moves_from(state, mvs, false);
|
||||
for (unsigned j = 0; j < mvs.size(); ++j) {
|
||||
eautomaton::move const& mv = mvs[j];
|
||||
if (m().is_value(mv.t()) && m().is_value(ch)) {
|
||||
if (mv.t() == ch) {
|
||||
if (mv.t()->is_char() && m().is_value(mv.t()->get_char()) && m().is_value(ch)) {
|
||||
if (mv.t()->get_char() == ch) {
|
||||
add_next(next, mv.dst(), acc);
|
||||
}
|
||||
else {
|
||||
|
@ -778,7 +813,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
|
|||
}
|
||||
}
|
||||
else {
|
||||
cond = m().mk_eq(mv.t(), ch);
|
||||
cond = mv.t()->accept(ch);
|
||||
if (!m().is_true(acc)) cond = m().mk_and(acc, cond);
|
||||
add_next(next, mv.dst(), cond);
|
||||
}
|
||||
|
|
|
@ -26,11 +26,37 @@ Notes:
|
|||
#include"lbool.h"
|
||||
#include"automaton.h"
|
||||
|
||||
// Reference-counted wrapper around a term m_t, tagged either as a character
// (m_is_pred == false) or as a predicate (m_is_pred == true).
class sym_expr {
|
||||
// true when created through mk_pred, false when created through mk_char.
bool m_is_pred;
|
||||
// The wrapped term.
expr_ref m_t;
|
||||
// Reference count; the constructor initializes it to 0.
unsigned m_ref;
|
||||
// Private constructor: instances are created through mk_char / mk_pred below.
sym_expr(bool is_pred, expr_ref& t) : m_is_pred(is_pred), m_t(t), m_ref(0) {}
|
||||
public:
|
||||
// Declared here, defined out of line.
expr_ref accept(expr* e);
|
||||
// Allocate a character symbol from an existing expr_ref.
static sym_expr* mk_char(expr_ref& t) { return alloc(sym_expr, false, t); }
|
||||
// Allocate a character symbol from a raw term; wraps it in an expr_ref first.
static sym_expr* mk_char(ast_manager& m, expr* t) { expr_ref tr(t, m); return alloc(sym_expr, false, tr); }
|
||||
// Allocate a predicate symbol.
static sym_expr* mk_pred(expr_ref& t) { return alloc(sym_expr, true, t); }
|
||||
void inc_ref() { ++m_ref; }
|
||||
// Decrements the count and deallocates this object when it reaches 0.
// NOTE(review): m_ref is unsigned and nothing here checks it is > 0 before
// the decrement - confirm every dec_ref is preceded by a matching inc_ref.
void dec_ref() { --m_ref; if (m_ref == 0) dealloc(this); }
|
||||
// Declared here, defined out of line.
std::ostream& display(std::ostream& out) const;
|
||||
bool is_char() const { return !m_is_pred; }
|
||||
bool is_pred() const { return m_is_pred; }
|
||||
// Returns the wrapped term; SASSERTs that this is a character symbol.
expr* get_char() const { SASSERT(is_char()); return m_t; }
|
||||
|
||||
// NOTE(review): this typedef is presumably a removed line retained by the diff
// rendering rather than a member of the class - confirm against the real file.
typedef automaton<expr, ast_manager> eautomaton;
|
||||
};
|
||||
|
||||
class sym_expr_manager {
|
||||
public:
|
||||
void inc_ref(sym_expr* s) { if (s) s->inc_ref(); }
|
||||
void dec_ref(sym_expr* s) { if (s) s->dec_ref(); }
|
||||
};
|
||||
|
||||
typedef automaton<sym_expr, sym_expr_manager> eautomaton;
|
||||
class re2automaton {
|
||||
ast_manager& m;
|
||||
seq_util u;
|
||||
sym_expr_manager sm;
|
||||
seq_util u;
|
||||
bv_util bv;
|
||||
eautomaton* re2aut(expr* e);
|
||||
eautomaton* seq2aut(expr* e);
|
||||
public:
|
||||
|
@ -44,6 +70,7 @@ class re2automaton {
|
|||
class seq_rewriter {
|
||||
seq_util m_util;
|
||||
arith_util m_autil;
|
||||
re2automaton m_re2aut;
|
||||
expr_ref_vector m_es, m_lhs, m_rhs;
|
||||
|
||||
br_status mk_seq_concat(expr* a, expr* b, expr_ref& result);
|
||||
|
@ -80,7 +107,7 @@ class seq_rewriter {
|
|||
|
||||
public:
|
||||
seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
|
||||
m_util(m), m_autil(m), m_es(m), m_lhs(m), m_rhs(m) {
|
||||
m_util(m), m_autil(m), m_re2aut(m), m_es(m), m_lhs(m), m_rhs(m) {
|
||||
}
|
||||
ast_manager & m() const { return m_util.get_manager(); }
|
||||
family_id get_fid() const { return m_util.get_family_id(); }
|
||||
|
|
|
@ -22,12 +22,46 @@ Revision History:
|
|||
#include "ast_pp.h"
|
||||
#include <sstream>
|
||||
|
||||
/**
   \brief Decode a single hexadecimal digit.

   \param ch  character to classify.
   \param d   receives the digit value (0..15) when ch is a hex digit;
              left untouched otherwise.
   \return true iff ch is one of 0-9, A-F or a-f.

   Lowercase digits are accepted as well as uppercase ones, so that
   escapes written as \x0a are decoded the same way as \x0A.
*/
static bool is_hex_digit(char ch, unsigned& d) {
    if ('0' <= ch && ch <= '9') {
        d = ch - '0';
        return true;
    }
    if ('A' <= ch && ch <= 'F') {
        d = 10 + ch - 'A';
        return true;
    }
    if ('a' <= ch && ch <= 'f') {
        d = 10 + ch - 'a';
        return true;
    }
    return false;
}
|
||||
|
||||
static bool is_escape_char(char const *& s, unsigned& result) {
|
||||
unsigned d1, d2;
|
||||
if (*s == '\\' && *(s + 1) == 'x' &&
|
||||
is_hex_digit(*(s + 2), d1) && is_hex_digit(*(s + 3), d2)) {
|
||||
result = d1*16 + d2;
|
||||
s += 4;
|
||||
return true;
|
||||
}
|
||||
if (*s == '\\' && *(s + 1) == '\\') {
|
||||
result = '\\';
|
||||
s += 2;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Construct a zstring that records `enc` as its encoding; no characters are
// appended to the buffer here.
zstring::zstring(encoding enc): m_encoding(enc) {}
|
||||
|
||||
// Construct a zstring from the NUL-terminated character sequence `s`,
// recording `enc` as the encoding. Input is consumed until *s is 0.
zstring::zstring(char const* s, encoding enc): m_encoding(enc) {
|
||||
while (*s) {
|
||||
// NOTE(review): the next two statements (push_back(*s); ++s;) look like the
// pre-change loop body retained by the diff rendering. Read as live code they
// would append a character before the escape check - confirm against the real file.
m_buffer.push_back(*s);
|
||||
++s;
|
||||
unsigned ch;
|
||||
// is_escape_char advances s past the escape and stores the decoded value in ch.
if (is_escape_char(s, ch)) {
|
||||
m_buffer.push_back(ch);
|
||||
}
|
||||
else {
|
||||
// Not an escape: copy the character as-is and step forward by one.
m_buffer.push_back(*s);
|
||||
++s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,9 +114,10 @@ zstring zstring::replace(zstring const& src, zstring const& dst) const {
|
|||
return result;
|
||||
}
|
||||
|
||||
static const char esc_table[32][3] =
|
||||
{ "\\0", "^A", "^B", "^C", "^D", "^E", "^F", "\\a", "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "^N",
|
||||
"^O", "^P", "^Q", "^R", "^S", "^T", "^U", "^V","^W","^X","^Y","^Z","\\e","^\\","^]","^^","^_"};
|
||||
static const char esc_table[32][6] =
|
||||
{ "\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07", "\\x08", "\\x09", "\\n", "\\v", "\\f", "\\r", "\\x0E", "\\x0F",
|
||||
"\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F"
|
||||
};
|
||||
|
||||
std::string zstring::encode() const {
|
||||
SASSERT(m_encoding == ascii);
|
||||
|
@ -91,7 +126,7 @@ std::string zstring::encode() const {
|
|||
unsigned char ch = m_buffer[i];
|
||||
if (0 <= ch && ch < 32) {
|
||||
strm << esc_table[ch];
|
||||
}
|
||||
}
|
||||
else if (ch == 127) {
|
||||
strm << "^?";
|
||||
}
|
||||
|
|
|
@ -98,6 +98,7 @@ public:
|
|||
zstring& operator=(zstring const& other);
|
||||
zstring replace(zstring const& src, zstring const& dst) const;
|
||||
unsigned num_bits() const { return (m_encoding==ascii)?8:16; }
|
||||
encoding get_encoding() const { return m_encoding; }
|
||||
std::string encode() const;
|
||||
unsigned length() const { return m_buffer.size(); }
|
||||
unsigned operator[](unsigned i) const { return m_buffer[i]; }
|
||||
|
@ -298,17 +299,19 @@ public:
|
|||
bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); }
|
||||
bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_RE_CONCAT); }
|
||||
bool is_union(expr const* n) const { return is_app_of(n, m_fid, OP_RE_UNION); }
|
||||
bool is_inter(expr const* n) const { return is_app_of(n, m_fid, OP_RE_INTERSECT); }
|
||||
bool is_intersection(expr const* n) const { return is_app_of(n, m_fid, OP_RE_INTERSECT); }
|
||||
bool is_star(expr const* n) const { return is_app_of(n, m_fid, OP_RE_STAR); }
|
||||
bool is_plus(expr const* n) const { return is_app_of(n, m_fid, OP_RE_PLUS); }
|
||||
bool is_opt(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OPTION); }
|
||||
bool is_range(expr const* n) const { return is_app_of(n, m_fid, OP_RE_RANGE); }
|
||||
bool is_loop(expr const* n) const { return is_app_of(n, m_fid, OP_RE_LOOP); }
|
||||
|
||||
bool is_empty(expr const* n) const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); }
|
||||
bool is_full(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_SET); }
|
||||
MATCH_UNARY(is_to_re);
|
||||
MATCH_BINARY(is_concat);
|
||||
MATCH_BINARY(is_union);
|
||||
MATCH_BINARY(is_inter);
|
||||
MATCH_BINARY(is_intersection);
|
||||
MATCH_BINARY(is_range);
|
||||
MATCH_UNARY(is_star);
|
||||
MATCH_UNARY(is_plus);
|
||||
MATCH_UNARY(is_opt);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue