3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-08 10:25:18 +00:00

add range / loop handling for re. Fix regression reading mixed numerals reported by Trentin

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2016-01-13 00:49:31 -08:00
parent 9a6fe93e6c
commit 9909c056f0
8 changed files with 200 additions and 58 deletions

View file

@ -24,18 +24,35 @@ Notes:
#include"uint_set.h"
#include"automaton.h"
#include"well_sorted.h"
#include"var_subst.h"
expr_ref sym_expr::accept(expr* e) {
ast_manager& m = m_t.get_manager();
expr_ref result(m);
if (m_is_pred) {
var_subst subst(m);
subst(m_t, 1, &e, result);
}
else {
result = m.mk_eq(e, m_t);
}
return result;
}
std::ostream& sym_expr::display(std::ostream& out) const {
return out << m_t;
}
struct display_expr1 {
ast_manager& m;
display_expr1(ast_manager& m): m(m) {}
std::ostream& display(std::ostream& out, expr* e) const {
return out << mk_pp(e, m);
std::ostream& display(std::ostream& out, sym_expr* e) const {
return e->display(out);
}
};
re2automaton::re2automaton(ast_manager& m): m(m), u(m) {}
re2automaton::re2automaton(ast_manager& m): m(m), u(m), bv(m) {}
eautomaton* re2automaton::operator()(expr* e) {
eautomaton* r = re2aut(e);
@ -53,6 +70,7 @@ eautomaton* re2automaton::re2aut(expr* e) {
expr* e1, *e2;
scoped_ptr<eautomaton> a, b;
unsigned lo, hi;
zstring s1, s2;
if (u.re.is_to_re(e, e1)) {
return seq2aut(e1);
}
@ -76,12 +94,27 @@ eautomaton* re2automaton::re2aut(expr* e) {
a = eautomaton::mk_opt(*a);
return a.detach();
}
else if (u.re.is_range(e)) {
// TBD
else if (u.re.is_range(e, e1, e2)) {
if (u.str.is_string(e1, s1) && u.str.is_string(e2, s2) &&
s1.length() == 1 && s2.length() == 1) {
unsigned start = s1[0];
unsigned stop = s2[0];
unsigned nb = s1.num_bits();
sort_ref s(bv.mk_sort(nb), m);
expr_ref v(m.mk_var(0, s), m);
expr_ref _start(bv.mk_numeral(start, nb), m);
expr_ref _stop(bv.mk_numeral(stop, nb), m);
expr_ref cond(m.mk_and(bv.mk_ule(_start, v), bv.mk_ule(v, _stop)), m);
a = alloc(eautomaton, sm, sym_expr::mk_pred(cond));
return a.detach();
}
else {
TRACE("seq", tout << "Range expression is not handled: " << mk_pp(e, m) << "\n";);
}
}
else if (u.re.is_loop(e, e1, lo, hi) && (a = re2aut(e1))) {
scoped_ptr<eautomaton> eps = eautomaton::mk_epsilon(m);
b = eautomaton::mk_epsilon(m);
scoped_ptr<eautomaton> eps = eautomaton::mk_epsilon(sm);
b = eautomaton::mk_epsilon(sm);
while (hi > lo) {
scoped_ptr<eautomaton> c = eautomaton::mk_concat(*a, *b);
b = eautomaton::mk_union(*eps, *c);
@ -94,10 +127,12 @@ eautomaton* re2automaton::re2aut(expr* e) {
return b.detach();
}
#if 0
else if (u.re.is_intersect(e, e1, e2)) {
}
else if (u.re.is_empty(e)) {
return alloc(eautomaton, m);
}
else if (u.re.is_full(e)) {
}
else if (u.re.is_intersect(e, e1, e2)) {
}
#endif
@ -114,10 +149,10 @@ eautomaton* re2automaton::seq2aut(expr* e) {
return eautomaton::mk_concat(*a, *b);
}
else if (u.str.is_unit(e, e1)) {
return alloc(eautomaton, m, e1);
return alloc(eautomaton, sm, sym_expr::mk_char(m, e1));
}
else if (u.str.is_empty(e)) {
return eautomaton::mk_epsilon(m);
return eautomaton::mk_epsilon(sm);
}
else if (u.str.is_string(e, s)) {
unsigned init = 0;
@ -126,9 +161,9 @@ eautomaton* re2automaton::seq2aut(expr* e) {
final.push_back(s.length());
for (unsigned k = 0; k < s.length(); ++k) {
// reference count?
mvs.push_back(eautomaton::move(m, k, k+1, u.str.mk_char(s, k)));
mvs.push_back(eautomaton::move(sm, k, k+1, sym_expr::mk_char(m, u.str.mk_char(s, k))));
}
return alloc(eautomaton, m, init, final, mvs);
return alloc(eautomaton, sm, init, final, mvs);
}
return 0;
}
@ -679,11 +714,11 @@ bool seq_rewriter::is_sequence(eautomaton& aut, expr_ref_vector& seq) {
return false;
}
visited.insert(state);
expr* t = mvs[0].t();
if (!t) {
sym_expr* t = mvs[0].t();
if (!t || !t->is_char()) {
return false;
}
seq.push_back(m_util.str.mk_unit(t));
seq.push_back(m_util.str.mk_unit(t->get_char()));
state = mvs[0].dst();
mvs.reset();
aut.get_moves_from(state, mvs, true);
@ -727,7 +762,7 @@ bool seq_rewriter::is_sequence(expr* e, expr_ref_vector& seq) {
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
scoped_ptr<eautomaton> aut;
expr_ref_vector seq(m());
if (!(aut = re2automaton(m())(b))) {
if (!(aut = m_re2aut(b))) {
return BR_FAILED;
}
@ -769,8 +804,8 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
aut->get_moves_from(state, mvs, false);
for (unsigned j = 0; j < mvs.size(); ++j) {
eautomaton::move const& mv = mvs[j];
if (m().is_value(mv.t()) && m().is_value(ch)) {
if (mv.t() == ch) {
if (mv.t()->is_char() && m().is_value(mv.t()->get_char()) && m().is_value(ch)) {
if (mv.t()->get_char() == ch) {
add_next(next, mv.dst(), acc);
}
else {
@ -778,7 +813,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
}
}
else {
cond = m().mk_eq(mv.t(), ch);
cond = mv.t()->accept(ch);
if (!m().is_true(acc)) cond = m().mk_and(acc, cond);
add_next(next, mv.dst(), cond);
}

View file

@ -26,11 +26,37 @@ Notes:
#include"lbool.h"
#include"automaton.h"
class sym_expr {
bool m_is_pred;
expr_ref m_t;
unsigned m_ref;
sym_expr(bool is_pred, expr_ref& t) : m_is_pred(is_pred), m_t(t), m_ref(0) {}
public:
expr_ref accept(expr* e);
static sym_expr* mk_char(expr_ref& t) { return alloc(sym_expr, false, t); }
static sym_expr* mk_char(ast_manager& m, expr* t) { expr_ref tr(t, m); return alloc(sym_expr, false, tr); }
static sym_expr* mk_pred(expr_ref& t) { return alloc(sym_expr, true, t); }
void inc_ref() { ++m_ref; }
void dec_ref() { --m_ref; if (m_ref == 0) dealloc(this); }
std::ostream& display(std::ostream& out) const;
bool is_char() const { return !m_is_pred; }
bool is_pred() const { return m_is_pred; }
expr* get_char() const { SASSERT(is_char()); return m_t; }
typedef automaton<expr, ast_manager> eautomaton;
};
class sym_expr_manager {
public:
void inc_ref(sym_expr* s) { if (s) s->inc_ref(); }
void dec_ref(sym_expr* s) { if (s) s->dec_ref(); }
};
typedef automaton<sym_expr, sym_expr_manager> eautomaton;
class re2automaton {
ast_manager& m;
seq_util u;
sym_expr_manager sm;
seq_util u;
bv_util bv;
eautomaton* re2aut(expr* e);
eautomaton* seq2aut(expr* e);
public:
@ -44,6 +70,7 @@ class re2automaton {
class seq_rewriter {
seq_util m_util;
arith_util m_autil;
re2automaton m_re2aut;
expr_ref_vector m_es, m_lhs, m_rhs;
br_status mk_seq_concat(expr* a, expr* b, expr_ref& result);
@ -80,7 +107,7 @@ class seq_rewriter {
public:
seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
m_util(m), m_autil(m), m_es(m), m_lhs(m), m_rhs(m) {
m_util(m), m_autil(m), m_re2aut(m), m_es(m), m_lhs(m), m_rhs(m) {
}
ast_manager & m() const { return m_util.get_manager(); }
family_id get_fid() const { return m_util.get_family_id(); }

View file

@ -22,12 +22,46 @@ Revision History:
#include "ast_pp.h"
#include <sstream>
static bool is_hex_digit(char ch, unsigned& d) {
if ('0' <= ch && ch <= '9') {
d = ch - '0';
return true;
}
if ('A' <= ch && ch <= 'F') {
d = 10 + ch - 'A';
return true;
}
return false;
}
static bool is_escape_char(char const *& s, unsigned& result) {
unsigned d1, d2;
if (*s == '\\' && *(s + 1) == 'x' &&
is_hex_digit(*(s + 2), d1) && is_hex_digit(*(s + 3), d2)) {
result = d1*16 + d2;
s += 4;
return true;
}
if (*s == '\\' && *(s + 1) == '\\') {
result = '\\';
s += 2;
return true;
}
return false;
}
zstring::zstring(encoding enc): m_encoding(enc) {}
zstring::zstring(char const* s, encoding enc): m_encoding(enc) {
while (*s) {
m_buffer.push_back(*s);
++s;
unsigned ch;
if (is_escape_char(s, ch)) {
m_buffer.push_back(ch);
}
else {
m_buffer.push_back(*s);
++s;
}
}
}
@ -80,9 +114,10 @@ zstring zstring::replace(zstring const& src, zstring const& dst) const {
return result;
}
static const char esc_table[32][3] =
{ "\\0", "^A", "^B", "^C", "^D", "^E", "^F", "\\a", "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "^N",
"^O", "^P", "^Q", "^R", "^S", "^T", "^U", "^V","^W","^X","^Y","^Z","\\e","^\\","^]","^^","^_"};
static const char esc_table[32][6] =
{ "\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07", "\\x08", "\\x09", "\\n", "\\v", "\\f", "\\r", "\\x0E", "\\x0F",
"\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F"
};
std::string zstring::encode() const {
SASSERT(m_encoding == ascii);
@ -91,7 +126,7 @@ std::string zstring::encode() const {
unsigned char ch = m_buffer[i];
if (0 <= ch && ch < 32) {
strm << esc_table[ch];
}
}
else if (ch == 127) {
strm << "^?";
}

View file

@ -98,6 +98,7 @@ public:
zstring& operator=(zstring const& other);
zstring replace(zstring const& src, zstring const& dst) const;
unsigned num_bits() const { return (m_encoding==ascii)?8:16; }
encoding get_encoding() const { return m_encoding; }
std::string encode() const;
unsigned length() const { return m_buffer.size(); }
unsigned operator[](unsigned i) const { return m_buffer[i]; }
@ -298,17 +299,19 @@ public:
bool is_to_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_TO_RE); }
bool is_concat(expr const* n) const { return is_app_of(n, m_fid, OP_RE_CONCAT); }
bool is_union(expr const* n) const { return is_app_of(n, m_fid, OP_RE_UNION); }
bool is_inter(expr const* n) const { return is_app_of(n, m_fid, OP_RE_INTERSECT); }
bool is_intersection(expr const* n) const { return is_app_of(n, m_fid, OP_RE_INTERSECT); }
bool is_star(expr const* n) const { return is_app_of(n, m_fid, OP_RE_STAR); }
bool is_plus(expr const* n) const { return is_app_of(n, m_fid, OP_RE_PLUS); }
bool is_opt(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OPTION); }
bool is_range(expr const* n) const { return is_app_of(n, m_fid, OP_RE_RANGE); }
bool is_loop(expr const* n) const { return is_app_of(n, m_fid, OP_RE_LOOP); }
bool is_empty(expr const* n) const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); }
bool is_full(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_SET); }
MATCH_UNARY(is_to_re);
MATCH_BINARY(is_concat);
MATCH_BINARY(is_union);
MATCH_BINARY(is_inter);
MATCH_BINARY(is_intersection);
MATCH_BINARY(is_range);
MATCH_UNARY(is_star);
MATCH_UNARY(is_plus);
MATCH_UNARY(is_opt);

View file

@ -151,7 +151,7 @@ public:
m_final_states(other.m_final_states)
{}
// create the automaton that accepts the empty string only.
// create the automaton that accepts the empty string/sequence only.
static automaton* mk_epsilon(M& m) {
moves mvs;
unsigned_vector final;
@ -168,10 +168,24 @@ public:
return alloc(automaton, m, 0, final, mvs);
}
static automaton* clone(automaton const& a) {
moves mvs;
unsigned_vector final;
append_moves(0, a, mvs);
append_final(0, a, final);
return alloc(automaton, a.m, a.init(), final, mvs);
}
// create the sum of disjoint automata
static automaton* mk_union(automaton const& a, automaton const& b) {
SASSERT(&a.m == &b.m);
M& m = a.m;
if (a.is_empty()) {
return clone(b);
}
if (b.is_empty()) {
return clone(a);
}
moves mvs;
unsigned_vector final;
unsigned offset1 = 1;
@ -196,6 +210,10 @@ public:
init = 0;
mvs.push_back(move(m, 0, a.init() + offset));
}
if (a.is_empty()) {
return clone(a);
}
mvs.push_back(move(m, init, a.final_state() + offset));
append_moves(offset, a, mvs);
append_final(offset, a, final);
@ -206,6 +224,19 @@ public:
static automaton* mk_concat(automaton const& a, automaton const& b) {
SASSERT(&a.m == &b.m);
M& m = a.m;
if (a.is_empty()) {
return clone(a);
}
if (b.is_empty()) {
return clone(b);
}
if (a.is_epsilon()) {
return clone(b);
}
if (b.is_epsilon()) {
return clone(a);
}
moves mvs;
unsigned_vector final;
unsigned init = 0;
@ -295,6 +326,10 @@ public:
// src - e -> dst - ET -> Dst1 => src - ET -> Dst1 if in_degree(dst) = 1, src != dst
// Src - E -> dst - et -> dst1 => Src - et -> dst1 if out_degree(dst) = 1, src != dst
//
// Some missing:
// src - et -> dst - E -> Dst1 => src - et -> Dst1 if in_degree(dst) = 1
// Src - ET -> dst - e -> dst1 => Src - ET -> dst1 if out_degree(dst) = 1,
//
void compress() {
for (unsigned i = 0; i < m_delta.size(); ++i) {
for (unsigned j = 0; j < m_delta[i].size(); ++j) {
@ -446,6 +481,7 @@ public:
}
bool is_empty() const { return m_final_states.empty(); }
bool is_epsilon() const { return m_final_states.size() == 1 && m_final_states.back() == init() && m_delta.empty(); }
unsigned final_state() const { return m_final_states[0]; }
bool has_single_final_sink() const { return m_final_states.size() == 1 && m_delta[final_state()].empty(); }
unsigned num_states() const { return m_delta.size(); }

View file

@ -399,7 +399,7 @@ namespace smt2 {
void check_keyword(char const * msg) { if (!curr_is_keyword()) throw parser_exception(msg); }
void check_string(char const * msg) { if (!curr_is_string()) throw parser_exception(msg); }
void check_int(char const * msg) { if (!curr_is_int()) throw parser_exception(msg); }
void check_int_or_float(char const * msg) { if (!curr_is_int() || !curr_is_float()) throw parser_exception(msg); }
void check_int_or_float(char const * msg) { if (!curr_is_int() && !curr_is_float()) throw parser_exception(msg); }
void check_float(char const * msg) { if (!curr_is_float()) throw parser_exception(msg); }
void error(unsigned line, unsigned pos, char const * msg) {
@ -2341,7 +2341,7 @@ namespace smt2 {
break;
}
case CPK_NUMERAL:
check_int("invalid command argument, numeral expected");
check_int_or_float("invalid command argument, numeral expected");
m_curr_cmd->set_next_arg(m_ctx, curr_numeral());
next();
break;

View file

@ -31,8 +31,8 @@ using namespace smt;
struct display_expr {
ast_manager& m;
display_expr(ast_manager& m): m(m) {}
std::ostream& display(std::ostream& out, expr* e) const {
return out << mk_pp(e, m);
std::ostream& display(std::ostream& out, sym_expr* e) const {
return e->display(out);
}
};
@ -169,7 +169,8 @@ theory_seq::theory_seq(ast_manager& m):
m_lhs(m), m_rhs(m),
m_atoms_qhead(0),
m_new_solution(false),
m_new_propagation(false) {
m_new_propagation(false),
m_mk_aut(m) {
m_prefix = "seq.prefix.suffix";
m_suffix = "seq.suffix.prefix";
m_contains_left = "seq.contains.left";
@ -1176,7 +1177,9 @@ void theory_seq::display(std::ostream & out) const {
for (; it != end; ++it) {
out << mk_pp(it->m_key, m) << "\n";
display_expr disp(m);
it->m_value->display(out, disp);
if (it->m_value) {
it->m_value->display(out, disp);
}
}
}
if (!m_rep.empty()) {
@ -1952,11 +1955,10 @@ void theory_seq::add_at_axiom(expr* e) {
void theory_seq::propagate_step(literal lit, expr* step) {
context& ctx = get_context();
SASSERT(ctx.get_assignment(lit) == l_true);
expr* re, *t, *s, *idx, *i, *j;
VERIFY(is_step(step, s, idx, re, i, j, t));
expr_ref nth = mk_nth(s, idx);
TRACE("seq", tout << mk_pp(step, m) << " -> " << mk_pp(t, m) << " = " << nth << "\n";);
propagate_eq(lit, t, nth);
expr* re, *acc, *s, *idx, *i, *j;
VERIFY(is_step(step, s, idx, re, i, j, acc));
TRACE("seq", tout << mk_pp(step, m) << " -> " << mk_pp(acc, m) << "\n";);
propagate_lit(0, 1, &lit, mk_literal(acc));
rational lo;
rational _idx;
if (lower_bound(s, lo) && lo.is_unsigned() && m_autil.is_numeral(idx, _idx) && lo >= _idx) {
@ -2250,15 +2252,14 @@ eautomaton* theory_seq::get_automaton(expr* re) {
if (m_re2aut.find(re, result)) {
return result;
}
result = re2automaton(m)(re);
result = m_mk_aut(re);
if (result) {
display_expr disp(m);
TRACE("seq", result->display(tout, disp););
}
if (result) {
m_automata.push_back(result);
m_trail_stack.push(push_back_vector<theory_seq, scoped_ptr_vector<eautomaton> >(m_automata));
}
m_automata.push_back(result);
m_trail_stack.push(push_back_vector<theory_seq, scoped_ptr_vector<eautomaton> >(m_automata));
m_re2aut.insert(re, result);
m_trail_stack.push(insert_obj_map<theory_seq, expr, eautomaton*>(m_re2aut, re));
return result;
@ -2281,10 +2282,11 @@ bool theory_seq::is_acc_rej(symbol const& ar, expr* e, expr*& s, expr*& idx, exp
s = to_app(e)->get_arg(0);
idx = to_app(e)->get_arg(1);
re = to_app(e)->get_arg(2);
TRACE("seq", tout << mk_pp(re, m) << "\n";);
VERIFY(m_autil.is_numeral(to_app(e)->get_arg(3), r));
SASSERT(r.is_unsigned());
i = r.get_unsigned();
aut = m_re2aut[re];
aut = get_automaton(re);
return true;
}
else {
@ -2311,12 +2313,13 @@ bool theory_seq::is_step(expr* e, expr*& s, expr*& idx, expr*& re, expr*& i, exp
}
}
expr_ref theory_seq::mk_step(expr* s, expr* idx, expr* re, unsigned i, unsigned j, expr* t) {
expr_ref theory_seq::mk_step(expr* s, expr* idx, expr* re, unsigned i, unsigned j, expr* acc) {
SASSERT(m.is_bool(acc));
expr_ref_vector args(m);
args.push_back(s).push_back(idx).push_back(re);
args.push_back(m_autil.mk_int(i));
args.push_back(m_autil.mk_int(j));
args.push_back(t);
args.push_back(acc);
return expr_ref(m_util.mk_skolem(m_aut_step, args.size(), args.c_ptr(), m.mk_bool_sort()), m);
}
@ -2390,7 +2393,9 @@ bool theory_seq::add_accept2step(expr* acc) {
for (unsigned i = 0; i < mvs.size(); ++i) {
unsigned j = (i + start) % mvs.size();
eautomaton::move mv = mvs[j];
step = mk_step(e, idx, re, src, mv.dst(), mv.t());
expr_ref nth = mk_nth(e, idx);
expr_ref acc = mv.t()->accept(nth);
step = mk_step(e, idx, re, src, mv.dst(), acc);
lits.push_back(mk_literal(step));
switch (ctx.get_assignment(lits.back())) {
case l_true:
@ -2433,8 +2438,8 @@ bool theory_seq::add_accept2step(expr* acc) {
bool theory_seq::add_step2accept(expr* step) {
context& ctx = get_context();
SASSERT(ctx.get_assignment(step) == l_true);
expr* re, *t, *s, *idx, *i, *j;
VERIFY(is_step(step, s, idx, re, i, j, t));
expr* re, *_acc, *s, *idx, *i, *j;
VERIFY(is_step(step, s, idx, re, i, j, _acc));
literal acc1 = mk_accept(s, idx, re, i);
switch (ctx.get_assignment(acc1)) {
case l_false:
@ -2508,7 +2513,7 @@ bool theory_seq::add_reject2reject(expr* rej) {
bool has_undef = false;
for (unsigned i = 0; i < mvs.size(); ++i) {
eautomaton::move const& mv = mvs[i];
literal eq = mk_eq(nth, mv.t(), false);
literal eq = mk_literal(mv.t()->accept(nth));
switch (ctx.get_assignment(eq)) {
case l_false:
case l_true:

View file

@ -278,6 +278,7 @@ namespace smt {
unsigned m_atoms_qhead;
bool m_new_solution; // new solution added
bool m_new_propagation; // new propagation to core
re2automaton m_mk_aut;
virtual final_check_status final_check_eh();
virtual bool internalize_atom(app* atom, bool) { return internalize_term(atom); }
@ -411,7 +412,7 @@ namespace smt {
return is_acc_rej(m_reject, rej, s, idx, re, i, aut);
}
bool is_acc_rej(symbol const& ar, expr* e, expr*& s, expr*& idx, expr*& re, unsigned& i, eautomaton*& aut);
expr_ref mk_step(expr* s, expr* tail, expr* re, unsigned i, unsigned j, expr* t);
expr_ref mk_step(expr* s, expr* tail, expr* re, unsigned i, unsigned j, expr* acc);
bool is_step(expr* e, expr*& s, expr*& tail, expr*& re, expr*& i, expr*& j, expr*& t) const;
bool is_step(expr* e) const;
void propagate_step(literal lit, expr* n);