3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-13 20:38:43 +00:00
z3/src/ast/rewriter/seq_rewriter.cpp
Nikolaj Bjorner b02c698284 align variable names with dimacs input
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2018-11-08 16:52:10 -08:00

2145 lines
64 KiB
C++

/*++
Copyright (c) 2015 Microsoft Corporation
Module Name:
seq_rewriter.cpp
Abstract:
Basic rewriting rules for sequences constraints.
Author:
Nikolaj Bjorner (nbjorner) 2015-12-5
Murphy Berzish 2017-02-21
Notes:
--*/
#include "ast/rewriter/seq_rewriter.h"
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
#include "ast/ast_util.h"
#include "util/uint_set.h"
#include "math/automata/automaton.h"
#include "ast/well_sorted.h"
#include "ast/rewriter/var_subst.h"
#include "ast/rewriter/bool_rewriter.h"
#include "math/automata/symbolic_automata_def.h"
expr_ref sym_expr::accept(expr* e) {
ast_manager& m = m_t.get_manager();
expr_ref result(m);
switch (m_ty) {
case t_pred: {
var_subst subst(m);
result = subst(m_t, 1, &e);
break;
}
case t_char:
SASSERT(m.get_sort(e) == m.get_sort(m_t));
SASSERT(m.get_sort(e) == m_sort);
result = m.mk_eq(e, m_t);
break;
case t_range: {
bv_util bv(m);
rational r1, r2, r3;
unsigned sz;
if (bv.is_numeral(m_t, r1, sz) && bv.is_numeral(e, r2, sz) && bv.is_numeral(m_s, r3, sz)) {
result = m.mk_bool_val((r1 <= r2) && (r2 <= r3));
}
else {
result = m.mk_and(bv.mk_ule(m_t, e), bv.mk_ule(e, m_s));
}
break;
}
}
return result;
}
std::ostream& sym_expr::display(std::ostream& out) const {
switch (m_ty) {
case t_char: return out << m_t;
case t_range: return out << m_t << ":" << m_s;
case t_pred: return out << m_t;
}
return out << "expression type not recognized";
}
struct display_expr1 {
ast_manager& m;
display_expr1(ast_manager& m): m(m) {}
std::ostream& display(std::ostream& out, sym_expr* e) const {
return e->display(out);
}
};
class sym_expr_boolean_algebra : public boolean_algebra<sym_expr*> {
ast_manager& m;
expr_solver& m_solver;
typedef sym_expr* T;
public:
sym_expr_boolean_algebra(ast_manager& m, expr_solver& s):
m(m), m_solver(s) {}
T mk_false() override {
expr_ref fml(m.mk_false(), m);
return sym_expr::mk_pred(fml, m.mk_bool_sort()); // use of Bool sort for bound variable is arbitrary
}
T mk_true() override {
expr_ref fml(m.mk_true(), m);
return sym_expr::mk_pred(fml, m.mk_bool_sort());
}
T mk_and(T x, T y) override {
if (x->is_char() && y->is_char()) {
if (x->get_char() == y->get_char()) {
return x;
}
if (m.are_distinct(x->get_char(), y->get_char())) {
expr_ref fml(m.mk_false(), m);
return sym_expr::mk_pred(fml, x->get_sort());
}
}
sort* s = x->get_sort();
if (m.is_bool(s)) s = y->get_sort();
var_ref v(m.mk_var(0, s), m);
expr_ref fml1 = x->accept(v);
expr_ref fml2 = y->accept(v);
if (m.is_true(fml1)) {
return y;
}
if (m.is_true(fml2)) return x;
if (fml1 == fml2) return x;
bool_rewriter br(m);
expr_ref fml(m);
br.mk_and(fml1, fml2, fml);
return sym_expr::mk_pred(fml, x->get_sort());
}
T mk_or(T x, T y) override {
if (x->is_char() && y->is_char() &&
x->get_char() == y->get_char()) {
return x;
}
if (x == y) return x;
var_ref v(m.mk_var(0, x->get_sort()), m);
expr_ref fml1 = x->accept(v);
expr_ref fml2 = y->accept(v);
if (m.is_false(fml1)) return y;
if (m.is_false(fml2)) return x;
bool_rewriter br(m);
expr_ref fml(m);
br.mk_or(fml1, fml2, fml);
return sym_expr::mk_pred(fml, x->get_sort());
}
T mk_and(unsigned sz, T const* ts) override {
switch (sz) {
case 0: return mk_true();
case 1: return ts[0];
default: {
T t = ts[0];
for (unsigned i = 1; i < sz; ++i) {
t = mk_and(t, ts[i]);
}
return t;
}
}
}
T mk_or(unsigned sz, T const* ts) override {
switch (sz) {
case 0: return mk_false();
case 1: return ts[0];
default: {
T t = ts[0];
for (unsigned i = 1; i < sz; ++i) {
t = mk_or(t, ts[i]);
}
return t;
}
}
}
lbool is_sat(T x) override {
if (x->is_char()) {
return l_true;
}
if (x->is_range()) {
// TBD check lower is below upper.
}
expr_ref v(m.mk_fresh_const("x", x->get_sort()), m);
expr_ref fml = x->accept(v);
if (m.is_true(fml)) {
return l_true;
}
if (m.is_false(fml)) {
return l_false;
}
return m_solver.check_sat(fml);
}
T mk_not(T x) override {
var_ref v(m.mk_var(0, x->get_sort()), m);
expr_ref fml(m.mk_not(x->accept(v)), m);
return sym_expr::mk_pred(fml, x->get_sort());
}
/*virtual vector<std::pair<vector<bool>, T>> generate_min_terms(vector<T> constraints){
return 0;
}*/
};
re2automaton::re2automaton(ast_manager& m): m(m), u(m), bv(m), m_ba(nullptr), m_sa(nullptr) {}
re2automaton::~re2automaton() {}
void re2automaton::set_solver(expr_solver* solver) {
m_solver = solver;
m_ba = alloc(sym_expr_boolean_algebra, m, *solver);
m_sa = alloc(symbolic_automata_t, sm, *m_ba.get());
}
eautomaton* re2automaton::mk_product(eautomaton* a1, eautomaton* a2) {
return m_sa->mk_product(*a1, *a2);
}
eautomaton* re2automaton::operator()(expr* e) {
eautomaton* r = re2aut(e);
if (r) {
r->compress();
bool_rewriter br(m);
TRACE("seq", display_expr1 disp(m); r->display(tout, disp););
}
return r;
}
eautomaton* re2automaton::re2aut(expr* e) {
SASSERT(u.is_re(e));
expr *e0, *e1, *e2;
scoped_ptr<eautomaton> a, b;
unsigned lo, hi;
zstring s1, s2;
if (u.re.is_to_re(e, e1)) {
return seq2aut(e1);
}
else if (u.re.is_concat(e, e1, e2) && (a = re2aut(e1)) && (b = re2aut(e2))) {
return eautomaton::mk_concat(*a, *b);
}
else if (u.re.is_union(e, e1, e2) && (a = re2aut(e1)) && (b = re2aut(e2))) {
return eautomaton::mk_union(*a, *b);
}
else if (u.re.is_star(e, e1) && (a = re2aut(e1))) {
a->add_final_to_init_moves();
a->add_init_to_final_states();
return a.detach();
}
else if (u.re.is_plus(e, e1) && (a = re2aut(e1))) {
a->add_final_to_init_moves();
return a.detach();
}
else if (u.re.is_opt(e, e1) && (a = re2aut(e1))) {
a = eautomaton::mk_opt(*a);
return a.detach();
}
else if (u.re.is_range(e, e1, e2)) {
if (u.str.is_string(e1, s1) && u.str.is_string(e2, s2) &&
s1.length() == 1 && s2.length() == 1) {
unsigned start = s1[0];
unsigned stop = s2[0];
unsigned nb = s1.num_bits();
expr_ref _start(bv.mk_numeral(start, nb), m);
expr_ref _stop(bv.mk_numeral(stop, nb), m);
TRACE("seq", tout << "Range: " << start << " " << stop << "\n";);
a = alloc(eautomaton, sm, sym_expr::mk_range(_start, _stop));
return a.detach();
}
else {
TRACE("seq", tout << "Range expression is not handled: " << mk_pp(e, m) << "\n";);
}
}
else if (u.re.is_complement(e, e0) && (a = re2aut(e0)) && m_sa) {
return m_sa->mk_complement(*a);
}
else if (u.re.is_loop(e, e1, lo, hi) && (a = re2aut(e1))) {
scoped_ptr<eautomaton> eps = eautomaton::mk_epsilon(sm);
b = eautomaton::mk_epsilon(sm);
while (hi > lo) {
scoped_ptr<eautomaton> c = eautomaton::mk_concat(*a, *b);
b = eautomaton::mk_union(*eps, *c);
--hi;
}
while (lo > 0) {
b = eautomaton::mk_concat(*a, *b);
--lo;
}
return b.detach();
}
else if (u.re.is_loop(e, e1, lo) && (a = re2aut(e1))) {
b = eautomaton::clone(*a);
b->add_final_to_init_moves();
b->add_init_to_final_states();
while (lo > 0) {
b = eautomaton::mk_concat(*a, *b);
--lo;
}
return b.detach();
}
else if (u.re.is_empty(e)) {
return alloc(eautomaton, sm);
}
else if (u.re.is_full_seq(e)) {
expr_ref tt(m.mk_true(), m);
sort *seq_s = nullptr, *char_s = nullptr;
VERIFY (u.is_re(m.get_sort(e), seq_s));
VERIFY (u.is_seq(seq_s, char_s));
sym_expr* _true = sym_expr::mk_pred(tt, char_s);
return eautomaton::mk_loop(sm, _true);
}
else if (u.re.is_full_char(e)) {
expr_ref tt(m.mk_true(), m);
sort *seq_s = nullptr, *char_s = nullptr;
VERIFY (u.is_re(m.get_sort(e), seq_s));
VERIFY (u.is_seq(seq_s, char_s));
sym_expr* _true = sym_expr::mk_pred(tt, char_s);
a = alloc(eautomaton, sm, _true);
return a.detach();
}
else if (u.re.is_intersection(e, e1, e2) && m_sa && (a = re2aut(e1)) && (b = re2aut(e2))) {
return m_sa->mk_product(*a, *b);
}
return nullptr;
}
eautomaton* re2automaton::seq2aut(expr* e) {
SASSERT(u.is_seq(e));
zstring s;
expr* e1, *e2;
scoped_ptr<eautomaton> a, b;
if (u.str.is_concat(e, e1, e2) && (a = seq2aut(e1)) && (b = seq2aut(e2))) {
return eautomaton::mk_concat(*a, *b);
}
else if (u.str.is_unit(e, e1)) {
return alloc(eautomaton, sm, sym_expr::mk_char(m, e1));
}
else if (u.str.is_empty(e)) {
return eautomaton::mk_epsilon(sm);
}
else if (u.str.is_string(e, s)) {
unsigned init = 0;
eautomaton::moves mvs;
unsigned_vector final;
final.push_back(s.length());
for (unsigned k = 0; k < s.length(); ++k) {
// reference count?
mvs.push_back(eautomaton::move(sm, k, k+1, sym_expr::mk_char(m, u.str.mk_char(s, k))));
}
return alloc(eautomaton, sm, init, final, mvs);
}
return nullptr;
}
br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result) {
SASSERT(f->get_family_id() == get_fid());
switch(f->get_decl_kind()) {
case OP_SEQ_UNIT:
SASSERT(num_args == 1);
return mk_seq_unit(args[0], result);
case OP_SEQ_EMPTY:
return BR_FAILED;
case OP_RE_PLUS:
SASSERT(num_args == 1);
return mk_re_plus(args[0], result);
case OP_RE_STAR:
SASSERT(num_args == 1);
return mk_re_star(args[0], result);
case OP_RE_OPTION:
SASSERT(num_args == 1);
return mk_re_opt(args[0], result);
case OP_RE_CONCAT:
if (num_args == 1) {
result = args[0]; return BR_DONE;
}
SASSERT(num_args == 2);
return mk_re_concat(args[0], args[1], result);
case OP_RE_UNION:
if (num_args == 1) {
result = args[0]; return BR_DONE;
}
SASSERT(num_args == 2);
return mk_re_union(args[0], args[1], result);
case OP_RE_RANGE:
SASSERT(num_args == 2);
return mk_re_range(args[0], args[1], result);
case OP_RE_INTERSECT:
SASSERT(num_args == 2);
return mk_re_inter(args[0], args[1], result);
case OP_RE_COMPLEMENT:
SASSERT(num_args == 1);
return mk_re_complement(args[0], result);
case OP_RE_LOOP:
return mk_re_loop(num_args, args, result);
case OP_RE_EMPTY_SET:
return BR_FAILED;
case OP_RE_FULL_SEQ_SET:
return BR_FAILED;
case OP_RE_FULL_CHAR_SET:
return BR_FAILED;
case OP_RE_OF_PRED:
return BR_FAILED;
case _OP_SEQ_SKOLEM:
return BR_FAILED;
case OP_SEQ_CONCAT:
if (num_args == 1) {
result = args[0];
return BR_DONE;
}
SASSERT(num_args == 2);
return mk_seq_concat(args[0], args[1], result);
case OP_SEQ_LENGTH:
SASSERT(num_args == 1);
return mk_seq_length(args[0], result);
case OP_SEQ_EXTRACT:
SASSERT(num_args == 3);
return mk_seq_extract(args[0], args[1], args[2], result);
case OP_SEQ_CONTAINS:
SASSERT(num_args == 2);
return mk_seq_contains(args[0], args[1], result);
case OP_SEQ_AT:
SASSERT(num_args == 2);
return mk_seq_at(args[0], args[1], result);
case OP_SEQ_PREFIX:
SASSERT(num_args == 2);
return mk_seq_prefix(args[0], args[1], result);
case OP_SEQ_SUFFIX:
SASSERT(num_args == 2);
return mk_seq_suffix(args[0], args[1], result);
case OP_SEQ_INDEX:
if (num_args == 2) {
expr_ref arg3(m_autil.mk_int(0), m());
result = m_util.str.mk_index(args[0], args[1], arg3);
return BR_REWRITE1;
}
SASSERT(num_args == 3);
return mk_seq_index(args[0], args[1], args[2], result);
case OP_SEQ_REPLACE:
SASSERT(num_args == 3);
return mk_seq_replace(args[0], args[1], args[2], result);
case OP_SEQ_TO_RE:
SASSERT(num_args == 1);
return mk_str_to_regexp(args[0], result);
case OP_SEQ_IN_RE:
SASSERT(num_args == 2);
return mk_str_in_regexp(args[0], args[1], result);
case OP_STRING_CONST:
return BR_FAILED;
case OP_STRING_ITOS:
SASSERT(num_args == 1);
return mk_str_itos(args[0], result);
case OP_STRING_STOI:
SASSERT(num_args == 1);
return mk_str_stoi(args[0], result);
case _OP_STRING_CONCAT:
case _OP_STRING_PREFIX:
case _OP_STRING_SUFFIX:
case _OP_STRING_STRCTN:
case _OP_STRING_LENGTH:
case _OP_STRING_CHARAT:
case _OP_STRING_IN_REGEXP:
case _OP_STRING_TO_REGEXP:
case _OP_STRING_SUBSTR:
case _OP_STRING_STRREPL:
case _OP_STRING_STRIDOF:
UNREACHABLE();
}
return BR_FAILED;
}
/*
* (seq.unit (_ BitVector 8)) ==> String constant
*/
br_status seq_rewriter::mk_seq_unit(expr* e, expr_ref& result) {
bv_util bvu(m());
rational n_val;
unsigned int n_size;
// specifically we want (_ BitVector 8)
if (bvu.is_bv(e) && bvu.is_numeral(e, n_val, n_size) && n_size == 8) {
// convert to string constant
zstring str(n_val.get_unsigned());
TRACE("seq_verbose", tout << "rewrite seq.unit of 8-bit value " << n_val.to_string() << " to string constant \"" << str<< "\"" << std::endl;);
result = m_util.str.mk_string(str);
return BR_DONE;
}
return BR_FAILED;
}
/*
string + string = string
a + (b + c) = (a + b) + c
a + "" = a
"" + a = a
(a + string) + string = a + string
*/
br_status seq_rewriter::mk_seq_concat(expr* a, expr* b, expr_ref& result) {
zstring s1, s2;
expr* c, *d;
bool isc1 = m_util.str.is_string(a, s1);
bool isc2 = m_util.str.is_string(b, s2);
if (isc1 && isc2) {
result = m_util.str.mk_string(s1 + s2);
return BR_DONE;
}
if (m_util.str.is_concat(a, c, d)) {
result = m_util.str.mk_concat(c, m_util.str.mk_concat(d, b));
return BR_REWRITE2;
}
if (m_util.str.is_empty(a)) {
result = b;
return BR_DONE;
}
if (m_util.str.is_empty(b)) {
result = a;
return BR_DONE;
}
// TBD concatenation is right-associative
if (isc2 && m_util.str.is_concat(a, c, d) && m_util.str.is_string(d, s1)) {
result = m_util.str.mk_concat(c, m_util.str.mk_string(s1 + s2));
return BR_DONE;
}
if (isc1 && m_util.str.is_concat(b, c, d) && m_util.str.is_string(c, s2)) {
result = m_util.str.mk_concat(m_util.str.mk_string(s1 + s2), d);
return BR_DONE;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_seq_length(expr* a, expr_ref& result) {
zstring b;
m_es.reset();
m_util.str.get_concat(a, m_es);
unsigned len = 0;
unsigned j = 0;
for (unsigned i = 0; i < m_es.size(); ++i) {
if (m_util.str.is_string(m_es[i].get(), b)) {
len += b.length();
}
else if (m_util.str.is_unit(m_es[i].get())) {
len += 1;
}
else if (m_util.str.is_empty(m_es[i].get())) {
// skip
}
else {
m_es[j] = m_es[i].get();
++j;
}
}
if (j == 0) {
result = m_autil.mk_numeral(rational(len, rational::ui64()), true);
return BR_DONE;
}
if (j != m_es.size() || j != 1) {
expr_ref_vector es(m());
for (unsigned i = 0; i < j; ++i) {
es.push_back(m_util.str.mk_length(m_es[i].get()));
}
if (len != 0) {
es.push_back(m_autil.mk_numeral(rational(len, rational::ui64()), true));
}
result = m_autil.mk_add(es.size(), es.c_ptr());
return BR_REWRITE2;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& result) {
zstring s;
rational pos, len;
bool constantBase = m_util.str.is_string(a, s);
bool constantPos = m_autil.is_numeral(b, pos);
bool constantLen = m_autil.is_numeral(c, len);
// case 1: pos<0 or len<=0
// rewrite to ""
if ( (constantPos && pos.is_neg()) || (constantLen && !len.is_pos()) ) {
result = m_util.str.mk_empty(m().get_sort(a));
return BR_DONE;
}
// case 1.1: pos >= length(base)
// rewrite to ""
if (constantBase && constantPos && pos >= rational(s.length())) {
result = m_util.str.mk_empty(m().get_sort(a));
return BR_DONE;
}
constantPos &= pos.is_unsigned();
constantLen &= len.is_unsigned();
if (constantBase && constantPos && constantLen) {
if (pos.get_unsigned() + len.get_unsigned() >= s.length()) {
// case 2: pos+len goes past the end of the string
unsigned _len = s.length() - pos.get_unsigned() + 1;
result = m_util.str.mk_string(s.extract(pos.get_unsigned(), _len));
} else {
// case 3: pos+len still within string
result = m_util.str.mk_string(s.extract(pos.get_unsigned(), len.get_unsigned()));
}
return BR_DONE;
}
if (constantPos && constantLen) {
unsigned _pos = pos.get_unsigned();
unsigned _len = len.get_unsigned();
SASSERT(_len > 0);
expr_ref_vector as(m()), bs(m());
m_util.str.get_concat(a, as);
for (unsigned i = 0; i < as.size() && _len > 0; ++i) {
if (m_util.str.is_unit(as[i].get())) {
if (_pos == 0) {
bs.push_back(as[i].get());
--_len;
}
else {
--_pos;
}
}
else {
return BR_FAILED;
}
}
result = m_util.str.mk_concat(bs);
return BR_DONE;
}
return BR_FAILED;
}
bool seq_rewriter::cannot_contain_suffix(expr* a, expr* b) {
if (m_util.str.is_unit(a) && m_util.str.is_unit(b) && m().are_distinct(a, b)) {
return true;
}
zstring A, B;
if (m_util.str.is_string(a, A) && m_util.str.is_string(b, B)) {
// some prefix of a is a suffix of b
bool found = false;
for (unsigned i = 1; !found && i <= A.length(); ++i) {
found = A.extract(0, i).suffixof(B);
}
return !found;
}
return false;
}
bool seq_rewriter::cannot_contain_prefix(expr* a, expr* b) {
if (m_util.str.is_unit(a) && m_util.str.is_unit(b) && m().are_distinct(a, b)) {
return true;
}
zstring A, B;
if (m_util.str.is_string(a, A) && m_util.str.is_string(b, B)) {
// some suffix of a is a prefix of b
bool found = false;
for (unsigned i = 0; !found && i < A.length(); ++i) {
found = A.extract(i, A.length()-i).suffixof(B);
}
return !found;
}
return false;
}
br_status seq_rewriter::mk_seq_contains(expr* a, expr* b, expr_ref& result) {
zstring c, d;
if (m_util.str.is_string(a, c) && m_util.str.is_string(b, d)) {
result = m().mk_bool_val(c.contains(d));
return BR_DONE;
}
// check if subsequence of a is in b.
expr_ref_vector as(m()), bs(m());
m_util.str.get_concat(a, as);
m_util.str.get_concat(b, bs);
bool all_values = true;
TRACE("seq", tout << mk_pp(a, m()) << " contains " << mk_pp(b, m()) << "\n";);
if (bs.empty()) {
result = m().mk_true();
return BR_DONE;
}
for (unsigned i = 0; all_values && i < bs.size(); ++i) {
all_values = m().is_value(bs[i].get());
}
bool found = false;
for (unsigned i = 0; !found && i < as.size(); ++i) {
all_values &= m().is_value(as[i].get());
if (bs.size() <= as.size() - i) {
unsigned j = 0;
for (; j < bs.size() && as[j+i].get() == bs[j].get(); ++j) {};
found = j == bs.size();
}
}
if (found) {
result = m().mk_true();
return BR_DONE;
}
if (all_values) {
result = m().mk_false();
return BR_DONE;
}
unsigned lenA = 0, lenB = 0;
bool lA = min_length(as.size(), as.c_ptr(), lenA);
if (lA) {
min_length(bs.size(), bs.c_ptr(), lenB);
if (lenB > lenA) {
result = m().mk_false();
return BR_DONE;
}
}
if (as.empty()) {
result = m().mk_eq(b, m_util.str.mk_empty(m().get_sort(b)));
return BR_REWRITE2;
}
if (bs.size() == 1 && m_util.str.is_string(bs[0].get(), c)) {
for (auto a_i : as) {
if (m_util.str.is_string(a_i, d) && d.contains(c)) {
result = m().mk_true();
return BR_DONE;
}
}
}
unsigned offs = 0;
unsigned sz = as.size();
expr* b0 = bs[0].get();
expr* bL = bs[bs.size()-1].get();
for (; offs < as.size() && cannot_contain_prefix(as[offs].get(), b0); ++offs) {}
for (; sz > offs && cannot_contain_suffix(as[sz-1].get(), bL); --sz) {}
if (offs == sz) {
result = m().mk_eq(b, m_util.str.mk_empty(m().get_sort(b)));
return BR_REWRITE2;
}
if (offs > 0 || sz < as.size()) {
SASSERT(sz > offs);
result = m_util.str.mk_contains(m_util.str.mk_concat(sz-offs, as.c_ptr()+offs), b);
return BR_REWRITE2;
}
expr* x, *y, *z;
if (m_util.str.is_extract(b, x, y, z) && x == a) {
result = m().mk_true();
return BR_DONE;
}
bool all_units = true;
for (unsigned i = 0; i < bs.size(); ++i) {
all_units = m_util.str.is_unit(bs[i].get());
}
for (unsigned i = 0; i < as.size(); ++i) {
all_units = m_util.str.is_unit(as[i].get());
}
if (all_units) {
if (as.size() < bs.size()) {
result = m().mk_false();
return BR_DONE;
}
expr_ref_vector ors(m());
for (unsigned i = 0; i < as.size() - bs.size() + 1; ++i) {
expr_ref_vector ands(m());
for (unsigned j = 0; j < bs.size(); ++j) {
ands.push_back(m().mk_eq(as[i + j].get(), bs[j].get()));
}
ors.push_back(::mk_and(ands));
}
result = ::mk_or(ors);
return BR_REWRITE_FULL;
}
return BR_FAILED;
}
/*
* (str.at s i), constants s/i, i < 0 or i >= |s| ==> (str.at s i) = ""
*/
br_status seq_rewriter::mk_seq_at(expr* a, expr* b, expr_ref& result) {
zstring c;
rational r;
if (m_autil.is_numeral(b, r)) {
if (r.is_neg()) {
result = m_util.str.mk_empty(m().get_sort(a));
return BR_DONE;
}
unsigned len = 0;
bool bounded = min_length(1, &a, len);
if (bounded && r >= rational(len)) {
result = m_util.str.mk_empty(m().get_sort(a));
return BR_DONE;
}
if (m_util.str.is_string(a, c)) {
if (r.is_unsigned() && r < rational(c.length())) {
result = m_util.str.mk_string(c.extract(r.get_unsigned(), 1));
}
else {
result = m_util.str.mk_empty(m().get_sort(a));
}
return BR_DONE;
}
if (r.is_unsigned()) {
len = r.get_unsigned();
expr_ref_vector as(m());
m_util.str.get_concat(a, as);
for (unsigned i = 0; i < as.size(); ++i) {
if (m_util.str.is_unit(as[i].get())) {
if (len == 0) {
result = as[i].get();
return BR_DONE;
}
--len;
}
else {
return BR_FAILED;
}
}
}
}
return BR_FAILED;
}
br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result) {
zstring s1, s2;
rational r;
bool isc1 = m_util.str.is_string(a, s1);
bool isc2 = m_util.str.is_string(b, s2);
if (isc1 && isc2 && m_autil.is_numeral(c, r) && r.is_unsigned()) {
int idx = s1.indexof(s2, r.get_unsigned());
result = m_autil.mk_numeral(rational(idx), true);
return BR_DONE;
}
if (m_autil.is_numeral(c, r) && r.is_neg()) {
result = m_autil.mk_numeral(rational(-1), true);
return BR_DONE;
}
if (m_util.str.is_empty(b) && m_autil.is_numeral(c, r) && r.is_zero()) {
result = c;
return BR_DONE;
}
// Enhancement: walk segments of a, determine which segments must overlap, must not overlap, may overlap.
return BR_FAILED;
}
// (str.replace s t t') is the string obtained by replacing the first occurrence
// of t in s, if any, by t'. Note that if t is empty, the result is to prepend
// t' to s; also, if t does not occur in s then the result is s.
br_status seq_rewriter::mk_seq_replace(expr* a, expr* b, expr* c, expr_ref& result) {
zstring s1, s2, s3;
if (m_util.str.is_string(a, s1) && m_util.str.is_string(b, s2) &&
m_util.str.is_string(c, s3)) {
result = m_util.str.mk_string(s1.replace(s2, s3));
return BR_DONE;
}
if (b == c) {
result = a;
return BR_DONE;
}
if (m_util.str.is_empty(b)) {
result = m_util.str.mk_concat(c, a);
return BR_REWRITE1;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_seq_prefix(expr* a, expr* b, expr_ref& result) {
TRACE("seq", tout << mk_pp(a, m()) << " " << mk_pp(b, m()) << "\n";);
zstring s1, s2;
bool isc1 = m_util.str.is_string(a, s1);
bool isc2 = m_util.str.is_string(b, s2);
if (isc1 && isc2) {
result = m().mk_bool_val(s1.prefixof(s2));
TRACE("seq", tout << result << "\n";);
return BR_DONE;
}
if (m_util.str.is_empty(a)) {
result = m().mk_true();
return BR_DONE;
}
expr* a1 = m_util.str.get_leftmost_concat(a);
expr* b1 = m_util.str.get_leftmost_concat(b);
isc1 = m_util.str.is_string(a1, s1);
isc2 = m_util.str.is_string(b1, s2);
expr_ref_vector as(m()), bs(m());
if (a1 != b1 && isc1 && isc2) {
if (s1.length() <= s2.length()) {
if (s1.prefixof(s2)) {
if (a == a1) {
result = m().mk_true();
TRACE("seq", tout << s1 << " " << s2 << " " << result << "\n";);
return BR_DONE;
}
m_util.str.get_concat(a, as);
m_util.str.get_concat(b, bs);
SASSERT(as.size() > 1);
s2 = s2.extract(s1.length(), s2.length()-s1.length());
bs[0] = m_util.str.mk_string(s2);
result = m_util.str.mk_prefix(m_util.str.mk_concat(as.size()-1, as.c_ptr()+1),
m_util.str.mk_concat(bs.size(), bs.c_ptr()));
TRACE("seq", tout << s1 << " " << s2 << " " << result << "\n";);
return BR_REWRITE_FULL;
}
else {
result = m().mk_false();
TRACE("seq", tout << s1 << " " << s2 << " " << result << "\n";);
return BR_DONE;
}
}
else {
if (s2.prefixof(s1)) {
if (b == b1) {
result = m().mk_false();
TRACE("seq", tout << s1 << " " << s2 << " " << result << "\n";);
return BR_DONE;
}
m_util.str.get_concat(a, as);
m_util.str.get_concat(b, bs);
SASSERT(bs.size() > 1);
s1 = s1.extract(s2.length(), s1.length() - s2.length());
as[0] = m_util.str.mk_string(s1);
result = m_util.str.mk_prefix(m_util.str.mk_concat(as.size(), as.c_ptr()),
m_util.str.mk_concat(bs.size()-1, bs.c_ptr()+1));
TRACE("seq", tout << s1 << " " << s2 << " " << result << "\n";);
return BR_REWRITE_FULL;
}
else {
result = m().mk_false();
TRACE("seq", tout << s1 << " " << s2 << " " << result << "\n";);
return BR_DONE;
}
}
}
m_util.str.get_concat(a, as);
m_util.str.get_concat(b, bs);
unsigned i = 0;
expr_ref_vector eqs(m());
for (; i < as.size() && i < bs.size(); ++i) {
expr* a = as[i].get(), *b = bs[i].get();
if (a == b) {
continue;
}
if (m_util.str.is_unit(a) && m_util.str.is_unit(b)) {
eqs.push_back(m().mk_eq(a, b));
continue;
}
if (m().is_value(a) && m().is_value(b) && m_util.str.is_string(a) && m_util.str.is_string(b)) {
TRACE("seq", tout << mk_pp(a, m()) << " != " << mk_pp(b, m()) << "\n";);
result = m().mk_false();
return BR_DONE;
}
break;
}
if (i == as.size()) {
result = mk_and(eqs);
TRACE("seq", tout << result << "\n";);
return BR_REWRITE3;
}
SASSERT(i < as.size());
if (i == bs.size()) {
for (unsigned j = i; j < as.size(); ++j) {
eqs.push_back(m().mk_eq(m_util.str.mk_empty(m().get_sort(a)), as[j].get()));
}
result = mk_and(eqs);
TRACE("seq", tout << result << "\n";);
return BR_REWRITE3;
}
if (i > 0) {
SASSERT(i < as.size() && i < bs.size());
a = m_util.str.mk_concat(as.size() - i, as.c_ptr() + i);
b = m_util.str.mk_concat(bs.size() - i, bs.c_ptr() + i);
result = m_util.str.mk_prefix(a, b);
TRACE("seq", tout << result << "\n";);
return BR_DONE;
}
else {
return BR_FAILED;
}
}
br_status seq_rewriter::mk_seq_suffix(expr* a, expr* b, expr_ref& result) {
if (a == b) {
result = m().mk_true();
return BR_DONE;
}
zstring s1, s2;
if (m_util.str.is_empty(a)) {
result = m().mk_true();
return BR_DONE;
}
if (m_util.str.is_empty(b)) {
result = m().mk_eq(m_util.str.mk_empty(m().get_sort(a)), a);
return BR_REWRITE3;
}
bool isc1 = false;
bool isc2 = false;
expr *a1 = nullptr, *a2 = nullptr, *b1 = nullptr, *b2 = nullptr;
if (m_util.str.is_concat(a, a1, a2) && m_util.str.is_string(a2, s1)) {
isc1 = true;
}
else if (m_util.str.is_string(a, s1)) {
isc1 = true;
a2 = a;
a1 = nullptr;
}
if (m_util.str.is_concat(b, b1, b2) && m_util.str.is_string(b2, s2)) {
isc2 = true;
}
else if (m_util.str.is_string(b, s2)) {
isc2 = true;
b2 = b;
b1 = nullptr;
}
if (isc1 && isc2) {
if (s1.length() == s2.length()) {
//SASSERT(s1 != s2);
result = m().mk_false();
return BR_DONE;
}
else if (s1.length() < s2.length()) {
bool suffix = s1.suffixof(s2);
if (suffix && a1 == nullptr) {
result = m().mk_true();
return BR_DONE;
}
else if (suffix) {
s2 = s2.extract(0, s2.length()-s1.length());
b2 = m_util.str.mk_string(s2);
result = m_util.str.mk_suffix(a1, b1?m_util.str.mk_concat(b1, b2):b2);
return BR_DONE;
}
else {
result = m().mk_false();
return BR_DONE;
}
}
else {
SASSERT(s1.length() > s2.length());
if (b1 == nullptr) {
result = m().mk_false();
return BR_DONE;
}
bool suffix = s2.suffixof(s1);
if (suffix) {
s1 = s1.extract(0, s1.length()-s2.length());
a2 = m_util.str.mk_string(s1);
result = m_util.str.mk_suffix(a1?m_util.str.mk_concat(a1, a2):a2, b1);
return BR_DONE;
}
else {
result = m().mk_false();
return BR_DONE;
}
}
}
expr_ref_vector as(m()), bs(m());
m_util.str.get_concat(a, as);
m_util.str.get_concat(b, bs);
bool change = false;
while (as.size() > 0 && bs.size() > 0 && as.back() == bs.back()) {
as.pop_back();
bs.pop_back();
change = true;
}
if (as.size() > 0 && bs.size() > 0 && m().is_value(as.back()) && m().is_value(bs.back())) {
result = m().mk_false();
return BR_DONE;
}
if (change) {
// suffix("", bs) <- true
if (as.empty()) {
result = m().mk_true();
return BR_DONE;
}
// suffix(as, "") iff as = ""
if (bs.empty()) {
for (unsigned j = 0; j < as.size(); ++j) {
bs.push_back(m().mk_eq(m_util.str.mk_empty(m().get_sort(a)), as[j].get()));
}
result = mk_and(bs);
return BR_REWRITE3;
}
result = m_util.str.mk_suffix(m_util.str.mk_concat(as.size(), as.c_ptr()),
m_util.str.mk_concat(bs.size(), bs.c_ptr()));
return BR_DONE;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_str_itos(expr* a, expr_ref& result) {
rational r;
if (m_autil.is_numeral(a, r)) {
if (r.is_int() && !r.is_neg()) {
result = m_util.str.mk_string(symbol(r.to_string().c_str()));
}
else {
result = m_util.str.mk_string(symbol(""));
}
return BR_DONE;
}
return BR_FAILED;
}
/**
\brief rewrite str.to.int according to the rules:
- if the expression is a string which is a non-empty
sequence of digits 0-9 extract the corresponding numeral.
- if the expression is a string that contains any other character
or is empty, produce -1
- if the expression is int.to.str(x) produce
ite(x >= 0, x, -1)
*/
br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) {
zstring str;
if (m_util.str.is_string(a, str)) {
std::string s = str.encode();
if (s.length() == 0) {
result = m_autil.mk_int(-1);
return BR_DONE;
}
for (unsigned i = 0; i < s.length(); ++i) {
if (!('0' <= s[i] && s[i] <= '9')) {
result = m_autil.mk_int(-1);
return BR_DONE;
}
}
rational r(s.c_str());
result = m_autil.mk_numeral(r, true);
return BR_DONE;
}
expr* b;
if (m_util.str.is_itos(a, b)) {
result = m().mk_ite(m_autil.mk_ge(b, m_autil.mk_int(0)), b, m_autil.mk_int(-1));
return BR_DONE;
}
return BR_FAILED;
}
void seq_rewriter::add_next(u_map<expr*>& next, expr_ref_vector& trail, unsigned idx, expr* cond) {
expr* acc;
if (!m().is_true(cond) && next.find(idx, acc)) {
expr* args[2] = { cond, acc };
cond = mk_or(m(), 2, args);
}
trail.push_back(cond);
next.insert(idx, cond);
}
bool seq_rewriter::is_sequence(eautomaton& aut, expr_ref_vector& seq) {
seq.reset();
unsigned state = aut.init();
uint_set visited;
eautomaton::moves mvs;
unsigned_vector states;
aut.get_epsilon_closure(state, states);
bool has_final = false;
for (unsigned i = 0; !has_final && i < states.size(); ++i) {
has_final = aut.is_final_state(states[i]);
}
aut.get_moves_from(state, mvs, true);
while (!has_final) {
if (mvs.size() != 1) {
return false;
}
if (visited.contains(state)) {
return false;
}
if (aut.is_final_state(mvs[0].src())) {
return false;
}
visited.insert(state);
sym_expr* t = mvs[0].t();
if (!t || !t->is_char()) {
return false;
}
seq.push_back(m_util.str.mk_unit(t->get_char()));
state = mvs[0].dst();
mvs.reset();
aut.get_moves_from(state, mvs, true);
states.reset();
has_final = false;
aut.get_epsilon_closure(state, states);
for (unsigned i = 0; !has_final && i < states.size(); ++i) {
has_final = aut.is_final_state(states[i]);
}
}
return mvs.empty();
}
bool seq_rewriter::is_sequence(expr* e, expr_ref_vector& seq) {
seq.reset();
zstring s;
ptr_vector<expr> todo;
expr *e1, *e2;
todo.push_back(e);
while (!todo.empty()) {
e = todo.back();
todo.pop_back();
if (m_util.str.is_string(e, s)) {
for (unsigned i = 0; i < s.length(); ++i) {
seq.push_back(m_util.str.mk_char(s, i));
}
}
else if (m_util.str.is_empty(e)) {
continue;
}
else if (m_util.str.is_unit(e, e1)) {
seq.push_back(e1);
}
else if (m_util.str.is_concat(e, e1, e2)) {
todo.push_back(e2);
todo.push_back(e1);
}
else {
return false;
}
}
return true;
}
br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
if (m_util.re.is_empty(b)) {
result = m().mk_false();
return BR_DONE;
}
if (m_util.re.is_full_seq(b)) {
result = m().mk_true();
return BR_DONE;
}
scoped_ptr<eautomaton> aut;
expr_ref_vector seq(m());
if (!(aut = m_re2aut(b))) {
return BR_FAILED;
}
if (is_sequence(*aut, seq)) {
TRACE("seq", tout << seq << "\n";);
if (seq.empty()) {
result = m().mk_eq(a, m_util.str.mk_empty(m().get_sort(a)));
}
else {
result = m().mk_eq(a, m_util.str.mk_concat(seq));
}
return BR_REWRITE_FULL;
}
if (!is_sequence(a, seq)) {
return BR_FAILED;
}
expr_ref_vector trail(m());
u_map<expr*> maps[2];
bool select_map = false;
expr_ref ch(m()), cond(m());
eautomaton::moves mvs;
maps[0].insert(aut->init(), m().mk_true());
// is_accepted(a, aut) & some state in frontier is final.
for (unsigned i = 0; i < seq.size(); ++i) {
u_map<expr*>& frontier = maps[select_map];
u_map<expr*>& next = maps[!select_map];
select_map = !select_map;
ch = seq[i].get();
next.reset();
u_map<expr*>::iterator it = frontier.begin(), end = frontier.end();
for (; it != end; ++it) {
mvs.reset();
unsigned state = it->m_key;
expr* acc = it->m_value;
aut->get_moves_from(state, mvs, false);
for (unsigned j = 0; j < mvs.size(); ++j) {
eautomaton::move const& mv = mvs[j];
SASSERT(mv.t());
if (mv.t()->is_char() && m().is_value(mv.t()->get_char()) && m().is_value(ch)) {
if (mv.t()->get_char() == ch) {
add_next(next, trail, mv.dst(), acc);
}
else {
continue;
}
}
else {
cond = mv.t()->accept(ch);
if (m().is_false(cond)) {
continue;
}
if (m().is_true(cond)) {
add_next(next, trail, mv.dst(), acc);
continue;
}
expr* args[2] = { cond, acc };
cond = mk_and(m(), 2, args);
add_next(next, trail, mv.dst(), cond);
}
}
}
}
u_map<expr*> const& frontier = maps[select_map];
u_map<expr*>::iterator it = frontier.begin(), end = frontier.end();
expr_ref_vector ors(m());
for (; it != end; ++it) {
unsigned_vector states;
bool has_final = false;
aut->get_epsilon_closure(it->m_key, states);
for (unsigned i = 0; i < states.size() && !has_final; ++i) {
has_final = aut->is_final_state(states[i]);
}
if (has_final) {
ors.push_back(it->m_value);
}
}
result = mk_or(ors);
TRACE("seq", tout << result << "\n";);
return BR_REWRITE_FULL;
}
br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) {
return BR_FAILED;
}
br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
if (m_util.re.is_full_seq(a) && m_util.re.is_full_seq(b)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_empty(a)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_empty(b)) {
result = b;
return BR_DONE;
}
if (is_epsilon(a)) {
result = b;
return BR_DONE;
}
if (is_epsilon(b)) {
result = a;
return BR_DONE;
}
return BR_FAILED;
}
/*
(a + a) = a
(a + eps) = a
(eps + a) = a
*/
br_status seq_rewriter::mk_re_union(expr* a, expr* b, expr_ref& result) {
if (a == b) {
result = a;
return BR_DONE;
}
if (m_util.re.is_empty(a)) {
result = b;
return BR_DONE;
}
if (m_util.re.is_empty(b)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_full_seq(a)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_full_seq(b)) {
result = b;
return BR_DONE;
}
if (m_util.re.is_star(a) && is_epsilon(b)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_star(b) && is_epsilon(a)) {
result = b;
return BR_DONE;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_re_complement(expr* a, expr_ref& result) {
expr* e1, *e2;
if (m_util.re.is_intersection(a, e1, e2)) {
result = m_util.re.mk_union(m_util.re.mk_complement(e1), m_util.re.mk_complement(e2));
return BR_REWRITE2;
}
if (m_util.re.is_union(a, e1, e2)) {
result = m_util.re.mk_inter(m_util.re.mk_complement(e1), m_util.re.mk_complement(e2));
return BR_REWRITE2;
}
if (m_util.re.is_empty(a)) {
result = m_util.re.mk_full_seq(m().get_sort(a));
return BR_DONE;
}
if (m_util.re.is_full_seq(a)) {
result = m_util.re.mk_empty(m().get_sort(a));
return BR_DONE;
}
return BR_FAILED;
}
/**
(emp n r) = emp
(r n emp) = emp
(all n r) = r
(r n all) = r
(r n r) = r
*/
br_status seq_rewriter::mk_re_inter(expr* a, expr* b, expr_ref& result) {
if (a == b) {
result = a;
return BR_DONE;
}
if (m_util.re.is_empty(a)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_empty(b)) {
result = b;
return BR_DONE;
}
if (m_util.re.is_full_seq(a)) {
result = b;
return BR_DONE;
}
if (m_util.re.is_full_seq(b)) {
result = a;
return BR_DONE;
}
return BR_FAILED;
}
br_status seq_rewriter::mk_re_loop(unsigned num_args, expr* const* args, expr_ref& result) {
rational n1, n2;
switch (num_args) {
case 1:
break;
case 2:
if (m_autil.is_numeral(args[1], n1) && n1.is_unsigned()) {
result = m_util.re.mk_loop(args[0], n1.get_unsigned());
return BR_DONE;
}
break;
case 3:
if (m_autil.is_numeral(args[1], n1) && n1.is_unsigned() &&
m_autil.is_numeral(args[2], n2) && n2.is_unsigned()) {
result = m_util.re.mk_loop(args[0], n1.get_unsigned(), n2.get_unsigned());
return BR_DONE;
}
break;
default:
break;
}
return BR_FAILED;
}
/*
a** = a*
(a* + b)* = (a + b)*
(a + b*)* = (a + b)*
(a*b*)* = (a + b)*
a+* = a*
emp* = ""
all* = all
*/
br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) {
expr* b, *c, *b1, *c1;
if (m_util.re.is_star(a) || m_util.re.is_full_seq(a)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_full_char(a)) {
result = m_util.re.mk_full_seq(m().get_sort(a));
return BR_DONE;
}
if (m_util.re.is_empty(a)) {
sort* seq_sort = nullptr;
VERIFY(m_util.is_re(a, seq_sort));
result = m_util.re.mk_to_re(m_util.str.mk_empty(seq_sort));
return BR_DONE;
}
if (m_util.re.is_plus(a, b)) {
result = m_util.re.mk_star(b);
return BR_DONE;
}
if (m_util.re.is_union(a, b, c)) {
if (m_util.re.is_star(b, b1)) {
result = m_util.re.mk_star(m_util.re.mk_union(b1, c));
return BR_REWRITE2;
}
if (m_util.re.is_star(c, c1)) {
result = m_util.re.mk_star(m_util.re.mk_union(b, c1));
return BR_REWRITE2;
}
if (is_epsilon(b)) {
result = m_util.re.mk_star(c);
return BR_REWRITE2;
}
if (is_epsilon(c)) {
result = m_util.re.mk_star(b);
return BR_REWRITE2;
}
}
if (m_util.re.is_concat(a, b, c) &&
m_util.re.is_star(b, b1) && m_util.re.is_star(c, c1)) {
result = m_util.re.mk_star(m_util.re.mk_union(b1, c1));
return BR_REWRITE2;
}
return BR_FAILED;
}
/*
* (re.range c_1 c_n)
*/
br_status seq_rewriter::mk_re_range(expr* lo, expr* hi, expr_ref& result) {
return BR_FAILED;
}
/*
emp+ = emp
all+ = all
a*+ = a*
a++ = a+
a+ = aa*
*/
br_status seq_rewriter::mk_re_plus(expr* a, expr_ref& result) {
if (m_util.re.is_empty(a)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_full_seq(a)) {
result = a;
return BR_DONE;
}
if (is_epsilon(a)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_plus(a)) {
result = a;
return BR_DONE;
}
if (m_util.re.is_star(a)) {
result = a;
return BR_DONE;
}
//return BR_FAILED;
result = m_util.re.mk_concat(a, m_util.re.mk_star(a));
return BR_REWRITE2;
}
br_status seq_rewriter::mk_re_opt(expr* a, expr_ref& result) {
sort* s = nullptr;
VERIFY(m_util.is_re(a, s));
result = m_util.re.mk_union(m_util.re.mk_to_re(m_util.str.mk_empty(s)), a);
return BR_REWRITE1;
}
br_status seq_rewriter::mk_eq_core(expr * l, expr * r, expr_ref & result) {
TRACE("seq", tout << mk_pp(l, m()) << " = " << mk_pp(r, m()) << "\n";);
expr_ref_vector lhs(m()), rhs(m()), res(m());
bool changed = false;
if (!reduce_eq(l, r, lhs, rhs, changed)) {
result = m().mk_false();
return BR_DONE;
}
if (!changed) {
return BR_FAILED;
}
for (unsigned i = 0; i < lhs.size(); ++i) {
res.push_back(m().mk_eq(lhs[i].get(), rhs[i].get()));
}
result = mk_and(res);
return BR_REWRITE3;
}
bool seq_rewriter::reduce_eq(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_vector& lhs, expr_ref_vector& rhs, bool& change) {
expr* a, *b;
zstring s;
bool lchange = false;
SASSERT(lhs.empty());
TRACE("seq", tout << ls << "\n"; tout << rs << "\n";);
// solve from back
while (true) {
while (!rs.empty() && m_util.str.is_empty(rs.back())) {
rs.pop_back();
}
while (!ls.empty() && m_util.str.is_empty(ls.back())) {
ls.pop_back();
}
if (ls.empty() || rs.empty()) {
break;
}
expr* l = ls.back();
expr* r = rs.back();
if (m_util.str.is_unit(r) && m_util.str.is_string(l)) {
std::swap(l, r);
ls.swap(rs);
}
if (l == r) {
ls.pop_back();
rs.pop_back();
}
else if(m_util.str.is_unit(l, a) &&
m_util.str.is_unit(r, b)) {
if (m().are_distinct(a, b)) {
return false;
}
lhs.push_back(a);
rhs.push_back(b);
ls.pop_back();
rs.pop_back();
}
else if (m_util.str.is_unit(l, a) && m_util.str.is_string(r, s)) {
SASSERT(s.length() > 0);
app* ch = m_util.str.mk_char(s, s.length()-1);
SASSERT(m().get_sort(ch) == m().get_sort(a));
lhs.push_back(ch);
rhs.push_back(a);
ls.pop_back();
if (s.length() == 1) {
rs.pop_back();
}
else {
expr_ref s2(m_util.str.mk_string(s.extract(0, s.length()-1)), m());
rs[rs.size()-1] = s2;
}
}
else {
break;
}
change = true;
lchange = true;
}
// solve from front
unsigned head1 = 0, head2 = 0;
while (true) {
while (head1 < ls.size() && m_util.str.is_empty(ls[head1].get())) {
++head1;
}
while (head2 < rs.size() && m_util.str.is_empty(rs[head2].get())) {
++head2;
}
if (head1 == ls.size() || head2 == rs.size()) {
break;
}
SASSERT(head1 < ls.size() && head2 < rs.size());
expr* l = ls[head1].get();
expr* r = rs[head2].get();
if (m_util.str.is_unit(r) && m_util.str.is_string(l)) {
std::swap(l, r);
ls.swap(rs);
std::swap(head1, head2);
}
if (l == r) {
++head1;
++head2;
}
else if(m_util.str.is_unit(l, a) &&
m_util.str.is_unit(r, b)) {
if (m().are_distinct(a, b)) {
return false;
}
lhs.push_back(a);
rhs.push_back(b);
++head1;
++head2;
}
else if (m_util.str.is_unit(l, a) && m_util.str.is_string(r, s)) {
SASSERT(s.length() > 0);
app* ch = m_util.str.mk_char(s, 0);
SASSERT(m().get_sort(ch) == m().get_sort(a));
lhs.push_back(ch);
rhs.push_back(a);
++head1;
if (s.length() == 1) {
++head2;
}
else {
expr_ref s2(m_util.str.mk_string(s.extract(1, s.length()-1)), m());
rs[head2] = s2;
}
}
else {
break;
}
TRACE("seq", tout << ls << " == " << rs << "\n";);
change = true;
lchange = true;
}
// reduce strings
zstring s1, s2;
while (head1 < ls.size() &&
head2 < rs.size() &&
m_util.str.is_string(ls[head1].get(), s1) &&
m_util.str.is_string(rs[head2].get(), s2)) {
TRACE("seq", tout << s1 << " - " << s2 << " " << s1.length() << " " << s2.length() << "\n";);
unsigned l = std::min(s1.length(), s2.length());
for (unsigned i = 0; i < l; ++i) {
if (s1[i] != s2[i]) {
TRACE("seq", tout << "different at position " << i << " " << s1[i] << " " << s2[i] << "\n";);
return false;
}
}
if (l == s1.length()) {
++head1;
}
else {
ls[head1] = m_util.str.mk_string(s1.extract(l, s1.length()-l));
}
if (l == s2.length()) {
++head2;
}
else {
rs[head2] = m_util.str.mk_string(s2.extract(l, s2.length()-l));
}
TRACE("seq", tout << "change string\n";);
change = true;
lchange = true;
}
while (head1 < ls.size() &&
head2 < rs.size() &&
m_util.str.is_string(ls.back(), s1) &&
m_util.str.is_string(rs.back(), s2)) {
unsigned l = std::min(s1.length(), s2.length());
for (unsigned i = 0; i < l; ++i) {
if (s1[s1.length()-i-1] != s2[s2.length()-i-1]) {
return false;
}
}
ls.pop_back();
rs.pop_back();
if (l < s1.length()) {
ls.push_back(m_util.str.mk_string(s1.extract(0, s1.length()-l)));
}
if (l < s2.length()) {
rs.push_back(m_util.str.mk_string(s2.extract(0, s2.length()-l)));
}
TRACE("seq", tout << "change string back\n";);
change = true;
lchange = true;
}
bool is_sat = true;
unsigned szl = ls.size() - head1, szr = rs.size() - head2;
expr* const* _ls = ls.c_ptr() + head1, * const* _rs = rs.c_ptr() + head2;
if (solve_itos(szl, _ls, szr, _rs, lhs, rhs, is_sat)) {
ls.reset(); rs.reset();
change = true;
return is_sat;
}
if (length_constrained(szl, _ls, szr, _rs, lhs, rhs, is_sat)) {
ls.reset(); rs.reset();
change = true;
return is_sat;
}
if (szr == 0 && szl == 0) {
ls.reset();
rs.reset();
return true;
}
if (szr == 0 && szl > 0) {
std::swap(szr, szl);
std::swap(_ls, _rs);
}
if (szl == 0 && szr > 0) {
if (set_empty(szr, _rs, true, lhs, rhs)) {
lchange |= szr > 1;
change |= szr > 1;
if (szr == 1 && !lchange) {
lhs.reset();
rhs.reset();
}
else {
ls.reset();
rs.reset();
}
return true;
}
else {
return false;
}
}
SASSERT(szl > 0 && szr > 0);
if (is_subsequence(szl, _ls, szr, _rs, lhs, rhs, is_sat)) {
ls.reset(); rs.reset();
change = true;
return is_sat;
}
if (lchange) {
if (head1 > 0) {
for (unsigned i = 0; i < szl; ++i) {
ls[i] = ls[i + head1].get();
}
}
ls.shrink(szl);
if (head2 > 0) {
for (unsigned i = 0; i < szr; ++i) {
rs[i] = rs[i + head2].get();
}
}
rs.shrink(szr);
}
SASSERT(rs.empty() == ls.empty());
change |= lchange;
return true;
}
void seq_rewriter::add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_vector& lhs, expr_ref_vector& rhs) {
if (ls.empty() && !rs.empty()) {
rhs.push_back(m_util.str.mk_concat(rs));
lhs.push_back(m_util.str.mk_empty(m().get_sort(rhs.back())));
}
else if (rs.empty() && !ls.empty()) {
lhs.push_back(m_util.str.mk_concat(ls));
rhs.push_back(m_util.str.mk_empty(m().get_sort(lhs.back())));
}
else if (!rs.empty() && !ls.empty()) {
lhs.push_back(m_util.str.mk_concat(ls));
rhs.push_back(m_util.str.mk_concat(rs));
}
}
bool seq_rewriter::reduce_eq(expr* l, expr* r, expr_ref_vector& lhs, expr_ref_vector& rhs, bool& changed) {
m_lhs.reset();
m_rhs.reset();
m_util.str.get_concat(l, m_lhs);
m_util.str.get_concat(r, m_rhs);
bool change = false;
if (reduce_eq(m_lhs, m_rhs, lhs, rhs, change)) {
SASSERT(lhs.size() == rhs.size());
if (!change) {
lhs.push_back(l);
rhs.push_back(r);
}
else {
add_seqs(m_lhs, m_rhs, lhs, rhs);
}
changed |= change;
return true;
}
else {
TRACE("seq", tout << mk_pp(l, m()) << " != " << mk_pp(r, m()) << "\n";);
return false;
}
}
bool seq_rewriter::reduce_contains(expr* a, expr* b, expr_ref_vector& disj) {
m_lhs.reset();
m_util.str.get_concat(a, m_lhs);
TRACE("seq", tout << expr_ref(a, m()) << " " << expr_ref(b, m()) << "\n";);
zstring s;
for (unsigned i = 0; i < m_lhs.size(); ++i) {
expr* e = m_lhs.get(i);
if (m_util.str.is_empty(e)) {
continue;
}
if (m_util.str.is_string(e, s)) {
unsigned sz = s.length();
expr_ref_vector es(m());
for (unsigned j = 0; j < sz; ++j) {
es.push_back(m_util.str.mk_unit(m_util.str.mk_char(s, j)));
}
es.append(m_lhs.size() - i, m_lhs.c_ptr() + i);
for (unsigned j = 0; j < sz; ++j) {
disj.push_back(m_util.str.mk_prefix(b, m_util.str.mk_concat(es.size() - j, es.c_ptr() + j)));
}
continue;
}
if (m_util.str.is_unit(e)) {
disj.push_back(m_util.str.mk_prefix(b, m_util.str.mk_concat(m_lhs.size() - i, m_lhs.c_ptr() + i)));
continue;
}
if (m_util.str.is_string(b, s)) {
expr* all = m_util.re.mk_full_seq(m_util.re.mk_re(m().get_sort(b)));
disj.push_back(m_util.re.mk_in_re(m_util.str.mk_concat(m_lhs.size() - i, m_lhs.c_ptr() + i),
m_util.re.mk_concat(all, m_util.re.mk_concat(m_util.re.mk_to_re(b), all))));
return true;
}
if (i == 0) {
return false;
}
disj.push_back(m_util.str.mk_contains(m_util.str.mk_concat(m_lhs.size() - i, m_lhs.c_ptr() + i), b));
return true;
}
disj.push_back(m().mk_eq(b, m_util.str.mk_empty(m().get_sort(b))));
return true;
}
expr* seq_rewriter::concat_non_empty(unsigned n, expr* const* as) {
SASSERT(n > 0);
ptr_vector<expr> bs;
for (unsigned i = 0; i < n; ++i) {
if (m_util.str.is_unit(as[i]) ||
m_util.str.is_string(as[i])) {
bs.push_back(as[i]);
}
}
if (bs.empty()) {
return m_util.str.mk_empty(m().get_sort(as[0]));
}
else {
return m_util.str.mk_concat(bs.size(), bs.c_ptr());
}
}
bool seq_rewriter::set_empty(unsigned sz, expr* const* es, bool all, expr_ref_vector& lhs, expr_ref_vector& rhs) {
zstring s;
expr* emp = nullptr;
for (unsigned i = 0; i < sz; ++i) {
if (m_util.str.is_unit(es[i])) {
if (all) return false;
}
else if (m_util.str.is_empty(es[i])) {
continue;
}
else if (m_util.str.is_string(es[i], s)) {
if (all) {
SASSERT(s.length() > 0);
return false;
}
}
else {
emp = emp?emp:m_util.str.mk_empty(m().get_sort(es[i]));
lhs.push_back(emp);
rhs.push_back(es[i]);
}
}
return true;
}
bool seq_rewriter::min_length(unsigned n, expr* const* es, unsigned& len) {
zstring s;
bool bounded = true;
len = 0;
for (unsigned i = 0; i < n; ++i) {
if (m_util.str.is_unit(es[i])) {
++len;
}
else if (m_util.str.is_empty(es[i])) {
continue;
}
else if (m_util.str.is_string(es[i], s)) {
len += s.length();
}
else {
bounded = false;
}
}
return bounded;
}
bool seq_rewriter::is_string(unsigned n, expr* const* es, zstring& s) const {
zstring s1;
expr* e;
bv_util bv(m());
rational val;
unsigned sz;
for (unsigned i = 0; i < n; ++i) {
if (m_util.str.is_string(es[i], s1)) {
s = s + s1;
}
else if (m_util.str.is_unit(es[i], e) && bv.is_numeral(e, val, sz)) {
s = s + zstring(val.get_unsigned());
}
else {
return false;
}
}
return true;
}
bool seq_rewriter::solve_itos(unsigned szl, expr* const* ls, unsigned szr, expr* const* rs,
expr_ref_vector& lhs, expr_ref_vector& rhs, bool& is_sat) {
expr* l, *r;
is_sat = true;
if (szl == 1 && m_util.str.is_itos(ls[0], l)) {
if (szr == 1 && m_util.str.is_itos(rs[0], r)) {
lhs.push_back(l);
rhs.push_back(r);
return true;
}
zstring s;
if (is_string(szr, rs, s)) {
std::string s1 = s.encode();
rational r(s1.c_str());
if (s1 == r.to_string()) {
lhs.push_back(l);
rhs.push_back(m_autil.mk_numeral(r, true));
return true;
}
}
}
if (szr == 1 && szl >= 1 && m_util.str.is_itos(rs[0], r) && !m_util.str.is_itos(ls[0])) {
return solve_itos(szr, rs, szl, ls, rhs, lhs, is_sat);
}
return false;
}
bool seq_rewriter::length_constrained(unsigned szl, expr* const* l, unsigned szr, expr* const* r,
expr_ref_vector& lhs, expr_ref_vector& rhs, bool& is_sat) {
is_sat = true;
unsigned len1 = 0, len2 = 0;
bool bounded1 = min_length(szl, l, len1);
bool bounded2 = min_length(szr, r, len2);
if (bounded1 && len1 < len2) {
is_sat = false;
return true;
}
if (bounded2 && len2 < len1) {
is_sat = false;
return true;
}
if (bounded1 && len1 == len2 && len1 > 0) {
is_sat = set_empty(szr, r, false, lhs, rhs);
if (is_sat) {
lhs.push_back(concat_non_empty(szl, l));
rhs.push_back(concat_non_empty(szr, r));
//split_units(lhs, rhs);
}
return true;
}
if (bounded2 && len1 == len2 && len1 > 0) {
is_sat = set_empty(szl, l, false, lhs, rhs);
if (is_sat) {
lhs.push_back(concat_non_empty(szl, l));
rhs.push_back(concat_non_empty(szr, r));
//split_units(lhs, rhs);
}
return true;
}
return false;
}
void seq_rewriter::split_units(expr_ref_vector& lhs, expr_ref_vector& rhs) {
expr* a, *b, *a1, *b1, *a2, *b2;
while (true) {
if (m_util.str.is_unit(lhs.back(), a) &&
m_util.str.is_unit(rhs.back(), b)) {
lhs[lhs.size()-1] = a;
rhs[rhs.size()-1] = b;
break;
}
if (m_util.str.is_concat(lhs.back(), a, a2) &&
m_util.str.is_unit(a, a1) &&
m_util.str.is_concat(rhs.back(), b, b2) &&
m_util.str.is_unit(b, b1)) {
expr_ref _pin_a(lhs.back(), m()), _pin_b(rhs.back(), m());
lhs[lhs.size()-1] = a1;
rhs[rhs.size()-1] = b1;
lhs.push_back(a2);
rhs.push_back(b2);
}
else {
break;
}
}
}
bool seq_rewriter::is_epsilon(expr* e) const {
expr* e1;
return m_util.re.is_to_re(e, e1) && m_util.str.is_empty(e1);
}
bool seq_rewriter::is_subsequence(unsigned szl, expr* const* l, unsigned szr, expr* const* r,
expr_ref_vector& lhs, expr_ref_vector& rhs, bool& is_sat) {
is_sat = true;
if (szl == szr) return false;
if (szr < szl) {
std::swap(szl, szr);
std::swap(l, r);
}
uint_set rpos;
for (unsigned i = 0; i < szl; ++i) {
bool found = false;
unsigned j = 0;
bool is_unit = m_util.str.is_unit(l[i]);
for (; !found && j < szr; ++j) {
found = !rpos.contains(j) && (l[i] == r[j] || (is_unit && m_util.str.is_unit(r[j])));
}
if (!found) {
return false;
}
SASSERT(0 < j && j <= szr);
rpos.insert(j-1);
}
// if we reach here, then every element of l is contained in r in some position.
// or each non-unit in l is matched by a non-unit in r, and otherwise, the non-units match up.
bool change = false;
ptr_vector<expr> rs;
for (unsigned j = 0; j < szr; ++j) {
if (rpos.contains(j)) {
rs.push_back(r[j]);
}
else if (!set_empty(1, r + j, true, lhs, rhs)) {
is_sat = false;
return true;
}
else {
change = true;
}
}
if (!change) {
return false;
}
SASSERT(szl == rs.size());
if (szl > 0) {
lhs.push_back(m_util.str.mk_concat(szl, l));
rhs.push_back(m_util.str.mk_concat(szl, rs.c_ptr()));
}
return true;
}