diff --git a/src/ast/sls/sls_arith_base.cpp b/src/ast/sls/sls_arith_base.cpp index 0ede50519..4a599ad75 100644 --- a/src/ast/sls/sls_arith_base.cpp +++ b/src/ast/sls/sls_arith_base.cpp @@ -2111,7 +2111,7 @@ namespace sls { auto const& vi = m_vars[v]; if (vi.m_def_idx == UINT_MAX) return true; - verbose_stream() << " repair def " << mk_bounded_pp(vi.m_expr, m) << "\n"; + IF_VERBOSE(2, verbose_stream() << " repair def " << mk_bounded_pp(vi.m_expr, m) << "\n"); TRACE("sls", tout << "repair def " << mk_bounded_pp(vi.m_expr, m) << "\n"); switch (vi.m_op) { case arith_op_kind::LAST_ARITH_OP: diff --git a/src/ast/sls/sls_context.cpp b/src/ast/sls/sls_context.cpp index 051ae98a7..c55c2ac79 100644 --- a/src/ast/sls/sls_context.cpp +++ b/src/ast/sls/sls_context.cpp @@ -16,12 +16,13 @@ Author: --*/ #include "ast/sls/sls_context.h" -#include "ast/sls/sls_euf_plugin.h" #include "ast/sls/sls_arith_plugin.h" #include "ast/sls/sls_array_plugin.h" -#include "ast/sls/sls_bv_plugin.h" #include "ast/sls/sls_basic_plugin.h" +#include "ast/sls/sls_bv_plugin.h" +#include "ast/sls/sls_euf_plugin.h" #include "ast/sls/sls_datatype_plugin.h" +#include "ast/sls/sls_seq_plugin.h" #include "ast/ast_ll_pp.h" #include "ast/ast_pp.h" #include "smt/params/smt_params_helper.hpp" @@ -57,6 +58,8 @@ namespace sls { void context::ensure_plugin(family_id fid) { if (m_plugins.get(fid, nullptr)) return; + else if (fid == null_family_id) + ; else if (fid == arith_family_id) register_plugin(alloc(arith_plugin, *this)); else if (fid == user_sort_family_id) @@ -69,8 +72,8 @@ namespace sls { register_plugin(alloc(array_plugin, *this)); else if (fid == datatype_util(m).get_family_id()) register_plugin(alloc(datatype_plugin, *this)); - else if (fid == null_family_id) - ; + else if (fid == seq_util(m).get_family_id()) + register_plugin(alloc(seq_plugin, *this)); else verbose_stream() << "did not find plugin for " << fid << "\n"; } diff --git a/src/ast/sls/sls_seq_plugin.cpp b/src/ast/sls/sls_seq_plugin.cpp index aa47f61f9..ded8d10fb 100644 --- a/src/ast/sls/sls_seq_plugin.cpp +++ b/src/ast/sls/sls_seq_plugin.cpp @@ -17,19 +17,36 @@ Author: #include "ast/sls/sls_seq_plugin.h" #include "ast/sls/sls_context.h" +#include "ast/ast_pp.h" namespace sls { seq_plugin::seq_plugin(context& c): plugin(c), - seq(c.get_manager()) - {} + seq(c.get_manager()), + a(c.get_manager()) + { + m_fid = seq.get_family_id(); + } void seq_plugin::propagate_literal(sat::literal lit) { + SASSERT(ctx.is_true(lit)); + auto e = ctx.atom(lit.var()); + if (!is_seq_predicate(e)) + return; + auto a = to_app(e); + if (bval1(e) != lit.sign()) + return; + ctx.new_value_eh(e); } expr_ref seq_plugin::get_value(expr* e) { + if (seq.is_string(e->get_sort())) + return expr_ref(seq.str.mk_string(strval0(e)), m); + + NOT_IMPLEMENTED_YET(); + return expr_ref(m); } @@ -42,9 +59,37 @@ namespace sls { } void seq_plugin::register_term(expr* e) { + if (seq.is_string(e->get_sort())) { + strval0(e) = strval1(e); + for (unsigned i = 0; i < strval0(e).length(); ++i) + m_chars.insert(strval0(e)[i]); + + if (is_app(e) && to_app(e)->get_family_id() == m_fid && + all_of(*to_app(e), [&](expr* arg) { return is_value(arg); })) + get_eval(e).is_value = true; + return; + } } std::ostream& seq_plugin::display(std::ostream& out) const { + if (!m_chars.empty()) + out << "chars: " << m_chars << "\n"; + for (auto t : ctx.subterms()) { + if (!seq.is_string(t->get_sort())) + continue; + if (m.is_value(t)) + continue; + auto* ev = get_eval(t); + if (!ev) + continue; + out << mk_pp(t, m) << " -> " << ev->val0.svalue; + if (ev->min_length > 0) + out << " min-length: " << ev->min_length; + if (ev->max_length < UINT_MAX) + out << " max-length: " << ev->max_length; + out << "\n"; + } + return out; } @@ -52,20 +97,48 @@ namespace sls { return false; } - zstring& seq_plugin::strval0(expr* e) { - SASSERT(seq.is_string(e->get_sort())); + seq_plugin::eval& seq_plugin::get_eval(expr* e) { unsigned id = e->get_id(); m_values.reserve(id + 1); if (!m_values[id]) m_values.set(id, alloc(eval, m)); - return m_values[id]->val0.svalue; + return *m_values[id]; } + seq_plugin::eval* seq_plugin::get_eval(expr* e) const { + unsigned id = e->get_id(); + return m_values.get(id, nullptr); + } + + zstring& seq_plugin::strval0(expr* e) { + SASSERT(seq.is_string(e->get_sort())); + return get_eval(e).val0.svalue; + } + + bool seq_plugin::is_seq_predicate(expr* e) { + if (!is_app(e)) + return false; + if (to_app(e)->get_family_id() == seq.get_family_id()) + return true; + expr* x, *y; + if (m.is_eq(e, x, y)) + return seq.is_seq(x->get_sort()); + if (m.is_distinct(e) && to_app(e)->get_num_args() > 0) + return seq.is_seq(to_app(e)->get_arg(0)); + return false; + } + + bool seq_plugin::bval1(expr* e) { SASSERT(is_app(e)); if (to_app(e)->get_family_id() == seq.get_family_id()) return bval1_seq(to_app(e)); - + expr* x, * y; + if (m.is_eq(e, x, y)) { + if (seq.is_string(x->get_sort())) + return strval0(x) == strval0(y); + NOT_IMPLEMENTED_YET(); + } NOT_IMPLEMENTED_YET(); return false; } @@ -73,21 +146,29 @@ namespace sls { bool seq_plugin::bval1_seq(app* e) { expr* a, *b; switch (e->get_decl_kind()) { - case OP_SEQ_CONTAINS: { + case OP_SEQ_CONTAINS: VERIFY(seq.str.is_contains(e, a, b)); if (seq.is_string(a->get_sort())) return strval0(a).contains(strval0(b)); - else { - NOT_IMPLEMENTED_YET(); - } + + NOT_IMPLEMENTED_YET(); break; - } case OP_SEQ_PREFIX: + VERIFY(seq.str.is_prefix(e, a, b)); + if (seq.is_string(a->get_sort())) + return strval0(a).prefixof(strval0(b)); + NOT_IMPLEMENTED_YET(); + break; case OP_SEQ_SUFFIX: + VERIFY(seq.str.is_suffix(e, a, b)); + if (seq.is_string(a->get_sort())) + return strval0(a).suffixof(strval0(b)); + NOT_IMPLEMENTED_YET(); + break; + case OP_SEQ_IN_RE: case OP_SEQ_NTH: case OP_SEQ_NTH_I: case OP_SEQ_NTH_U: - case OP_SEQ_IN_RE: case OP_SEQ_FOLDL: case OP_SEQ_FOLDLI: case OP_STRING_LT: @@ -104,22 +185,78 @@ namespace sls { zstring const& seq_plugin::strval1(expr* e) { SASSERT(is_app(e)); SASSERT(seq.is_string(e->get_sort())); + auto & ev = get_eval(e); + if (ev.is_value) + return ev.val0.svalue; + if (to_app(e)->get_family_id() == seq.get_family_id()) { switch (to_app(e)->get_decl_kind()) { - case OP_SEQ_UNIT: - case OP_SEQ_EMPTY: - case OP_SEQ_CONCAT: - case OP_SEQ_EXTRACT: + case OP_STRING_CONST: { + zstring str; + VERIFY(seq.str.is_string(e, str)); + ev.val0.svalue = str; + return ev.val0.svalue; + } + case OP_SEQ_UNIT: { + expr* arg = to_app(e)->get_arg(0); + unsigned ch; + if (seq.is_const_char(arg, ch)) { + zstring str(ch); + ev.val0.svalue = str; + return ev.val0.svalue; + } + NOT_IMPLEMENTED_YET(); + } + case OP_SEQ_EMPTY: { + ev.val0.svalue = zstring(); + return ev.val0.svalue; + } + case OP_SEQ_CONCAT: { + zstring r; + for (auto arg : *to_app(e)) + r = r + strval0(arg); + ev.val1.svalue = r; + return ev.val1.svalue; + } + case OP_SEQ_EXTRACT: { + expr* x, * offset, * len; + VERIFY(seq.str.is_extract(e, x, offset, len)); + zstring r = strval0(x); + expr_ref offset_e = ctx.get_value(offset); + expr_ref len_e = ctx.get_value(len); + rational offset_val, len_val; + VERIFY(a.is_numeral(offset_e, offset_val)); + VERIFY(a.is_numeral(len_e, len_val)); + if (offset_val.is_unsigned() && offset_val < r.length() && + len_val.is_unsigned()) { + ev.val1.svalue = r.extract(offset_val.get_unsigned(), len_val.get_unsigned()); + return ev.val1.svalue; + } + else { + ev.val1.svalue = zstring(); + return ev.val1.svalue; + } + } + case OP_SEQ_AT: { + expr* x, * offset; + VERIFY(seq.str.is_at(e, x, offset)); + zstring r = strval0(x); + expr_ref offset_e = ctx.get_value(offset); + rational offset_val; + VERIFY(a.is_numeral(offset_e, offset_val)); + if (offset_val.is_unsigned() && offset_val < r.length()) { + ev.val1.svalue = zstring(r[offset_val.get_unsigned()]); + return ev.val1.svalue; + } + else { + ev.val1.svalue = zstring(); + return ev.val1.svalue; + } + } case OP_SEQ_REPLACE: - case OP_SEQ_AT: case OP_SEQ_NTH: case OP_SEQ_NTH_I: case OP_SEQ_NTH_U: - case OP_SEQ_LENGTH: - case OP_SEQ_INDEX: - case OP_SEQ_LAST_INDEX: - case OP_SEQ_TO_RE: - case OP_SEQ_IN_RE: case OP_SEQ_REPLACE_RE_ALL: case OP_SEQ_REPLACE_RE: case OP_SEQ_REPLACE_ALL: @@ -127,7 +264,13 @@ namespace sls { case OP_SEQ_MAPI: case OP_SEQ_FOLDL: case OP_SEQ_FOLDLI: - case OP_RE_PLUS: + case OP_RE_DERIVATIVE: + case OP_STRING_ITOS: + case OP_STRING_FROM_CODE: + verbose_stream() << "strval1 " << mk_bounded_pp(e, m) << "\n"; + NOT_IMPLEMENTED_YET(); + break; + case OP_RE_PLUS: case OP_RE_STAR: case OP_RE_OPTION: case OP_RE_RANGE: @@ -143,9 +286,10 @@ namespace sls { case OP_RE_FULL_CHAR_SET: case OP_RE_OF_PRED: case OP_RE_REVERSE: - case OP_RE_DERIVATIVE: - case OP_STRING_CONST: - case OP_STRING_ITOS: + case OP_SEQ_TO_RE: + case OP_SEQ_LENGTH: + case OP_SEQ_INDEX: + case OP_SEQ_LAST_INDEX: case OP_STRING_STOI: case OP_STRING_UBVTOS: case OP_STRING_SBVTOS: @@ -153,8 +297,8 @@ namespace sls { case OP_STRING_LE: case OP_STRING_IS_DIGIT: case OP_STRING_TO_CODE: - case OP_STRING_FROM_CODE: - NOT_IMPLEMENTED_YET(); + verbose_stream() << "strval1 " << mk_bounded_pp(e, m) << "\n"; + UNREACHABLE(); break; default: UNREACHABLE(); @@ -167,25 +311,168 @@ namespace sls { } void seq_plugin::repair_up(app* e) { + + if (m.is_bool(e)) + return; + + if (seq.is_string(e->get_sort())) { + if (is_value(e)) + return; + strval0(e) = strval1(e); + return; + } + + if (seq.str.is_length(e)) { + repair_up_str_length(e); + return; + } + + if (seq.str.is_index(e)) { + repair_up_str_indexof(e); + return; + } + + verbose_stream() << "repair up nyi: " << mk_bounded_pp(e, m) << "\n"; } bool seq_plugin::repair_down(app* e) { + if (m.is_bool(e) && bval1(e) == ctx.is_true(e)) + return true; + if (seq.is_string(e->get_sort()) && strval0(e) == strval1(e)) + return true; + if (e->get_family_id() == m_fid) + return repair_down_seq(e); + if (m.is_eq(e)) + return repair_down_eq(e); + + NOT_IMPLEMENTED_YET(); + return false; + } + + bool seq_plugin::repair_down_str_length(app* e) { + expr* x; + VERIFY(seq.str.is_length(e, x)); + expr_ref len = ctx.get_value(e); + rational r; + unsigned len_u; + VERIFY(a.is_numeral(len, r)); + if (!r.is_unsigned()) + return false; + zstring val_x = strval0(x); + len_u = r.get_unsigned(); + if (len_u == val_x.length()) + return true; + if (len_u < val_x.length()) { + for (unsigned i = 0; i + len_u < val_x.length(); ++i) + m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 }); + } + if (!m_chars.empty()) { + zstring ch(m_chars[ctx.rand(m_chars.size())]); + m_str_updates.push_back({ x, val_x + ch, 1 }); + m_str_updates.push_back({ x, ch + val_x, 1 }); + } + return apply_update(); + } + + void seq_plugin::repair_up_str_length(app* e) { + expr* x; + VERIFY(seq.str.is_length(e, x)); + zstring val_x = strval0(x); + update(e, rational(val_x.length())); + } + + void seq_plugin::repair_up_str_indexof(app* e) { + expr* x, * y, * z = nullptr; + VERIFY(seq.str.is_index(e, x, y, z) || seq.str.is_index(e, x, y)); + zstring val_x = strval0(x); + zstring val_y = strval0(y); + unsigned offset = 0; + if (z) { + rational r; + VERIFY(a.is_numeral(ctx.get_value(z), r)); + if (!r.is_unsigned()) { + update(e, rational(-1)); + return; + } + offset = r.get_unsigned(); + } + int idx = val_x.indexofu(val_y, offset); + update(e, rational(idx)); + } + + bool seq_plugin::repair_down_eq(app* e) { + if (seq.is_string(e->get_arg(0)->get_sort())) + return repair_down_str_eq(e); + NOT_IMPLEMENTED_YET(); + return false; + } + + bool seq_plugin::repair_down_str_eq(app* e) { + bool is_true = ctx.is_true(e); + expr* x, * y; + VERIFY(m.is_eq(e, x, y)); + if (ctx.is_true(e)) { + if (!is_value(x)) + m_str_updates.push_back({ x, strval1(y), 1 }); + if (!is_value(y)) + m_str_updates.push_back({ y, strval1(x), 1 }); + } + else { + if (!is_value(x) && !m_chars.empty()) { + zstring ch(m_chars[ctx.rand(m_chars.size())]); + m_str_updates.push_back({ x, strval1(y) + ch, 1 }); + m_str_updates.push_back({ x, ch + strval1(y), 1 }); + } + if (!is_value(y) && !m_chars.empty()) { + zstring ch(m_chars[ctx.rand(m_chars.size())]); + m_str_updates.push_back({ y, strval1(x) + ch, 1 }); + m_str_updates.push_back({ y, ch + strval1(x), 1 }); + } + } + return apply_update(); + } + + bool seq_plugin::repair_down_seq(app* e) { switch (e->get_decl_kind()) { - case OP_SEQ_UNIT: - case OP_SEQ_EMPTY: - case OP_SEQ_CONCAT: - case OP_SEQ_PREFIX: - case OP_SEQ_SUFFIX: case OP_SEQ_CONTAINS: - return repair_contains(e); + if (seq.is_string(to_app(e)->get_arg(0)->get_sort())) + return repair_down_str_contains(e); + break; + case OP_SEQ_EMPTY: + return true; + case OP_SEQ_CONCAT: + if (seq.is_string(e->get_sort())) + return repair_down_str_concat(to_app(e)); + break; case OP_SEQ_EXTRACT: - case OP_SEQ_REPLACE: + if (seq.is_string(e->get_sort())) + return repair_down_str_extract(e); + break; + case OP_SEQ_LENGTH: + if (seq.is_string(to_app(e)->get_arg(0)->get_sort())) + return repair_down_str_length(e); + break; + case OP_SEQ_PREFIX: + if (seq.is_string(to_app(e)->get_arg(0)->get_sort())) + return repair_down_str_prefixof(e); + break; + case OP_SEQ_SUFFIX: + if (seq.is_string(to_app(e)->get_arg(0)->get_sort())) + return repair_down_str_suffixof(e); + break; case OP_SEQ_AT: + if (seq.is_string(to_app(e)->get_arg(0)->get_sort())) + return repair_down_str_at(e); + break; + case OP_SEQ_INDEX: + if (seq.is_string(to_app(e)->get_arg(0)->get_sort())) + return repair_down_str_indexof(e); + break; + case OP_SEQ_UNIT: + case OP_SEQ_REPLACE: case OP_SEQ_NTH: case OP_SEQ_NTH_I: case OP_SEQ_NTH_U: - case OP_SEQ_LENGTH: - case OP_SEQ_INDEX: case OP_SEQ_LAST_INDEX: case OP_SEQ_TO_RE: case OP_SEQ_IN_RE: @@ -223,13 +510,182 @@ namespace sls { case OP_STRING_IS_DIGIT: case OP_STRING_TO_CODE: case OP_STRING_FROM_CODE: - default: + default: break; } + verbose_stream() << "nyi repair down " << mk_bounded_pp(e, m) << "\n"; return false; } - bool seq_plugin::repair_contains(expr* e) { + bool seq_plugin::repair_down_str_at(app* e) { + expr* x, * y; + VERIFY(seq.str.is_at(e, x, y)); + zstring se = strval0(e); + if (se.length() > 1) + return false; + zstring sx = strval0(x); + unsigned lenx = sx.length(); + expr_ref idx = ctx.get_value(y); + rational r; + VERIFY(a.is_numeral(idx, r)); + + if (se.length() == 0) { + // index should be out of bounds of a. + if (!is_value(x)) { + m_str_updates.push_back({ x, zstring(), 1 }); + if (lenx > r && r >= 0) + m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 }); + } + if (!m.is_value(y)) { + m_int_updates.push_back({ y, rational(lenx), 1 }); + m_int_updates.push_back({ y, rational(lenx + 1), 1 }); + m_int_updates.push_back({ y, rational(-1), 1 }); + } + } + else { + SASSERT(se.length() == 1); + // index should be in bounds of a. + if (!is_value(x)) { + if (lenx > r && r >= 0) { + zstring new_x = sx.extract(0, r.get_unsigned()) + se + sx.extract(r.get_unsigned() + 1, lenx); + m_str_updates.push_back({ x, new_x, 1 }); + } + if (lenx <= r) { + zstring new_x = sx + se; + m_str_updates.push_back({ x, new_x, 1 }); + } + } + if (!m.is_value(y)) { + for (unsigned i = 0; i < sx.length(); ++i) { + if (se[0] == sx[i]) + m_int_updates.push_back({ y, rational(i), 1 }); + } + } + } + return apply_update(); + } + + bool seq_plugin::repair_down_str_indexof(app* e) { + expr* x, * y, * offset = nullptr; + VERIFY(seq.str.is_index(e, x, y, offset) || seq.str.is_index(e, x, y)); + rational value; + VERIFY(a.is_numeral(ctx.get_value(e), value) && value.is_int()); + zstring sx = strval0(x); + zstring sy = strval0(y); + unsigned lenx = sx.length(); + unsigned leny = sy.length(); + rational offset_r(0); + if (offset) + VERIFY(a.is_numeral(ctx.get_value(offset), offset_r)); + unsigned offset_u = 0; + if (offset_r.is_unsigned()) + offset_u = offset_r.get_unsigned(); + + // change x: + // insert y into x at offset + if (offset_r.is_unsigned() && 0 <= value && offset_u + value <= lenx && leny > 0) { + unsigned offs = offset_u + value.get_unsigned(); + zstring prefix = sx.extract(0, offs); + for (unsigned i = 0; i <= leny && offs + i < lenx; ++i) { + zstring suffix = sx.extract(offs + i, lenx); + m_str_updates.push_back({ x, prefix + sy + suffix, 1 }); + } + } + + // change y: + // replace y by substring of x at offset + if (offset_r.is_unsigned() && 0 <= value && offset_u + value < lenx) { + unsigned offs = offset_u + value.get_unsigned(); + for (unsigned i = offs; i < lenx; ++i) + m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 }); + } + + // change offset: + // update offset such that value can be the index of y in x at offset + for (int i = sx.indexofu(sy, 0); leny > 0 && value >= 0 && i >= 0; ++i, i = sx.indexofu(sy, i)) + if (value < i) + m_int_updates.push_back({ offset, rational(i) - value, 1 }); + + return apply_update(); + } + + bool seq_plugin::repair_down_str_prefixof(app* e) { + expr* a, * b; + VERIFY(seq.str.is_prefix(e, a, b)); + zstring sa = strval0(a); + zstring sb = strval0(b); + unsigned lena = sa.length(); + unsigned lenb = sb.length(); + verbose_stream() << "repair prefixof " << mk_bounded_pp(e, m) << "\n"; + if (ctx.is_true(e)) { + unsigned n = std::min(lena, lenb); + if (!is_value(a)) { + for (unsigned i = 0; i < n; ++i) + m_str_updates.push_back({ a, sb.extract(0, i), 1 }); + } + if (!is_value(b)) { + zstring new_b = sa + sb.extract(sa.length(), lenb); + m_str_updates.push_back({ b, new_b, 1 }); + m_str_updates.push_back({ b, sa, 1 }); + } + } + else { + SASSERT(lena <= lenb); + if (!is_value(a)) { + zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]); + m_str_updates.push_back({ a, sa + ch, 1 }); + m_str_updates.push_back({ a, ch + sa, 1 }); + m_str_updates.push_back({ a, sb + ch, 1 }); + m_str_updates.push_back({ a, ch + sb, 1 }); + } + if (!is_value(b)) { + zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]); + m_str_updates.push_back({ b, ch + sb, 1 }); + m_str_updates.push_back({ b, zstring(), 1}); + } + } + return apply_update(); + } + + bool seq_plugin::repair_down_str_suffixof(app* e) { + expr* a, * b; + VERIFY(seq.str.is_suffix(e, a, b)); + zstring sa = strval0(a); + zstring sb = strval0(b); + unsigned lena = sa.length(); + unsigned lenb = sb.length(); + verbose_stream() << "repair suffixof " << mk_bounded_pp(e, m) << "\n"; + if (ctx.is_true(e)) { + unsigned n = std::min(lena, lenb); + if (!is_value(a)) { + for (unsigned i = 0; i < n; ++i) + m_str_updates.push_back({ a, sb.extract(lenb - i, i), 1 }); + } + if (!is_value(b)) { + zstring new_b = sb.extract(0, lenb - n) + sa; + m_str_updates.push_back({ b, new_b, 1 }); + m_str_updates.push_back({ b, sa, 1 }); + } + } + else { + SASSERT(lena <= lenb); + if (!is_value(a)) { + zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]); + m_str_updates.push_back({ a, ch + sa, 1 }); + m_str_updates.push_back({ a, sa + ch, 1 }); + m_str_updates.push_back({ a, ch + sb, 1 }); + m_str_updates.push_back({ a, sb + ch, 1 }); + } + if (!is_value(b)) { + zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]); + m_str_updates.push_back({ b, sb + ch, 1 }); + m_str_updates.push_back({ b, zstring(), 1 }); + } + } + return apply_update(); + } + + bool seq_plugin::repair_down_str_contains(expr* e) { expr* a, *b; VERIFY(seq.str.is_contains(e, a, b)); zstring sa = strval0(a); @@ -237,7 +693,8 @@ namespace sls { unsigned lena = sa.length(); unsigned lenb = sb.length(); - m_str_updates.reset(); + verbose_stream() << "repair contains " << mk_bounded_pp(e, m) << "\n"; + if (ctx.is_true(e)) { // add b to a in front // add b to a in back @@ -245,62 +702,199 @@ namespace sls { // take random subsequence of a and set it to b // reduce size of b - m_str_updates.push_back({ a, sb + sa, 1 }); - m_str_updates.push_back({ a, sa + sb, 1 }); - if (lena > 1) { - unsigned mid = ctx.rand(lena-2) + 1; - zstring sa1 = sa.extract(0, mid); - zstring sa2 = sa.extract(mid, lena - mid); - m_str_updates.push_back({ a, sa1 + sb + sa2, 1}); + if (!is_value(a)) { + m_str_updates.push_back({ a, sb + sa, 1 }); + m_str_updates.push_back({ a, sa + sb, 1 }); + if (lena > 2) { + unsigned mid = ctx.rand(lena-2) + 1; + zstring sa1 = sa.extract(0, mid); + zstring sa2 = sa.extract(mid, lena - mid); + m_str_updates.push_back({ a, sa1 + sb + sa2, 1}); + } } - if (lenb > 0) { - m_str_updates.push_back({ b, sb.extract(0, lenb-1), 1}); - m_str_updates.push_back({ b, sb.extract(1, lenb-1), 1}); + if (!is_value(b) && lenb > 0) { + m_str_updates.push_back({ b, sb.extract(0, lenb - 1), 1}); + m_str_updates.push_back({ b, sb.extract(1, lenb - 1), 1}); } } else { // remove occurrences of b in a, if b is non-empty - // append character to b - // set b to be a + character - // + // append or pre-pend character to b + + if (!is_value(a)) { + int idx = sa.indexofu(sb, 0); + SASSERT(idx >= 0); + zstring su; + if (idx > 0) + su = sa.extract(0, idx); + su = su + sa.extract(idx + sb.length(), sa.length() - idx - sb.length()); + m_str_updates.push_back({a, su, 1}); + } + if (!m_chars.empty() && !is_value(b)) { + zstring sb1 = sb + zstring(m_chars[ctx.rand(m_chars.size())]); + zstring sb2 = zstring(m_chars[ctx.rand(m_chars.size())]) + sb; + m_str_updates.push_back({b, sb1, 1}); + m_str_updates.push_back({b, sb2, 1}); + } } - return apply_str_update(); + return apply_update(); } - bool seq_plugin::apply_str_update() { + bool seq_plugin::repair_down_str_extract(app* e) { + expr* x, * offset, * len; + VERIFY(seq.str.is_extract(e, x, offset, len)); + zstring v = strval0(e); + zstring r = strval0(x); + expr_ref offset_e = ctx.get_value(offset); + expr_ref len_e = ctx.get_value(len); + rational offset_val, len_val; + VERIFY(a.is_numeral(offset_e, offset_val)); + VERIFY(a.is_numeral(len_e, len_val)); + if (offset_val < 0) + return false; + if (len_val < 0) + return false; + SASSERT(offset_val.is_unsigned()); + SASSERT(len_val.is_unsigned()); + unsigned offset_u = offset_val.get_unsigned(); + unsigned len_u = len_val.get_unsigned(); + zstring prefix = r.extract(0, offset_u); + zstring suffix = r.extract(offset_u + len_u, r.length()); + zstring new_r = prefix + v + suffix; + m_str_updates.push_back({ x, new_r, 1 }); + return apply_update(); + } + + bool seq_plugin::repair_down_str_concat(app* e) { + zstring val_e = strval0(e); + unsigned len_e = val_e.length(); + // sample a ranom partition. + sbuffer lengths(e->get_num_args(), 0); + for (unsigned i = 0; i < len_e; ++i) + lengths[ctx.rand(lengths.size())]++; + unsigned i = 0, len_prefix = 0; + for (expr* arg : *e) { + auto len = lengths[i]; + auto val_arg = val_e.extract(len_prefix, len); + if (!update(arg, val_arg)) + return false; + ++i; + len_prefix += len; + } + return true; + } + + + bool seq_plugin::apply_update() { double sum_scores = 0; for (auto const& [e, val, score] : m_str_updates) sum_scores += score; + for (auto const& [e, val, score] : m_int_updates) + sum_scores += score; - while (!m_str_updates.empty()) { + while (!m_str_updates.empty() || !m_int_updates.empty()) { + bool is_str_update = false; unsigned i = m_str_updates.size(); double lim = sum_scores * ((double)ctx.rand() / random_gen().max_value()); - do { - lim -= m_str_updates[--i].m_score; + if (i > 0) { + do { + lim -= m_str_updates[--i].m_score; + } while (lim >= 0 && i > 0); } - while (lim >= 0 && i > 0); + is_str_update = lim == 0 || m_int_updates.empty(); - auto [e, value, score] = m_str_updates[i]; + if (!is_str_update) { + i = m_int_updates.size(); + do { + lim -= m_str_updates[--i].m_score; + } while (lim >= 0 && i > 0); + } + + if (is_str_update) { + auto [e, value, score] = m_str_updates[i]; - if (update(e, value)) - return true; + verbose_stream() << "set value " << mk_bounded_pp(e, m) << " := \"" << value << "\"\n"; - sum_scores -= score; - m_str_updates[i] = m_str_updates.back(); - m_str_updates.pop_back(); + if (update(e, value)) { + m_str_updates.reset(); + m_int_updates.reset(); + return true; + } + + sum_scores -= score; + m_str_updates[i] = m_str_updates.back(); + m_str_updates.pop_back(); + } + else { + auto [e, value, score] = m_int_updates[i]; + + verbose_stream() << "set value " << mk_bounded_pp(e, m) << " := " << value << "\n"; + + if (update(e, value)) { + m_int_updates.reset(); + m_str_updates.reset(); + return true; + } + sum_scores -= score; + m_int_updates[i] = m_int_updates.back(); + m_int_updates.pop_back(); + } } return false; } bool seq_plugin::update(expr* e, zstring const& value) { + if (is_value(e)) + return false; + if (value == strval0(e)) + return true; strval0(e) = value; ctx.new_value_eh(e); return true; } - - void seq_plugin::repair_literal(sat::literal lit) { + bool seq_plugin::update(expr* e, rational const& value) { + expr_ref val(a.mk_int(value), m); + return ctx.set_value(e, val); + } + + void seq_plugin::initialize() { + if (m_initialized) + return; + m_initialized = true; + for (auto lit : ctx.unit_literals()) { + auto e = ctx.atom(lit.var()); + expr* x, * y, * z; + rational r; + if (!lit.sign() && (a.is_le(e, x, y) || a.is_ge(e, y, x))) { + if (a.is_numeral(x, r) && r.is_unsigned() && seq.str.is_length(y, z)) { + auto& ev = get_eval(z); + ev.min_length = std::max(ev.min_length, r.get_unsigned()); + } + if (a.is_numeral(y, r) && r.is_unsigned() && seq.str.is_length(x, z)) { + auto& ev = get_eval(z); + ev.max_length = std::min(ev.max_length, r.get_unsigned()); + } + } + } } + void seq_plugin::repair_literal(sat::literal lit) { + SASSERT(ctx.is_true(lit)); + auto e = ctx.atom(lit.var()); + if (!is_seq_predicate(e)) + return; + auto a = to_app(e); + if (bval1(e) == lit.sign()) + ctx.flip(lit.var()); + } + + bool seq_plugin::is_value(expr* e) { + if (seq.is_seq(e)) + return get_eval(e).is_value; + return m.is_value(e); + } + + } diff --git a/src/ast/sls/sls_seq_plugin.h b/src/ast/sls/sls_seq_plugin.h index 59d875d8a..2afc087fd 100644 --- a/src/ast/sls/sls_seq_plugin.h +++ b/src/ast/sls/sls_seq_plugin.h @@ -18,6 +18,7 @@ Author: #include "ast/sls/sls_context.h" #include "ast/seq_decl_plugin.h" +#include "ast/arith_decl_plugin.h" namespace sls { @@ -34,32 +35,66 @@ namespace sls { val0(m), val1(m) {} value val0; value val1; + bool is_value = false; + unsigned min_length = 0; + unsigned max_length = UINT_MAX; }; seq_util seq; + arith_util a; scoped_ptr_vector m_values; + indexed_uint_set m_chars; + bool m_initialized = false; struct str_update { expr* e; zstring value; unsigned m_score; }; + struct int_update { + expr* e; + rational value; + unsigned m_score; + }; vector m_str_updates; - bool apply_str_update(); + vector m_int_updates; + bool apply_update(); bool update(expr* e, zstring const& value); + bool update(expr* e, rational const& value); bool bval1(expr* e); bool bval1_seq(app* e); zstring& strval0(expr* e); zstring const& strval1(expr* e); - bool repair_contains(expr* e); - + bool repair_down_seq(app* e); + bool repair_down_eq(app* e); + bool repair_down_str_eq(app* e); + bool repair_down_str_extract(app* e); + bool repair_down_str_contains(expr* e); + bool repair_down_str_concat(app* e); + bool repair_down_str_length(app* e); + bool repair_down_str_at(app* e); + bool repair_down_str_indexof(app* e); + bool repair_down_str_replace(app* e); + bool repair_down_str_prefixof(app* e); + bool repair_down_str_suffixof(app* e); + + void repair_up_str_length(app* e); + void repair_up_str_indexof(app* e); + + + bool is_seq_predicate(expr* e); + + eval& get_eval(expr* e); + eval* get_eval(expr* e) const; + + bool is_value(expr* e); public: seq_plugin(context& c); ~seq_plugin() override {} expr_ref get_value(expr* e) override; - void initialize() override {} + void initialize() override; void start_propagation() override {} void propagate_literal(sat::literal lit) override; bool propagate() override; @@ -67,7 +102,7 @@ namespace sls { void register_term(expr* e) override; std::ostream& display(std::ostream& out) const override; bool set_value(expr* e, expr* v) override; - bool include_func_interp(func_decl* f) const override { return true; } + bool include_func_interp(func_decl* f) const override { return false; } void repair_up(app* e) override; bool repair_down(app* e) override;