3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-13 04:28:17 +00:00
z3/src/ast/sls/sls_seq_plugin.cpp
Nikolaj Bjorner b143a954c5 add notes and additional functions to sls-seq
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2024-11-25 13:46:16 -08:00

1141 lines
39 KiB
C++

/*++
Copyright (c) 2024 Microsoft Corporation
Module Name:
sls_seq_plugin.cpp
Abstract:
Sequence/String SLS
Author:
Nikolaj Bjorner (nbjorner) 2024-11-22
Notes:
Regex
Assume regexes are ground and for zstring.
to repair:
x in R
- get prefix of x that can be in R
- extend prefix by sampled string y, such that prefix(x)y in R
x not in R:
- assume x is in R, then
- sample prefix of x that is not in R
- sample extension of x that is not in R
- sample prefix of x in R, with extension not in R
next_tokens(R) = { a | exists s: as in R }
delta(a, R) = derivative of R with respect to a.
delta(s, R) = delta(s[n-1], delta(s[0..n-2], R))
nullable(R) = epsilon in R
empty(R) = R is empty
samples(x, R):
yield choose(R)
for i in 0..|x|-1 & delta(x[0..i], R) != empty:
yield x[0..i]choose(delta(x[0..i], R))
choose(R):
if nullable(R):
return epsilon
T = next_tokens(R)
a = choose(T) use a bias on characters that make progress (skip *).
return choose(delta(a, R))
Sequences
Use length constraints as tabu for updates.
Alternate to lookahead strategy:
Lookahead repair based of changing leaves:
With each predicate, track the leaves of non-value arguments.
Suppose x is a leaf string used in a violated predicate.
then we can repair x by taking sub-string, or adding a character,
or adding x with an existing constant within the domain of known constants.
or truncating x to the empty string.
Suppose z is a leaf integer.
we can increment, decrement z, set z to -1, 0, or a known bound.
Lookahead works by updating strval1 starting from the leaf.
- create a priority buffer array of vector<ptr_vector<expr>> based on depth.
- walk from lowest depth up. Reset each inner buffer when processed. Parents always
have higher depth.
- calculate repair/break score when hitting a predicate based on bval1.
- strval1 and bval1 are modified by
- use a global timestamp.
- label each eval subterm by a timestamp that gets set.
- strval0 evaluates to strval1 if timestamp matches global timestamp.
Revert bias on long strings:
- give preference to reset leaves that are assigned to long strings
- bake in bias for shorter strings into equation solving?
Equality solving using stochastic Nelson.
- Given equality where current assignment does not satisfy it:
- Xw = v:
- let X' range over prefixes of X that matches v.
- non-deterministic set X <- strval0(X')
- non-deterministic set X <- strval0(X') + 'a' where strval0(X') + 'a' matches prefix of strval0(v), and X' is longest prefix of X that matches v.
- If X fully matches a prefix of v, then, in addition to the rules above:
- consume constant character from strval0(X)w = v
- reveal the next variable to solve for.
- What scores make sense to use for partial solutions?
--*/
#include "ast/sls/sls_seq_plugin.h"
#include "ast/sls/sls_context.h"
#include "ast/ast_pp.h"
namespace sls {
seq_plugin::seq_plugin(context& c):
plugin(c),
seq(c.get_manager()),
a(c.get_manager())
{
m_fid = seq.get_family_id();
}
void seq_plugin::propagate_literal(sat::literal lit) {
SASSERT(ctx.is_true(lit));
auto e = ctx.atom(lit.var());
if (!is_seq_predicate(e))
return;
auto a = to_app(e);
if (bval1(e) != lit.sign())
return;
ctx.new_value_eh(e);
}
expr_ref seq_plugin::get_value(expr* e) {
if (seq.is_string(e->get_sort()))
return expr_ref(seq.str.mk_string(strval0(e)), m);
NOT_IMPLEMENTED_YET();
return expr_ref(m);
}
bool seq_plugin::propagate() {
return false;
}
bool seq_plugin::is_sat() {
for (expr* e : ctx.subterms()) {
expr* x, * y, * z = nullptr;
rational r;
// coherence between string / integer functions is delayed
// so we check and enforce it here.
if (seq.str.is_length(e, x) && seq.is_string(x->get_sort())) {
auto sx = strval0(x);
auto ve = ctx.get_value(e);
if (a.is_numeral(ve, r) && r == sx.length())
continue;
update(e, rational(sx.length()));
return false;
}
if ((seq.str.is_index(e, x, y, z) || seq.str.is_index(e, x, y)) && seq.is_string(x->get_sort())) {
auto sx = strval0(x);
auto sy = strval0(y);
rational val_z, val_e;
if (z) {
VERIFY(a.is_numeral(ctx.get_value(z), val_z));
}
VERIFY(a.is_numeral(ctx.get_value(e), val_e));
// case: x is empty, val_z = 0
if (val_e < 0 && (val_z < 0 || (val_z >= sx.length() && sx.length() > 0)))
continue;
if (val_z.is_unsigned() && rational(sx.indexofu(sy, val_z.get_unsigned())) == val_e)
continue;
if (val_z < 0 || (val_z >= sx.length() && sx.length() > 0))
update(e, rational(-1));
else
update(e, rational(sx.indexofu(sy, val_z.get_unsigned())));
return false;
}
// last-index-of
// str-to-int
}
return true;
}
void seq_plugin::register_term(expr* e) {
if (seq.is_string(e->get_sort())) {
strval0(e) = strval1(e);
for (unsigned i = 0; i < strval0(e).length(); ++i)
m_chars.insert(strval0(e)[i]);
if (is_app(e) && to_app(e)->get_family_id() == m_fid &&
all_of(*to_app(e), [&](expr* arg) { return is_value(arg); }))
get_eval(e).is_value = true;
}
}
std::ostream& seq_plugin::display(std::ostream& out) const {
if (!m_chars.empty())
out << "chars: " << m_chars << "\n";
for (auto t : ctx.subterms()) {
if (!seq.is_string(t->get_sort()))
continue;
if (m.is_value(t))
continue;
auto* ev = get_eval(t);
if (!ev)
continue;
out << mk_pp(t, m) << " -> \"" << ev->val0.svalue << "\"";
if (ev->min_length > 0)
out << " min-length: " << ev->min_length;
if (ev->max_length < UINT_MAX)
out << " max-length: " << ev->max_length;
out << "\n";
}
return out;
}
bool seq_plugin::set_value(expr* e, expr* v) {
return false;
}
seq_plugin::eval& seq_plugin::get_eval(expr* e) {
unsigned id = e->get_id();
m_values.reserve(id + 1);
if (!m_values[id])
m_values.set(id, alloc(eval, m));
return *m_values[id];
}
seq_plugin::eval* seq_plugin::get_eval(expr* e) const {
unsigned id = e->get_id();
return m_values.get(id, nullptr);
}
zstring& seq_plugin::strval0(expr* e) {
SASSERT(seq.is_string(e->get_sort()));
return get_eval(e).val0.svalue;
}
bool seq_plugin::is_seq_predicate(expr* e) {
if (!is_app(e))
return false;
if (to_app(e)->get_family_id() == seq.get_family_id())
return true;
expr* x, *y;
if (m.is_eq(e, x, y))
return seq.is_seq(x->get_sort());
if (m.is_distinct(e) && to_app(e)->get_num_args() > 0)
return seq.is_seq(to_app(e)->get_arg(0));
return false;
}
bool seq_plugin::bval1(expr* e) {
SASSERT(is_app(e));
if (to_app(e)->get_family_id() == seq.get_family_id())
return bval1_seq(to_app(e));
expr* x, * y;
if (m.is_eq(e, x, y)) {
if (seq.is_string(x->get_sort()))
return strval0(x) == strval0(y);
NOT_IMPLEMENTED_YET();
}
NOT_IMPLEMENTED_YET();
return false;
}
bool seq_plugin::bval1_seq(app* e) {
expr* a, *b;
SASSERT(e->get_family_id() == seq.get_family_id());
switch (e->get_decl_kind()) {
case OP_SEQ_CONTAINS:
VERIFY(seq.str.is_contains(e, a, b));
if (seq.is_string(a->get_sort()))
return strval0(a).contains(strval0(b));
NOT_IMPLEMENTED_YET();
break;
case OP_SEQ_PREFIX:
VERIFY(seq.str.is_prefix(e, a, b));
if (seq.is_string(a->get_sort()))
return strval0(a).prefixof(strval0(b));
NOT_IMPLEMENTED_YET();
break;
case OP_SEQ_SUFFIX:
VERIFY(seq.str.is_suffix(e, a, b));
if (seq.is_string(a->get_sort()))
return strval0(a).suffixof(strval0(b));
NOT_IMPLEMENTED_YET();
break;
case OP_SEQ_IN_RE:
case OP_SEQ_NTH:
case OP_SEQ_NTH_I:
case OP_SEQ_NTH_U:
case OP_SEQ_FOLDL:
case OP_SEQ_FOLDLI:
case OP_STRING_LT:
case OP_STRING_LE:
case OP_STRING_IS_DIGIT:
NOT_IMPLEMENTED_YET();
break;
default:
UNREACHABLE();
break;
}
return false;
}
zstring const& seq_plugin::strval1(expr* e) {
SASSERT(is_app(e));
SASSERT(seq.is_string(e->get_sort()));
auto & ev = get_eval(e);
if (ev.is_value)
return ev.val0.svalue;
if (to_app(e)->get_family_id() == seq.get_family_id()) {
switch (to_app(e)->get_decl_kind()) {
case OP_STRING_CONST: {
zstring str;
VERIFY(seq.str.is_string(e, str));
ev.val0.svalue = str;
return ev.val0.svalue;
}
case OP_SEQ_UNIT: {
expr* arg = to_app(e)->get_arg(0);
unsigned ch;
if (seq.is_const_char(arg, ch)) {
zstring str(ch);
ev.val0.svalue = str;
return ev.val0.svalue;
}
NOT_IMPLEMENTED_YET();
}
case OP_SEQ_EMPTY: {
ev.val0.svalue = zstring();
return ev.val0.svalue;
}
case OP_SEQ_CONCAT: {
zstring r;
for (auto arg : *to_app(e))
r = r + strval0(arg);
ev.val1.svalue = r;
return ev.val1.svalue;
}
case OP_SEQ_EXTRACT: {
expr* x, * offset, * len;
VERIFY(seq.str.is_extract(e, x, offset, len));
zstring r = strval0(x);
expr_ref offset_e = ctx.get_value(offset);
expr_ref len_e = ctx.get_value(len);
rational offset_val, len_val;
VERIFY(a.is_numeral(offset_e, offset_val));
VERIFY(a.is_numeral(len_e, len_val));
if (offset_val.is_unsigned() && offset_val < r.length() &&
len_val.is_unsigned()) {
ev.val1.svalue = r.extract(offset_val.get_unsigned(), len_val.get_unsigned());
return ev.val1.svalue;
}
else {
ev.val1.svalue = zstring();
return ev.val1.svalue;
}
}
case OP_SEQ_AT: {
expr* x, * offset;
VERIFY(seq.str.is_at(e, x, offset));
zstring r = strval0(x);
expr_ref offset_e = ctx.get_value(offset);
rational offset_val;
VERIFY(a.is_numeral(offset_e, offset_val));
if (offset_val.is_unsigned() && offset_val < r.length()) {
ev.val1.svalue = zstring(r[offset_val.get_unsigned()]);
return ev.val1.svalue;
}
else {
ev.val1.svalue = zstring();
return ev.val1.svalue;
}
}
case OP_SEQ_REPLACE:
case OP_SEQ_NTH:
case OP_SEQ_NTH_I:
case OP_SEQ_NTH_U:
case OP_SEQ_REPLACE_RE_ALL:
case OP_SEQ_REPLACE_RE:
case OP_SEQ_REPLACE_ALL:
case OP_SEQ_MAP:
case OP_SEQ_MAPI:
case OP_SEQ_FOLDL:
case OP_SEQ_FOLDLI:
case OP_RE_DERIVATIVE:
case OP_STRING_ITOS:
case OP_STRING_FROM_CODE:
case OP_STRING_UBVTOS:
case OP_STRING_SBVTOS:
verbose_stream() << "strval1 " << mk_bounded_pp(e, m) << "\n";
NOT_IMPLEMENTED_YET();
break;
case OP_RE_PLUS:
case OP_RE_STAR:
case OP_RE_OPTION:
case OP_RE_RANGE:
case OP_RE_CONCAT:
case OP_RE_UNION:
case OP_RE_DIFF:
case OP_RE_INTERSECT:
case OP_RE_LOOP:
case OP_RE_POWER:
case OP_RE_COMPLEMENT:
case OP_RE_EMPTY_SET:
case OP_RE_FULL_SEQ_SET:
case OP_RE_FULL_CHAR_SET:
case OP_RE_OF_PRED:
case OP_RE_REVERSE:
case OP_SEQ_TO_RE:
case OP_SEQ_LENGTH:
case OP_SEQ_INDEX:
case OP_SEQ_LAST_INDEX:
case OP_STRING_STOI:
case OP_STRING_LT:
case OP_STRING_LE:
case OP_STRING_IS_DIGIT:
case OP_STRING_TO_CODE:
verbose_stream() << "strval1 " << mk_bounded_pp(e, m) << "\n";
UNREACHABLE();
break;
default:
UNREACHABLE();
break;
}
}
auto const& v = strval0(e);
m_values[e->get_id()]->val1.svalue = v;
return m_values[e->get_id()]->val1.svalue;
}
void seq_plugin::repair_up(app* e) {
if (m.is_bool(e))
return;
if (seq.str.is_itos(e)) {
repair_up_str_itos(e);
return;
}
if (seq.str.is_stoi(e)) {
repair_up_str_stoi(e);
return;
}
if (seq.str.is_length(e)) {
repair_up_str_length(e);
return;
}
if (seq.str.is_index(e)) {
repair_up_str_indexof(e);
return;
}
if (seq.is_string(e->get_sort())) {
if (is_value(e))
return;
strval0(e) = strval1(e);
ctx.new_value_eh(e);
return;
}
verbose_stream() << "repair up nyi: " << mk_bounded_pp(e, m) << "\n";
}
bool seq_plugin::repair_down(app* e) {
if (m.is_bool(e) && bval1(e) == ctx.is_true(e))
return true;
if (seq.is_string(e->get_sort()) && strval0(e) == strval1(e))
return true;
if (e->get_family_id() == m_fid)
return repair_down_seq(e);
if (m.is_eq(e))
return repair_down_eq(e);
NOT_IMPLEMENTED_YET();
return false;
}
bool seq_plugin::repair_down_str_length(app* e) {
expr* x;
VERIFY(seq.str.is_length(e, x));
expr_ref len = ctx.get_value(e);
rational r;
unsigned len_u;
VERIFY(a.is_numeral(len, r));
if (!r.is_unsigned())
return false;
zstring val_x = strval0(x);
len_u = r.get_unsigned();
if (len_u == val_x.length())
return true;
if (len_u < val_x.length()) {
for (unsigned i = 0; i + len_u < val_x.length(); ++i)
m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 });
}
if (!m_chars.empty()) {
zstring ch(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ x, val_x + ch, 1 });
m_str_updates.push_back({ x, ch + val_x, 1 });
}
return apply_update();
}
void seq_plugin::repair_up_str_stoi(app* e) {
expr* x;
VERIFY(seq.str.is_stoi(e, x));
rational val_e;
rational val_x(strval0(x).encode().c_str());
VERIFY(a.is_numeral(ctx.get_value(e), val_e));
if (val_e.is_unsigned() && val_e == val_x)
return;
if (val_x < 0)
update(e, rational(0));
else
update(e, val_x);
}
void seq_plugin::repair_up_str_itos(app* e) {
expr* x;
VERIFY(seq.str.is_itos(e, x));
rational val_x;
VERIFY(a.is_numeral(ctx.get_value(x), val_x));
rational val_e(strval0(e).encode().c_str());
if (val_x.is_unsigned() && val_x == val_e)
return;
if (val_x < 0)
update(e, zstring());
else
update(e, zstring(val_x.to_string()));
}
void seq_plugin::repair_up_str_length(app* e) {
expr* x;
VERIFY(seq.str.is_length(e, x));
zstring val_x = strval0(x);
update(e, rational(val_x.length()));
}
void seq_plugin::repair_up_str_indexof(app* e) {
expr* x, * y, * z = nullptr;
VERIFY(seq.str.is_index(e, x, y, z) || seq.str.is_index(e, x, y));
zstring val_x = strval0(x);
zstring val_y = strval0(y);
unsigned offset = 0;
if (z) {
rational r;
VERIFY(a.is_numeral(ctx.get_value(z), r));
if (!r.is_unsigned()) {
update(e, rational(-1));
return;
}
offset = r.get_unsigned();
}
int idx = val_x.indexofu(val_y, offset);
update(e, rational(idx));
}
bool seq_plugin::repair_down_eq(app* e) {
if (seq.is_string(e->get_arg(0)->get_sort()))
return repair_down_str_eq(e);
NOT_IMPLEMENTED_YET();
return false;
}
bool seq_plugin::repair_down_str_eq(app* e) {
bool is_true = ctx.is_true(e);
expr* x, * y;
VERIFY(m.is_eq(e, x, y));
verbose_stream() << is_true << ": " << mk_bounded_pp(e, m, 3) << "\n";
if (ctx.is_true(e)) {
if (!is_value(x))
m_str_updates.push_back({ x, strval1(y), 1 });
if (!is_value(y))
m_str_updates.push_back({ y, strval1(x), 1 });
}
else {
if (!is_value(x) && !m_chars.empty()) {
zstring ch(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ x, strval1(y) + ch, 1 });
m_str_updates.push_back({ x, ch + strval1(y), 1 });
}
if (!is_value(y) && !m_chars.empty()) {
zstring ch(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ y, strval1(x) + ch, 1 });
m_str_updates.push_back({ y, ch + strval1(x), 1 });
}
}
return apply_update();
}
bool seq_plugin::repair_down_seq(app* e) {
switch (e->get_decl_kind()) {
case OP_SEQ_CONTAINS:
if (seq.is_string(to_app(e)->get_arg(0)->get_sort()))
return repair_down_str_contains(e);
break;
case OP_SEQ_EMPTY:
return true;
case OP_SEQ_CONCAT:
if (seq.is_string(e->get_sort()))
return repair_down_str_concat(to_app(e));
break;
case OP_SEQ_EXTRACT:
if (seq.is_string(e->get_sort()))
return repair_down_str_extract(e);
break;
case OP_SEQ_LENGTH:
if (seq.is_string(to_app(e)->get_arg(0)->get_sort()))
return repair_down_str_length(e);
break;
case OP_SEQ_PREFIX:
if (seq.is_string(to_app(e)->get_arg(0)->get_sort()))
return repair_down_str_prefixof(e);
break;
case OP_SEQ_SUFFIX:
if (seq.is_string(to_app(e)->get_arg(0)->get_sort()))
return repair_down_str_suffixof(e);
break;
case OP_SEQ_AT:
if (seq.is_string(to_app(e)->get_arg(0)->get_sort()))
return repair_down_str_at(e);
break;
case OP_SEQ_INDEX:
if (seq.is_string(to_app(e)->get_arg(0)->get_sort()))
return repair_down_str_indexof(e);
break;
case OP_STRING_CONST:
UNREACHABLE();
break;
case OP_STRING_ITOS:
return repair_down_str_itos(e);
case OP_STRING_STOI:
return repair_down_str_stoi(e);
case OP_STRING_UBVTOS:
case OP_STRING_SBVTOS:
case OP_STRING_TO_CODE:
case OP_STRING_FROM_CODE:
case OP_SEQ_UNIT:
case OP_SEQ_NTH:
case OP_SEQ_NTH_I:
case OP_SEQ_NTH_U:
case OP_SEQ_REPLACE:
case OP_SEQ_REPLACE_RE_ALL:
case OP_SEQ_REPLACE_RE:
case OP_SEQ_REPLACE_ALL:
case OP_SEQ_MAP:
case OP_SEQ_MAPI:
case OP_SEQ_FOLDL:
case OP_SEQ_FOLDLI:
case OP_SEQ_TO_RE:
case OP_SEQ_IN_RE:
case OP_RE_PLUS:
case OP_RE_STAR:
case OP_RE_OPTION:
case OP_RE_RANGE:
case OP_RE_CONCAT:
case OP_RE_UNION:
case OP_RE_DIFF:
case OP_RE_INTERSECT:
case OP_RE_LOOP:
case OP_RE_POWER:
case OP_RE_COMPLEMENT:
case OP_RE_EMPTY_SET:
case OP_RE_FULL_SEQ_SET:
case OP_RE_FULL_CHAR_SET:
case OP_RE_OF_PRED:
case OP_RE_REVERSE:
case OP_RE_DERIVATIVE:
case OP_STRING_LT:
case OP_STRING_LE:
case OP_STRING_IS_DIGIT:
break;
default:
verbose_stream() << "unexpected repair down " << mk_bounded_pp(e, m) << "\n";
UNREACHABLE();
}
verbose_stream() << "nyi repair down " << mk_bounded_pp(e, m) << "\n";
return false;
}
bool seq_plugin::repair_down_str_itos(app* e) {
expr* x;
VERIFY(seq.str.is_itos(e, x));
zstring se = strval0(e);
rational r(se.encode().c_str());
if (r.is_int())
m_int_updates.push_back({ x, r, 1 });
else
m_int_updates.push_back({ x, rational(-1 - ctx.rand(10)), 1 });
return apply_update();
}
bool seq_plugin::repair_down_str_stoi(app* e) {
expr* x;
rational r;
VERIFY(seq.str.is_stoi(e, x));
VERIFY(a.is_numeral(ctx.get_value(e), r) && r.is_int());
if (r < 0)
return false;
zstring r_val(r.to_string());
m_str_updates.push_back({ x, r_val, 1 });
return apply_update();
}
bool seq_plugin::repair_down_str_at(app* e) {
expr* x, * y;
VERIFY(seq.str.is_at(e, x, y));
zstring se = strval0(e);
if (se.length() > 1)
return false;
zstring sx = strval0(x);
unsigned lenx = sx.length();
expr_ref idx = ctx.get_value(y);
rational r;
VERIFY(a.is_numeral(idx, r));
if (se.length() == 0) {
// index should be out of bounds of a.
if (!is_value(x)) {
m_str_updates.push_back({ x, zstring(), 1 });
if (lenx > r && r >= 0)
m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 });
}
if (!m.is_value(y)) {
m_int_updates.push_back({ y, rational(lenx), 1 });
m_int_updates.push_back({ y, rational(lenx + 1), 1 });
m_int_updates.push_back({ y, rational(-1), 1 });
}
}
else {
SASSERT(se.length() == 1);
// index should be in bounds of a.
if (!is_value(x)) {
if (lenx > r && r >= 0) {
zstring new_x = sx.extract(0, r.get_unsigned()) + se + sx.extract(r.get_unsigned() + 1, lenx);
m_str_updates.push_back({ x, new_x, 1 });
}
if (lenx <= r) {
zstring new_x = sx + se;
m_str_updates.push_back({ x, new_x, 1 });
}
}
if (!m.is_value(y)) {
for (unsigned i = 0; i < sx.length(); ++i) {
if (se[0] == sx[i])
m_int_updates.push_back({ y, rational(i), 1 });
}
}
}
return apply_update();
}
bool seq_plugin::repair_down_str_indexof(app* e) {
expr* x, * y, * offset = nullptr;
VERIFY(seq.str.is_index(e, x, y, offset) || seq.str.is_index(e, x, y));
rational value;
VERIFY(a.is_numeral(ctx.get_value(e), value) && value.is_int());
zstring sx = strval0(x);
zstring sy = strval0(y);
unsigned lenx = sx.length();
unsigned leny = sy.length();
rational offset_r(0);
if (offset)
VERIFY(a.is_numeral(ctx.get_value(offset), offset_r));
unsigned offset_u = 0;
if (offset_r.is_unsigned())
offset_u = offset_r.get_unsigned();
// change x:
// insert y into x at offset
if (offset_r.is_unsigned() && 0 <= value && offset_u + value <= lenx && leny > 0) {
unsigned offs = offset_u + value.get_unsigned();
zstring prefix = sx.extract(0, offs);
for (unsigned i = 0; i <= leny && offs + i < lenx; ++i) {
zstring suffix = sx.extract(offs + i, lenx);
m_str_updates.push_back({ x, prefix + sy + suffix, 1 });
}
}
// change y:
// replace y by substring of x at offset
if (offset_r.is_unsigned() && 0 <= value && offset_u + value < lenx) {
unsigned offs = offset_u + value.get_unsigned();
for (unsigned i = offs; i < lenx; ++i)
m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 });
}
// change offset:
// update offset such that value can be the index of y in x at offset
for (int i = sx.indexofu(sy, 0); leny > 0 && value >= 0 && i >= 0; ++i, i = sx.indexofu(sy, i))
if (value < i)
m_int_updates.push_back({ offset, rational(i) - value, 1 });
return apply_update();
}
bool seq_plugin::repair_down_str_prefixof(app* e) {
expr* a, * b;
VERIFY(seq.str.is_prefix(e, a, b));
zstring sa = strval0(a);
zstring sb = strval0(b);
unsigned lena = sa.length();
unsigned lenb = sb.length();
verbose_stream() << "repair prefixof " << mk_bounded_pp(e, m) << "\n";
if (ctx.is_true(e)) {
unsigned n = std::min(lena, lenb);
if (!is_value(a)) {
for (unsigned i = 0; i < n; ++i)
m_str_updates.push_back({ a, sb.extract(0, i), 1 });
}
if (!is_value(b)) {
zstring new_b = sa + sb.extract(sa.length(), lenb);
m_str_updates.push_back({ b, new_b, 1 });
m_str_updates.push_back({ b, sa, 1 });
}
}
else {
SASSERT(lena <= lenb);
if (!is_value(a)) {
zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ a, sa + ch, 1 });
m_str_updates.push_back({ a, ch + sa, 1 });
m_str_updates.push_back({ a, sb + ch, 1 });
m_str_updates.push_back({ a, ch + sb, 1 });
}
if (!is_value(b)) {
zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ b, ch + sb, 1 });
m_str_updates.push_back({ b, zstring(), 1});
}
}
return apply_update();
}
bool seq_plugin::repair_down_str_suffixof(app* e) {
expr* a, * b;
VERIFY(seq.str.is_suffix(e, a, b));
zstring sa = strval0(a);
zstring sb = strval0(b);
unsigned lena = sa.length();
unsigned lenb = sb.length();
verbose_stream() << "repair suffixof " << mk_bounded_pp(e, m) << "\n";
if (ctx.is_true(e)) {
unsigned n = std::min(lena, lenb);
if (!is_value(a)) {
for (unsigned i = 0; i < n; ++i)
m_str_updates.push_back({ a, sb.extract(lenb - i, i), 1 });
}
if (!is_value(b)) {
zstring new_b = sb.extract(0, lenb - n) + sa;
m_str_updates.push_back({ b, new_b, 1 });
m_str_updates.push_back({ b, sa, 1 });
}
}
else {
SASSERT(lena <= lenb);
if (!is_value(a)) {
zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ a, ch + sa, 1 });
m_str_updates.push_back({ a, sa + ch, 1 });
m_str_updates.push_back({ a, ch + sb, 1 });
m_str_updates.push_back({ a, sb + ch, 1 });
}
if (!is_value(b)) {
zstring ch = zstring(m_chars[ctx.rand(m_chars.size())]);
m_str_updates.push_back({ b, sb + ch, 1 });
m_str_updates.push_back({ b, zstring(), 1 });
}
}
return apply_update();
}
bool seq_plugin::repair_down_str_contains(expr* e) {
expr* a, *b;
VERIFY(seq.str.is_contains(e, a, b));
zstring sa = strval0(a);
zstring sb = strval0(b);
unsigned lena = sa.length();
unsigned lenb = sb.length();
verbose_stream() << "repair contains " << mk_bounded_pp(e, m) << "\n";
if (ctx.is_true(e)) {
// add b to a in front
// add b to a in back
// add part of b to a front/back
// take random subsequence of a and set it to b
// reduce size of b
if (!is_value(a)) {
m_str_updates.push_back({ a, sb + sa, 1 });
m_str_updates.push_back({ a, sa + sb, 1 });
if (lena > 2) {
unsigned mid = ctx.rand(lena-2) + 1;
zstring sa1 = sa.extract(0, mid);
zstring sa2 = sa.extract(mid, lena - mid);
m_str_updates.push_back({ a, sa1 + sb + sa2, 1});
}
}
if (!is_value(b) && lenb > 0) {
m_str_updates.push_back({ b, sb.extract(0, lenb - 1), 1});
m_str_updates.push_back({ b, sb.extract(1, lenb - 1), 1});
}
}
else {
// remove occurrences of b in a, if b is non-empty
// append or pre-pend character to b
if (!is_value(a)) {
int idx = sa.indexofu(sb, 0);
SASSERT(idx >= 0);
zstring su;
if (idx > 0)
su = sa.extract(0, idx);
su = su + sa.extract(idx + sb.length(), sa.length() - idx - sb.length());
m_str_updates.push_back({a, su, 1});
}
if (!m_chars.empty() && !is_value(b)) {
zstring sb1 = sb + zstring(m_chars[ctx.rand(m_chars.size())]);
zstring sb2 = zstring(m_chars[ctx.rand(m_chars.size())]) + sb;
m_str_updates.push_back({b, sb1, 1});
m_str_updates.push_back({b, sb2, 1});
}
}
return apply_update();
}
bool seq_plugin::repair_down_str_extract(app* e) {
expr* x, * offset, * len;
VERIFY(seq.str.is_extract(e, x, offset, len));
zstring v = strval0(e);
zstring r = strval0(x);
expr_ref offset_e = ctx.get_value(offset);
expr_ref len_e = ctx.get_value(len);
rational offset_val, len_val;
VERIFY(a.is_numeral(offset_e, offset_val));
VERIFY(a.is_numeral(len_e, len_val));
if (offset_val < 0)
return false;
if (len_val < 0)
return false;
SASSERT(offset_val.is_unsigned());
SASSERT(len_val.is_unsigned());
unsigned offset_u = offset_val.get_unsigned();
unsigned len_u = len_val.get_unsigned();
zstring prefix = r.extract(0, offset_u);
zstring suffix = r.extract(offset_u + len_u, r.length());
zstring new_r = prefix + v + suffix;
m_str_updates.push_back({ x, new_r, 1 });
return apply_update();
}
bool seq_plugin::repair_down_str_concat(app* e) {
zstring val_e = strval0(e);
unsigned len_e = val_e.length();
// sample a random partition.
// the current sample algorithm isn't uniformly sampling
// each possible partition, but favors what would be a
// normal distribution
sbuffer<unsigned> lengths(e->get_num_args(), 0);
sbuffer<unsigned> non_values;
unsigned i = 0;
//verbose_stream() << "repair concat " << mk_bounded_pp(e, m) << "\n";
for (expr* arg : *e) {
++i;
if (!is_value(arg)) {
non_values.push_back(i - 1);
continue;
}
auto const& arg_val = strval0(arg);
if (arg_val.length() > len_e)
return false;
lengths[i - 1] = arg_val.length();
len_e -= arg_val.length();
}
// TODO: take duplications into account
while (len_e > 0 && !non_values.empty()) {
lengths[non_values[ctx.rand(non_values.size())]]++;
--len_e;
}
if (len_e > 0 && non_values.empty())
return false;
i = 0;
//verbose_stream() << "repair concat2 " << mk_bounded_pp(e, m) << "\n";
unsigned len_prefix = 0;
for (expr* arg : *e) {
auto len = lengths[i];
auto val_arg = val_e.extract(len_prefix, len);
//verbose_stream() << "repair concat3 " << mk_bounded_pp(arg, m) << " " << val_arg << "\n";
if (!update(arg, val_arg))
return false;
++i;
len_prefix += len;
}
return true;
}
bool seq_plugin::apply_update() {
double sum_scores = 0;
for (auto const& [e, val, score] : m_str_updates)
sum_scores += score;
for (auto const& [e, val, score] : m_int_updates)
sum_scores += score;
while (!m_str_updates.empty() || !m_int_updates.empty()) {
bool is_str_update = false;
unsigned i = m_str_updates.size();
double lim = sum_scores * ((double)ctx.rand() / random_gen().max_value());
if (i > 0) {
do {
lim -= m_str_updates[--i].m_score;
} while (lim >= 0 && i > 0);
}
is_str_update = lim == 0 || m_int_updates.empty();
if (!is_str_update) {
i = m_int_updates.size();
do {
lim -= m_str_updates[--i].m_score;
} while (lim >= 0 && i > 0);
}
if (is_str_update) {
auto [e, value, score] = m_str_updates[i];
if (update(e, value)) {
verbose_stream() << "set value " << mk_bounded_pp(e, m) << " := \"" << value << "\"\n";
m_str_updates.reset();
m_int_updates.reset();
return true;
}
sum_scores -= score;
m_str_updates[i] = m_str_updates.back();
m_str_updates.pop_back();
}
else {
auto [e, value, score] = m_int_updates[i];
verbose_stream() << "set value " << mk_bounded_pp(e, m) << " := " << value << "\n";
if (update(e, value)) {
m_int_updates.reset();
m_str_updates.reset();
return true;
}
sum_scores -= score;
m_int_updates[i] = m_int_updates.back();
m_int_updates.pop_back();
}
}
return false;
}
bool seq_plugin::update(expr* e, zstring const& value) {
if (value == strval0(e))
return true;
if (is_value(e))
return false;
if (get_eval(e).min_length > value.length() || get_eval(e).max_length < value.length())
return false;
strval0(e) = value;
ctx.new_value_eh(e);
return true;
}
bool seq_plugin::update(expr* e, rational const& value) {
expr_ref val(a.mk_int(value), m);
return ctx.set_value(e, val);
}
void seq_plugin::initialize() {
if (m_initialized)
return;
m_initialized = true;
for (auto lit : ctx.unit_literals()) {
auto e = ctx.atom(lit.var());
expr* x, * y, * z;
rational r;
if (!lit.sign() && (a.is_le(e, x, y) || a.is_ge(e, y, x))) {
if (a.is_numeral(x, r) && r.is_unsigned() && seq.str.is_length(y, z)) {
auto& ev = get_eval(z);
ev.min_length = std::max(ev.min_length, r.get_unsigned());
}
if (a.is_numeral(y, r) && r.is_unsigned() && seq.str.is_length(x, z)) {
auto& ev = get_eval(z);
ev.max_length = std::min(ev.max_length, r.get_unsigned());
}
}
}
for (auto t : ctx.subterms()) {
if (seq.str.is_string(t)) {
auto& ev = get_eval(t);
ev.min_length = strval0(t).length();
ev.max_length = strval0(t).length();
}
if (seq.str.is_concat(t)) {
unsigned min_length = 0;
unsigned max_length = 0;
for (expr* arg : *to_app(t)) {
auto& ev = get_eval(arg);
min_length += ev.min_length;
if (ev.max_length < UINT_MAX && max_length != UINT_MAX)
max_length += ev.max_length;
else
max_length = UINT_MAX;
}
auto& ev = get_eval(t);
ev.min_length = std::max(min_length, ev.min_length);
ev.max_length = std::min(max_length, ev.max_length);
}
if (seq.str.is_at(t)) {
auto& ev = get_eval(t);
ev.max_length = 1;
}
expr* x, * offset, * len;
rational len_r;
if (seq.str.is_extract(t, x, offset, len) && a.is_numeral(len, len_r)) {
auto& ev = get_eval(t);
if (len_r < 0)
ev.max_length = 0;
if (len_r.is_unsigned())
ev.max_length = std::min(ev.max_length, len_r.get_unsigned());
}
}
}
void seq_plugin::repair_literal(sat::literal lit) {
SASSERT(ctx.is_true(lit));
auto e = ctx.atom(lit.var());
if (!is_seq_predicate(e))
return;
auto a = to_app(e);
// verbose_stream() << "repair " << lit << " " << mk_pp(e, m) << " " << bval1(e) << "\n";
if (bval1(e) == lit.sign())
ctx.flip(lit.var());
}
bool seq_plugin::is_value(expr* e) {
if (seq.is_seq(e))
return get_eval(e).is_value;
return m.is_value(e);
}
}