mirror of
https://github.com/Z3Prover/z3
synced 2025-04-15 13:28:47 +00:00
Add new string repair heuristic (#7496)
* Fixed spurious counterexamples in seq-sls. Updates might not be identity mapping * Removed debug code * One check was missing * Trying different update generation function * Renamed function * Added parameter to select string update function
This commit is contained in:
parent
e002c57aa2
commit
19c95f8561
|
@ -95,9 +95,17 @@ Equality solving using stochastic Nelson.
|
||||||
#include "ast/sls/sls_seq_plugin.h"
|
#include "ast/sls/sls_seq_plugin.h"
|
||||||
#include "ast/sls/sls_context.h"
|
#include "ast/sls/sls_context.h"
|
||||||
#include "ast/ast_pp.h"
|
#include "ast/ast_pp.h"
|
||||||
|
#include "params/sls_params.hpp"
|
||||||
|
|
||||||
|
|
||||||
namespace sls {
|
namespace sls {
|
||||||
|
|
||||||
|
struct zstring_hash_proc {
|
||||||
|
unsigned operator()(zstring const & s) const {
|
||||||
|
auto str = s.encode();
|
||||||
|
return string_hash(str.c_str(), static_cast<unsigned>(s.length()), 17);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
seq_plugin::seq_plugin(context& c):
|
seq_plugin::seq_plugin(context& c):
|
||||||
plugin(c),
|
plugin(c),
|
||||||
|
@ -107,6 +115,8 @@ namespace sls {
|
||||||
thrw(c.get_manager())
|
thrw(c.get_manager())
|
||||||
{
|
{
|
||||||
m_fid = seq.get_family_id();
|
m_fid = seq.get_family_id();
|
||||||
|
sls_params p(c.get_params());
|
||||||
|
m_str_update_strategy = p.str_update_strategy() == 0 ? EDIT_CHAR : EDIT_SUBSTR;
|
||||||
}
|
}
|
||||||
|
|
||||||
void seq_plugin::propagate_literal(sat::literal lit) {
|
void seq_plugin::propagate_literal(sat::literal lit) {
|
||||||
|
@ -531,12 +541,12 @@ namespace sls {
|
||||||
return true;
|
return true;
|
||||||
if (len_u < val_x.length()) {
|
if (len_u < val_x.length()) {
|
||||||
for (unsigned i = 0; i + len_u < val_x.length(); ++i)
|
for (unsigned i = 0; i + len_u < val_x.length(); ++i)
|
||||||
m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 });
|
m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 });
|
||||||
}
|
}
|
||||||
if (!m_chars.empty()) {
|
if (!m_chars.empty()) {
|
||||||
zstring ch(m_chars[ctx.rand(m_chars.size())]);
|
zstring ch(m_chars[ctx.rand(m_chars.size())]);
|
||||||
m_str_updates.push_back({ x, val_x + ch, 1 });
|
m_str_updates.push_back({ x, val_x + ch, 1 });
|
||||||
m_str_updates.push_back({ x, ch + val_x, 1 });
|
m_str_updates.push_back({ x, ch + val_x, 1 });
|
||||||
}
|
}
|
||||||
return apply_update();
|
return apply_update();
|
||||||
}
|
}
|
||||||
|
@ -619,7 +629,7 @@ namespace sls {
|
||||||
if (!is_value(x))
|
if (!is_value(x))
|
||||||
m_str_updates.push_back({ x, strval1(y), 1 });
|
m_str_updates.push_back({ x, strval1(y), 1 });
|
||||||
if (!is_value(y))
|
if (!is_value(y))
|
||||||
m_str_updates.push_back({ y, strval1(x), 1 });
|
m_str_updates.push_back({ y, strval1(x), 1 });
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// disequality
|
// disequality
|
||||||
|
@ -665,8 +675,39 @@ namespace sls {
|
||||||
return d[n][m];
|
return d[n][m];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void seq_plugin::add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
|
void seq_plugin::add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
|
||||||
|
if (m_str_update_strategy == EDIT_CHAR)
|
||||||
|
add_char_edit_updates(w, val, val_other, chars);
|
||||||
|
else
|
||||||
|
add_substr_edit_updates(w, val, val_other, chars);
|
||||||
|
}
|
||||||
|
|
||||||
|
void seq_plugin::add_substr_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
|
||||||
|
// all consecutive subsequences of val_other
|
||||||
|
hashtable<zstring, zstring_hash_proc, default_eq<zstring>> set;
|
||||||
|
set.insert(zstring(""));
|
||||||
|
for (unsigned i = 0; i < val_other.length(); ++i) {
|
||||||
|
for (unsigned j = val_other.length(); j > 0; ++j) {
|
||||||
|
zstring sub = val_other.extract(i, j);
|
||||||
|
if (set.contains(sub))
|
||||||
|
break;
|
||||||
|
set.insert(sub);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto x : w) {
|
||||||
|
if (is_value(x))
|
||||||
|
continue;
|
||||||
|
zstring const& a = strval0(x);
|
||||||
|
for (auto& seq : set) {
|
||||||
|
if (seq == a)
|
||||||
|
continue;
|
||||||
|
m_str_updates.push_back({ x, seq, 1 });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void seq_plugin::add_char_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars) {
|
||||||
for (auto x : w) {
|
for (auto x : w) {
|
||||||
if (is_value(x))
|
if (is_value(x))
|
||||||
continue;
|
continue;
|
||||||
|
@ -1057,7 +1098,7 @@ namespace sls {
|
||||||
}
|
}
|
||||||
if (!is_value(xi)) {
|
if (!is_value(xi)) {
|
||||||
m_str_updates.push_back({ xi, vi.extract(0, i), score });
|
m_str_updates.push_back({ xi, vi.extract(0, i), score });
|
||||||
m_str_updates.push_back({ xi, vi.extract(0, i) + zstring(vj[j]), score});
|
m_str_updates.push_back({ xi, vi.extract(0, i) + zstring(vj[j]), score});
|
||||||
}
|
}
|
||||||
if (!is_value(yj)) {
|
if (!is_value(yj)) {
|
||||||
m_str_updates.push_back({ yj, vj.extract(0, j), score });
|
m_str_updates.push_back({ yj, vj.extract(0, j), score });
|
||||||
|
@ -1236,7 +1277,7 @@ namespace sls {
|
||||||
if (!is_value(x)) {
|
if (!is_value(x)) {
|
||||||
m_str_updates.push_back({ x, zstring(), 1 });
|
m_str_updates.push_back({ x, zstring(), 1 });
|
||||||
if (lenx > r && r >= 0)
|
if (lenx > r && r >= 0)
|
||||||
m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 });
|
m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 });
|
||||||
}
|
}
|
||||||
if (!m.is_value(y)) {
|
if (!m.is_value(y)) {
|
||||||
m_int_updates.push_back({ y, rational(lenx), 1 });
|
m_int_updates.push_back({ y, rational(lenx), 1 });
|
||||||
|
@ -1287,7 +1328,7 @@ namespace sls {
|
||||||
if (value == -1) {
|
if (value == -1) {
|
||||||
m_str_updates.push_back({ y, zstring(m_chars[ctx.rand(m_chars.size())]), 1 });
|
m_str_updates.push_back({ y, zstring(m_chars[ctx.rand(m_chars.size())]), 1 });
|
||||||
if (lenx > 0)
|
if (lenx > 0)
|
||||||
m_str_updates.push_back({ x, zstring(), 1 });
|
m_str_updates.push_back({ x, zstring(), 1 });
|
||||||
}
|
}
|
||||||
// change x:
|
// change x:
|
||||||
// insert y into x at offset
|
// insert y into x at offset
|
||||||
|
@ -1305,7 +1346,7 @@ namespace sls {
|
||||||
if (offset_r.is_unsigned() && 0 <= value && offset_u + value < lenx) {
|
if (offset_r.is_unsigned() && 0 <= value && offset_u + value < lenx) {
|
||||||
unsigned offs = offset_u + value.get_unsigned();
|
unsigned offs = offset_u + value.get_unsigned();
|
||||||
for (unsigned i = offs; i < lenx; ++i)
|
for (unsigned i = offs; i < lenx; ++i)
|
||||||
m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 });
|
m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 });
|
||||||
}
|
}
|
||||||
|
|
||||||
// change offset:
|
// change offset:
|
||||||
|
@ -1436,13 +1477,13 @@ namespace sls {
|
||||||
if (idx > 0)
|
if (idx > 0)
|
||||||
su = sa.extract(0, idx);
|
su = sa.extract(0, idx);
|
||||||
su = su + sa.extract(idx + sb.length(), sa.length() - idx - sb.length());
|
su = su + sa.extract(idx + sb.length(), sa.length() - idx - sb.length());
|
||||||
m_str_updates.push_back({a, su, 1});
|
m_str_updates.push_back({ a, su, 1});
|
||||||
}
|
}
|
||||||
if (!m_chars.empty() && !is_value(b)) {
|
if (!m_chars.empty() && !is_value(b)) {
|
||||||
zstring sb1 = sb + zstring(m_chars[ctx.rand(m_chars.size())]);
|
zstring sb1 = sb + zstring(m_chars[ctx.rand(m_chars.size())]);
|
||||||
zstring sb2 = zstring(m_chars[ctx.rand(m_chars.size())]) + sb;
|
zstring sb2 = zstring(m_chars[ctx.rand(m_chars.size())]) + sb;
|
||||||
m_str_updates.push_back({b, sb1, 1});
|
m_str_updates.push_back({ b, sb1, 1});
|
||||||
m_str_updates.push_back({b, sb2, 1});
|
m_str_updates.push_back({ b, sb2, 1});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return apply_update();
|
return apply_update();
|
||||||
|
|
|
@ -42,13 +42,19 @@ namespace sls {
|
||||||
ptr_vector<expr> lhs, rhs;
|
ptr_vector<expr> lhs, rhs;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum edit_distance_strategy {
|
||||||
|
EDIT_CHAR,
|
||||||
|
EDIT_SUBSTR,
|
||||||
|
};
|
||||||
|
|
||||||
seq_util seq;
|
seq_util seq;
|
||||||
arith_util a;
|
arith_util a;
|
||||||
seq_rewriter rw;
|
seq_rewriter rw;
|
||||||
th_rewriter thrw;
|
th_rewriter thrw;
|
||||||
scoped_ptr_vector<eval> m_values;
|
scoped_ptr_vector<eval> m_values;
|
||||||
indexed_uint_set m_chars; // set of characters in the problem
|
indexed_uint_set m_chars; // set of characters in the problem
|
||||||
bool m_initialized = false;
|
bool m_initialized = false;
|
||||||
|
edit_distance_strategy m_str_update_strategy;
|
||||||
|
|
||||||
struct str_update {
|
struct str_update {
|
||||||
expr* e;
|
expr* e;
|
||||||
|
@ -122,6 +128,8 @@ namespace sls {
|
||||||
unsigned edit_distance_with_updates(string_instance const& a, string_instance const& b);
|
unsigned edit_distance_with_updates(string_instance const& a, string_instance const& b);
|
||||||
unsigned edit_distance(zstring const& a, zstring const& b);
|
unsigned edit_distance(zstring const& a, zstring const& b);
|
||||||
void add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
|
void add_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
|
||||||
|
void add_char_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
|
||||||
|
void add_substr_edit_updates(ptr_vector<expr> const& w, zstring const& val, zstring const& val_other, uint_set const& chars);
|
||||||
|
|
||||||
// regex functionality
|
// regex functionality
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ def_module_params('sls',
|
||||||
description='Experimental Stochastic Local Search Solver (for QFBV only).',
|
description='Experimental Stochastic Local Search Solver (for QFBV only).',
|
||||||
params=(max_memory_param(),
|
params=(max_memory_param(),
|
||||||
('max_restarts', UINT, UINT_MAX, 'maximum number of restarts'),
|
('max_restarts', UINT, UINT_MAX, 'maximum number of restarts'),
|
||||||
('max_repairs', UINT, 1000, 'maximum number of repairs before restart'),
|
('max_repairs', UINT, 1000, 'maximum number of repairs before restart'),
|
||||||
('walksat', BOOL, 1, 'use walksat assertion selection (instead of gsat)'),
|
('walksat', BOOL, 1, 'use walksat assertion selection (instead of gsat)'),
|
||||||
('walksat_ucb', BOOL, 1, 'use bandit heuristic for walksat assertion selection (instead of random)'),
|
('walksat_ucb', BOOL, 1, 'use bandit heuristic for walksat assertion selection (instead of random)'),
|
||||||
('walksat_ucb_constant', DOUBLE, 20.0, 'the ucb constant c in the term score + c * f(touched)'),
|
('walksat_ucb_constant', DOUBLE, 20.0, 'the ucb constant c in the term score + c * f(touched)'),
|
||||||
|
@ -24,5 +24,6 @@ def_module_params('sls',
|
||||||
('rescore', BOOL, 1, 'rescore/normalize top-level score every base restart interval'),
|
('rescore', BOOL, 1, 'rescore/normalize top-level score every base restart interval'),
|
||||||
('dt_axiomatic', BOOL, True, 'use axiomatic mode or model reduction for datatype solver'),
|
('dt_axiomatic', BOOL, True, 'use axiomatic mode or model reduction for datatype solver'),
|
||||||
('track_unsat', BOOL, 0, 'keep a list of unsat assertions as done in SAT - currently disabled internally'),
|
('track_unsat', BOOL, 0, 'keep a list of unsat assertions as done in SAT - currently disabled internally'),
|
||||||
('random_seed', UINT, 0, 'random seed')
|
('random_seed', UINT, 0, 'random seed'),
|
||||||
|
('str_update_strategy', UINT, 1, 'string update candidate selection: 0 - single character based update, 1 - subsequence based update')
|
||||||
))
|
))
|
||||||
|
|
Loading…
Reference in a new issue