diff --git a/src/ast/sls/sls_seq_plugin.cpp b/src/ast/sls/sls_seq_plugin.cpp index 667511e78..93e70dfd9 100644 --- a/src/ast/sls/sls_seq_plugin.cpp +++ b/src/ast/sls/sls_seq_plugin.cpp @@ -95,9 +95,17 @@ Equality solving using stochastic Nelson. #include "ast/sls/sls_seq_plugin.h" #include "ast/sls/sls_context.h" #include "ast/ast_pp.h" +#include "params/sls_params.hpp" namespace sls { + + struct zstring_hash_proc { + unsigned operator()(zstring const & s) const { + auto str = s.encode(); + return string_hash(str.c_str(), static_cast(s.length()), 17); + } + }; seq_plugin::seq_plugin(context& c): plugin(c), @@ -107,6 +115,8 @@ namespace sls { thrw(c.get_manager()) { m_fid = seq.get_family_id(); + sls_params p(c.get_params()); + m_str_update_strategy = p.str_update_strategy() == 0 ? EDIT_CHAR : EDIT_SUBSTR; } void seq_plugin::propagate_literal(sat::literal lit) { @@ -531,12 +541,12 @@ namespace sls { return true; if (len_u < val_x.length()) { for (unsigned i = 0; i + len_u < val_x.length(); ++i) - m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 }); + m_str_updates.push_back({ x, val_x.extract(i, len_u), 1 }); } if (!m_chars.empty()) { zstring ch(m_chars[ctx.rand(m_chars.size())]); m_str_updates.push_back({ x, val_x + ch, 1 }); - m_str_updates.push_back({ x, ch + val_x, 1 }); + m_str_updates.push_back({ x, ch + val_x, 1 }); } return apply_update(); } @@ -619,7 +629,7 @@ namespace sls { if (!is_value(x)) m_str_updates.push_back({ x, strval1(y), 1 }); if (!is_value(y)) - m_str_updates.push_back({ y, strval1(x), 1 }); + m_str_updates.push_back({ y, strval1(x), 1 }); } else { // disequality @@ -665,8 +675,39 @@ namespace sls { return d[n][m]; } - void seq_plugin::add_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars) { + if (m_str_update_strategy == EDIT_CHAR) + add_char_edit_updates(w, val, val_other, chars); + else + add_substr_edit_updates(w, val, val_other, chars); + } + + void seq_plugin::add_substr_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars) { + // all consecutive subsequences of val_other + hashtable> set; + set.insert(zstring("")); + for (unsigned i = 0; i < val_other.length(); ++i) { + for (unsigned j = val_other.length(); j > 0; ++j) { + zstring sub = val_other.extract(i, j); + if (set.contains(sub)) + break; + set.insert(sub); + } + } + + for (auto x : w) { + if (is_value(x)) + continue; + zstring const& a = strval0(x); + for (auto& seq : set) { + if (seq == a) + continue; + m_str_updates.push_back({ x, seq, 1 }); + } + } + } + + void seq_plugin::add_char_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars) { for (auto x : w) { if (is_value(x)) continue; @@ -1057,7 +1098,7 @@ namespace sls { } if (!is_value(xi)) { m_str_updates.push_back({ xi, vi.extract(0, i), score }); - m_str_updates.push_back({ xi, vi.extract(0, i) + zstring(vj[j]), score}); + m_str_updates.push_back({ xi, vi.extract(0, i) + zstring(vj[j]), score}); } if (!is_value(yj)) { m_str_updates.push_back({ yj, vj.extract(0, j), score }); @@ -1236,7 +1277,7 @@ namespace sls { if (!is_value(x)) { m_str_updates.push_back({ x, zstring(), 1 }); if (lenx > r && r >= 0) - m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 }); + m_str_updates.push_back({ x, sx.extract(0, r.get_unsigned()), 1 }); } if (!m.is_value(y)) { m_int_updates.push_back({ y, rational(lenx), 1 }); @@ -1287,7 +1328,7 @@ namespace sls { if (value == -1) { m_str_updates.push_back({ y, zstring(m_chars[ctx.rand(m_chars.size())]), 1 }); if (lenx > 0) - m_str_updates.push_back({ x, zstring(), 1 }); + m_str_updates.push_back({ x, zstring(), 1 }); } // change x: // insert y into x at offset @@ -1305,7 +1346,7 @@ namespace sls { if (offset_r.is_unsigned() && 0 <= value && offset_u + value < lenx) { unsigned offs = offset_u + value.get_unsigned(); for (unsigned i = offs; i < lenx; ++i) - m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 }); + m_str_updates.push_back({ y, sx.extract(offs, i - offs + 1), 1 }); } // change offset: @@ -1436,13 +1477,13 @@ namespace sls { if (idx > 0) su = sa.extract(0, idx); su = su + sa.extract(idx + sb.length(), sa.length() - idx - sb.length()); - m_str_updates.push_back({a, su, 1}); + m_str_updates.push_back({ a, su, 1}); } if (!m_chars.empty() && !is_value(b)) { zstring sb1 = sb + zstring(m_chars[ctx.rand(m_chars.size())]); zstring sb2 = zstring(m_chars[ctx.rand(m_chars.size())]) + sb; - m_str_updates.push_back({b, sb1, 1}); - m_str_updates.push_back({b, sb2, 1}); + m_str_updates.push_back({ b, sb1, 1}); + m_str_updates.push_back({ b, sb2, 1}); } } return apply_update(); diff --git a/src/ast/sls/sls_seq_plugin.h b/src/ast/sls/sls_seq_plugin.h index c3298cbae..ff71d257f 100644 --- a/src/ast/sls/sls_seq_plugin.h +++ b/src/ast/sls/sls_seq_plugin.h @@ -42,13 +42,19 @@ namespace sls { ptr_vector lhs, rhs; }; + enum edit_distance_strategy { + EDIT_CHAR, + EDIT_SUBSTR, + }; + seq_util seq; arith_util a; seq_rewriter rw; th_rewriter thrw; scoped_ptr_vector m_values; indexed_uint_set m_chars; // set of characters in the problem - bool m_initialized = false; + bool m_initialized = false; + edit_distance_strategy m_str_update_strategy; struct str_update { expr* e; @@ -122,6 +128,8 @@ namespace sls { unsigned edit_distance_with_updates(string_instance const& a, string_instance const& b); unsigned edit_distance(zstring const& a, zstring const& b); void add_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars); + void add_char_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars); + void add_substr_edit_updates(ptr_vector const& w, zstring const& val, zstring const& val_other, uint_set const& chars); // regex functionality diff --git a/src/params/sls_params.pyg b/src/params/sls_params.pyg index 854366609..89aa9834f 100644 --- a/src/params/sls_params.pyg +++ b/src/params/sls_params.pyg @@ -3,7 +3,7 @@ def_module_params('sls', description='Experimental Stochastic Local Search Solver (for QFBV only).', params=(max_memory_param(), ('max_restarts', UINT, UINT_MAX, 'maximum number of restarts'), - ('max_repairs', UINT, 1000, 'maximum number of repairs before restart'), + ('max_repairs', UINT, 1000, 'maximum number of repairs before restart'), ('walksat', BOOL, 1, 'use walksat assertion selection (instead of gsat)'), ('walksat_ucb', BOOL, 1, 'use bandit heuristic for walksat assertion selection (instead of random)'), ('walksat_ucb_constant', DOUBLE, 20.0, 'the ucb constant c in the term score + c * f(touched)'), @@ -24,5 +24,6 @@ def_module_params('sls', ('rescore', BOOL, 1, 'rescore/normalize top-level score every base restart interval'), ('dt_axiomatic', BOOL, True, 'use axiomatic mode or model reduction for datatype solver'), ('track_unsat', BOOL, 0, 'keep a list of unsat assertions as done in SAT - currently disabled internally'), - ('random_seed', UINT, 0, 'random seed') + ('random_seed', UINT, 0, 'random seed'), + ('str_update_strategy', UINT, 1, 'string update candidate selection: 0 - single character based update, 1 - subsequence based update') ))