From dafee71500922eb6e8962154c142a4ec2266d61f Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Thu, 21 Jan 2021 14:24:26 -0800 Subject: [PATCH] reshuffle unicode support to use global parameter, and use bit-vectors on demand --- src/ast/rewriter/seq_rewriter.cpp | 3 + src/ast/seq_decl_plugin.cpp | 126 +++++----- src/ast/seq_decl_plugin.h | 23 +- src/params/context_params.cpp | 5 + src/params/context_params.h | 1 + src/smt/params/smt_params_helper.pyg | 1 - src/smt/params/theory_seq_params.cpp | 1 - src/smt/params/theory_seq_params.h | 4 +- src/smt/seq_unicode.cpp | 350 ++++++++++++++++++--------- src/smt/seq_unicode.h | 82 +++---- src/smt/theory_seq.cpp | 38 +-- 11 files changed, 367 insertions(+), 267 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 6b024e243..0503b73f8 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -719,6 +719,9 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con SASSERT(num_args == 1); st = mk_str_stoi(args[0], result); break; + case OP_CHAR_LE: + case OP_CHAR_CONST: + break; case _OP_STRING_CONCAT: case _OP_STRING_PREFIX: case _OP_STRING_SUFFIX: diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index ea47bc455..f75ee7b71 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -16,6 +16,7 @@ Author: Revision History: --*/ +#include "util/gparams.h" #include "ast/seq_decl_plugin.h" #include "ast/arith_decl_plugin.h" #include "ast/array_decl_plugin.h" @@ -47,7 +48,7 @@ static bool is_octal_digit(char ch, unsigned& d) { return false; } -static bool is_escape_char(char const *& s, unsigned& result) { +bool zstring::is_escape_char(char const *& s, unsigned& result) { unsigned d1, d2, d3; if (*s != '\\' || *(s + 1) == 0) { return false; @@ -89,10 +90,8 @@ static bool is_escape_char(char const *& s, unsigned& result) { result = 16*result + d1; } else if (*(s+3+i) == '}') { -#if !Z3_USE_UNICODE - if (result > 255) + if (result > 255 && !uses_unicode()) throw default_exception("unicode characters outside of byte range are not supported"); -#endif s += 4 + i; return true; } @@ -113,10 +112,8 @@ static bool is_escape_char(char const *& s, unsigned& result) { break; } } -#if !Z3_USE_UNICODE - if (result > 255) + if (result > 255 && !uses_unicode()) throw default_exception("unicode characters outside of byte range are not supported"); -#endif s += 3 + i; return true; } @@ -177,14 +174,8 @@ zstring::zstring(char const* s) { SASSERT(well_formed()); } -zstring::zstring(unsigned num_bits, bool const* ch) { - SASSERT(num_bits == 8); // TBD for unicode - unsigned n = 0; - for (unsigned i = 0; i < num_bits; ++i) { - n |= (((unsigned)ch[i]) << i); - } - m_buffer.push_back(n); - SASSERT(well_formed()); +bool zstring::uses_unicode() const { + return gparams::get_value("unicode") == "true"; } bool zstring::well_formed() const { @@ -402,7 +393,9 @@ seq_decl_plugin::seq_decl_plugin(): m_init(false), m_char(nullptr), m_reglan(nullptr), m_has_re(false), - m_has_seq(false) {} + m_has_seq(false) { + m_unicode = gparams::get_value("unicode") == "true"; +} void seq_decl_plugin::finalize() { for (psig* s : m_sigs) { @@ -623,11 +616,9 @@ void seq_decl_plugin::init() { m_sigs[OP_SEQ_REPLACE_RE] = alloc(psig, m, "str.replace_re", 1, 3, seqAreAseqA, seqA); m_sigs[OP_SEQ_REPLACE_ALL] = alloc(psig, m, "str.replace_all", 1, 3, seqAseqAseqA, seqA); m_sigs[OP_STRING_CONST] = nullptr; -#if Z3_USE_UNICODE m_sigs[OP_CHAR_CONST] = nullptr; sort* charTcharT[2] = { m_char, m_char }; - m_sigs[OP_CHAR_LE] = alloc(psig, m, "char.<=", 0, 2, charTcharT, boolT); -#endif + m_sigs[OP_CHAR_LE] = unicode() ? alloc(psig, m, "char.<=", 0, 2, charTcharT, boolT) : nullptr; m_sigs[_OP_STRING_STRIDOF] = alloc(psig, m, "str.indexof", 0, 3, str2TintT, intT); m_sigs[_OP_STRING_STRREPL] = alloc(psig, m, "str.replace", 0, 3, str3T, strT); m_sigs[_OP_STRING_FROM_CHAR] = alloc(psig, m, "char", 1, 0, nullptr, strT); @@ -654,11 +645,10 @@ void seq_decl_plugin::init() { void seq_decl_plugin::set_manager(ast_manager* m, family_id id) { decl_plugin::set_manager(m, id); bv_util bv(*m); -#if Z3_USE_UNICODE - m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr)); -#else - m_char = bv.mk_sort(8); -#endif + if (unicode()) + m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr)); + else + m_char = bv.mk_sort(8); m->inc_ref(m_char); parameter param(m_char); m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, SEQ_SORT, 1, ¶m)); @@ -692,10 +682,8 @@ sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter } return m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, num_parameters, parameters)); } -#if Z3_USE_UNICODE case _CHAR_SORT: return m_char; -#endif case _STRING_SORT: return m_string; case _REGLAN_SORT: @@ -941,7 +929,6 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, m_has_re = true; return mk_str_fun(k, arity, domain, range, OP_SEQ_TO_RE); -#if Z3_USE_UNICODE case OP_CHAR_LE: if (arity == 2 && domain[0] == m_char && domain[1] == m_char) { return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, m.mk_bool_sort(), func_decl_info(m_family_id, k, 0, nullptr)); @@ -955,7 +942,6 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, m.raise_exception("invalid character declaration"); } return m.mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, num_parameters, parameters)); -#endif case OP_SEQ_IN_RE: m_has_re = true; @@ -1046,14 +1032,15 @@ app* seq_decl_plugin::mk_string(zstring const& s) { } app* seq_decl_plugin::mk_char(unsigned u) { -#if Z3_USE_UNICODE - parameter param(u); - func_decl* f = m_manager->mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, 1, ¶m)); - return m_manager->mk_const(f); -#else - UNREACHABLE(); - return nullptr; -#endif + if (unicode()) { + parameter param(u); + func_decl* f = m_manager->mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, 1, ¶m)); + return m_manager->mk_const(f); + } + else { + UNREACHABLE(); + return nullptr; + } } @@ -1117,13 +1104,11 @@ bool seq_decl_plugin::are_distinct(app* a, app* b) const { if (is_app_of(b, m_family_id, OP_SEQ_EMPTY) && is_app_of(a, m_family_id, OP_SEQ_UNIT)) { return true; - } -#if Z3_USE_UNICODE - if (is_app_of(a, m_family_id, OP_CHAR_CONST) && + } + if (unicode() && is_app_of(a, m_family_id, OP_CHAR_CONST) && is_app_of(b, m_family_id, OP_CHAR_CONST)) { return true; } -#endif return false; } @@ -1160,6 +1145,17 @@ app* seq_util::str::mk_char(unsigned ch) const { return u.mk_char(ch); } +app* seq_util::str::mk_char_bit(expr* e, unsigned idx) { + return u.mk_char_bit(e, idx); +} + +app* seq_util::mk_char_bit(expr* e, unsigned i) { + parameter params[2] = { parameter(symbol("char.bit")), parameter(i) }; + sort* range = m.mk_bool_sort(); + func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 2, params, 1, &e, range); + return m.mk_app(f, 1, &e); +} + bv_util& seq_util::bv() const { if (!m_bv) m_bv = alloc(bv_util, m); return *m_bv.get(); @@ -1178,36 +1174,44 @@ unsigned seq_util::max_mul(unsigned x, unsigned y) const { bool seq_util::is_const_char(expr* e, unsigned& c) const { -#if Z3_USE_UNICODE - return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true); -#else - rational r; - unsigned sz; - return bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned() && (c = r.get_unsigned(), true); -#endif + if (seq.unicode()) { + return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true); + } + else { + rational r; + unsigned sz; + return bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned() && (c = r.get_unsigned(), true); + } +} + +bool seq_util::is_char_le(expr const* e) const { + if (seq.unicode()) + return is_app_of(e, m_fid, OP_CHAR_LE); + else + return bv().is_bv_ule(e) && is_char(to_app(e)->get_arg(0)); } app* seq_util::mk_char(unsigned ch) const { -#if Z3_USE_UNICODE - return seq.mk_char(ch); -#else - return bv().mk_numeral(rational(ch), 8); -#endif + if (seq.unicode()) + return seq.mk_char(ch); + else + return bv().mk_numeral(rational(ch), 8); } app* seq_util::mk_le(expr* ch1, expr* ch2) const { expr_ref _ch1(ch1, m), _ch2(ch2, m); -#if Z3_USE_UNICODE - expr* es[2] = { ch1, ch2 }; - return m.mk_app(m_fid, OP_CHAR_LE, 2, es); -#else - rational r1, r2; - if (bv().is_numeral(ch1, r1) && bv().is_numeral(ch2, r2)) { - return m.mk_bool_val(r1 <= r2); + if (seq.unicode()) { + expr* es[2] = { ch1, ch2 }; + return m.mk_app(m_fid, OP_CHAR_LE, 2, es); + } + else { + rational r1, r2; + if (bv().is_numeral(ch1, r1) && bv().is_numeral(ch2, r2)) { + return m.mk_bool_val(r1 <= r2); + } + return bv().mk_ule(ch1, ch2); } - return bv().mk_ule(ch1, ch2); -#endif } app* seq_util::mk_lt(expr* ch1, expr* ch2) const { diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 690bfb19d..684611f6d 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -27,14 +27,11 @@ Revision History: #include #include "util/lbool.h" -#define Z3_USE_UNICODE 0 enum seq_sort_kind { SEQ_SORT, RE_SORT, -#if Z3_USE_UNICODE _CHAR_SORT, // internal only -#endif _STRING_SORT, _REGLAN_SORT }; @@ -90,10 +87,8 @@ enum seq_op_kind { OP_STRING_TO_CODE, OP_STRING_FROM_CODE, -#if Z3_USE_UNICODE OP_CHAR_CONST, // constant character OP_CHAR_LE, // Unicode comparison -#endif // internal only operators. Converted to SEQ variants. _OP_STRING_FROM_CHAR, _OP_STRING_STRREPL, @@ -120,13 +115,15 @@ class zstring { private: buffer m_buffer; bool well_formed() const; + bool uses_unicode() const; + bool is_escape_char(char const *& s, unsigned& result); public: static unsigned max_char() { return 196607; } + static unsigned num_bits() { return 16; } zstring() {} zstring(char const* s); zstring(const std::string &str) : zstring(str.c_str()) {} zstring(unsigned sz, unsigned const* s) { m_buffer.append(sz, s); SASSERT(well_formed()); } - zstring(unsigned num_bits, bool const* ch); zstring(unsigned ch); zstring replace(zstring const& src, zstring const& dst) const; zstring reverse() const; @@ -174,6 +171,7 @@ class seq_decl_plugin : public decl_plugin { sort* m_reglan; bool m_has_re; bool m_has_seq; + bool m_unicode { false }; void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng); @@ -202,6 +200,8 @@ public: ~seq_decl_plugin() override {} void finalize() override; + bool unicode() const { return m_unicode; } + decl_plugin * mk_fresh() override { return alloc(seq_decl_plugin); } sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) override; @@ -260,14 +260,12 @@ public: bool is_re(expr* e, sort*& seq) const { return is_re(m.get_sort(e), seq); } bool is_char(expr* e) const { return is_char(m.get_sort(e)); } bool is_const_char(expr* e, unsigned& c) const; -#if Z3_USE_UNICODE - bool is_char_le(expr const* e) const { return is_app_of(e, m_fid, OP_CHAR_LE); } -#else - bool is_char_le(expr const* e) const { return bv().is_bv_ule(e) && is_char(to_app(e)->get_arg(0)); } -#endif + bool is_const_char(expr* e) const { unsigned c; return is_const_char(e, c); } + bool is_char_le(expr const* e) const; + app* mk_char_bit(expr* e, unsigned i); app* mk_char(unsigned ch) const; app* mk_le(expr* ch1, expr* ch2) const; - app* mk_lt(expr* ch1, expr* ch2) const; + app* mk_lt(expr* ch1, expr* ch2) const; app* mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range); bool is_skolem(expr const* e) const { return is_app_of(e, m_fid, _OP_SEQ_SKOLEM); } @@ -317,6 +315,7 @@ public: app* mk_replace(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c}; return m.mk_app(m_fid, OP_SEQ_REPLACE, 3, es); } app* mk_unit(expr* u) const { return m.mk_app(m_fid, OP_SEQ_UNIT, 1, &u); } app* mk_char(zstring const& s, unsigned idx) const; + app* mk_char_bit(expr* e, unsigned i); app* mk_itos(expr* i) const { return m.mk_app(m_fid, OP_STRING_ITOS, 1, &i); } app* mk_stoi(expr* s) const { return m.mk_app(m_fid, OP_STRING_STOI, 1, &s); } app* mk_is_empty(expr* s) const; diff --git a/src/params/context_params.cpp b/src/params/context_params.cpp index 51881ae09..95e612512 100644 --- a/src/params/context_params.cpp +++ b/src/params/context_params.cpp @@ -114,6 +114,9 @@ void context_params::set(char const * param, char const * value) { else if (p == "smtlib2_compliant") { set_bool(m_smtlib2_compliant, param, value); } + else if (p == "unicode") { + set_bool(m_unicode, param, value); + } else { param_descrs d; collect_param_descrs(d); @@ -145,6 +148,7 @@ void context_params::updt_params(params_ref const & p) { m_debug_ref_count = p.get_bool("debug_ref_count", m_debug_ref_count); m_smtlib2_compliant = p.get_bool("smtlib2_compliant", m_smtlib2_compliant); m_statistics = p.get_bool("stats", m_statistics); + m_unicode = p.get_bool("unicode", m_unicode); } void context_params::collect_param_descrs(param_descrs & d) { @@ -161,6 +165,7 @@ void context_params::collect_param_descrs(param_descrs & d) { d.insert("debug_ref_count", CPK_BOOL, "debug support for AST reference counting", "false"); d.insert("smtlib2_compliant", CPK_BOOL, "enable/disable SMT-LIB 2.0 compliance", "false"); d.insert("stats", CPK_BOOL, "enable/disable statistics", "false"); + d.insert("unicode", CPK_BOOL, "use unicode strings instead of ASCII strings"); // statistics are hidden as they are controlled by the /st option. collect_solver_param_descrs(d); } diff --git a/src/params/context_params.h b/src/params/context_params.h index acf72f823..c5555dfb0 100644 --- a/src/params/context_params.h +++ b/src/params/context_params.h @@ -44,6 +44,7 @@ public: bool m_smtlib2_compliant { false }; // it must be here because it enable/disable the use of coercions in the ast_manager. unsigned m_timeout { UINT_MAX } ; bool m_statistics { false }; + bool m_unicode { false }; unsigned rlimit() const { return m_rlimit; } context_params(); diff --git a/src/smt/params/smt_params_helper.pyg b/src/smt/params/smt_params_helper.pyg index 7308b7f0b..b139ec744 100644 --- a/src/smt/params/smt_params_helper.pyg +++ b/src/smt/params/smt_params_helper.pyg @@ -104,7 +104,6 @@ def_module_params(module_name='smt', ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'), - ('seq.use_unicode', BOOL, False, 'dev flag (not for users) enable unicode semantics'), ('str.strong_arrangements', BOOL, True, 'assert equivalences instead of implications when generating string arrangement axioms'), ('str.aggressive_length_testing', BOOL, False, 'prioritize testing concrete length values over generating more options'), ('str.aggressive_value_testing', BOOL, False, 'prioritize testing concrete string constant values over generating more options'), diff --git a/src/smt/params/theory_seq_params.cpp b/src/smt/params/theory_seq_params.cpp index bd67e2bda..68cce6e66 100644 --- a/src/smt/params/theory_seq_params.cpp +++ b/src/smt/params/theory_seq_params.cpp @@ -21,5 +21,4 @@ void theory_seq_params::updt_params(params_ref const & _p) { smt_params_helper p(_p); m_split_w_len = p.seq_split_w_len(); m_seq_validate = p.seq_validate(); - m_seq_use_unicode = p.seq_use_unicode(); } diff --git a/src/smt/params/theory_seq_params.h b/src/smt/params/theory_seq_params.h index 9d227e5fc..002d6c9e6 100644 --- a/src/smt/params/theory_seq_params.h +++ b/src/smt/params/theory_seq_params.h @@ -24,13 +24,11 @@ struct theory_seq_params { */ bool m_split_w_len; bool m_seq_validate; - bool m_seq_use_unicode; theory_seq_params(params_ref const & p = params_ref()): m_split_w_len(false), - m_seq_validate(false), - m_seq_use_unicode(false) + m_seq_validate(false) { updt_params(p); } diff --git a/src/smt/seq_unicode.cpp b/src/smt/seq_unicode.cpp index 7d36453be..5b5d43cd7 100644 --- a/src/smt/seq_unicode.cpp +++ b/src/smt/seq_unicode.cpp @@ -17,7 +17,6 @@ Author: #include "smt/seq_unicode.h" #include "smt/smt_context.h" -#include "smt/smt_arith_value.h" #include "util/trail.h" namespace smt { @@ -26,148 +25,261 @@ namespace smt { th(th), m(th.get_manager()), seq(m), - m_qhead(0), - m_var_value_hash(*this), - m_var_value_eq(*this), - m_var_value_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_var_value_hash, m_var_value_eq) - {} - - // <= atomic constraints on characters - void seq_unicode::assign_le(theory_var v1, theory_var v2, literal lit) { - dl.init_var(v1); - dl.init_var(v2); - ctx().push_trail(push_back_vector>(m_asserted_edges)); - m_asserted_edges.push_back(dl.add_edge(v1, v2, s_integer(0), lit)); + bv(m), + m_bb(m, ctx().get_fparams()) + { + m_enabled = gparams::get_value("unicode") == "true"; } - // < atomic constraint on characters - void seq_unicode::assign_lt(theory_var v1, theory_var v2, literal lit) { - dl.init_var(v1); - dl.init_var(v2); - ctx().push_trail(push_back_vector>(m_asserted_edges)); - m_asserted_edges.push_back(dl.add_edge(v1, v2, s_integer(1), lit)); + + struct seq_unicode::reset_bits : public trail { + seq_unicode& s; + unsigned idx; + + reset_bits(seq_unicode& s, unsigned idx): + s(s), + idx(idx) + {} + + void undo(context& ctx) override { + s.m_bits[idx].reset(); + s.m_ebits[idx].reset(); + } + }; + + bool seq_unicode::has_bits(theory_var v) const { + return (m_bits.size() > (unsigned)v) && !m_bits[v].empty(); } - literal seq_unicode::mk_literal(expr* e) { - expr_ref _e(e, m); - th.ensure_enode(e); - return ctx().get_literal(e); + void seq_unicode::init_bits(theory_var v) { + if (has_bits(v)) + return; + m_bits.reserve(v + 1); + auto& bits = m_bits[v]; + expr* e = th.get_expr(v); + while ((unsigned) v >= m_ebits.size()) + m_ebits.push_back(expr_ref_vector(m)); + ctx().push_trail(reset_bits(*this, v)); + auto& ebits = m_ebits[v]; + SASSERT(ebits.empty()); + for (unsigned i = 0; i < zstring::num_bits(); ++i) + ebits.push_back(seq.mk_char_bit(e, i)); + ctx().internalize(ebits.c_ptr(), ebits.size(), true); + for (expr* arg : ebits) + bits.push_back(literal(ctx().get_bool_var(arg))); } - void seq_unicode::adapt_eq(theory_var v1, theory_var v2) { - expr* e1 = th.get_expr(v1); - expr* e2 = th.get_expr(v2); - literal eq = th.mk_eq(e1, e2, false); - literal le = mk_literal(seq.mk_le(e1, e2)); - literal ge = mk_literal(seq.mk_le(e2, e1)); - add_axiom(~eq, le); - add_axiom(~eq, ge); - add_axiom(le, ge, eq); + void seq_unicode::internalize_le(literal lit, app* term) { + expr* x = nullptr, *y = nullptr; + VERIFY(seq.is_char_le(term, x, y)); + theory_var v1 = ctx().get_enode(x)->get_th_var(th.get_id()); + theory_var v2 = ctx().get_enode(y)->get_th_var(th.get_id()); + auto const& b1 = get_ebits(v1); + auto const& b2 = get_ebits(v2); + expr_ref e(m); + m_bb.mk_ule(b1.size(), b1.c_ptr(), b2.c_ptr(), e); + literal le = th.mk_literal(e); + ctx().mk_th_axiom(th.get_id(), ~lit, le); + ctx().mk_th_axiom(th.get_id(), lit, ~le); + } + + literal_vector const& seq_unicode::get_bits(theory_var v) { + init_bits(v); + return m_bits[v]; + } + + expr_ref_vector const& seq_unicode::get_ebits(theory_var v) { + init_bits(v); + return m_ebits[v]; } // = on characters - void seq_unicode::new_eq_eh(theory_var v1, theory_var v2) { - adapt_eq(v1, v2); + void seq_unicode::new_eq_eh(theory_var v, theory_var w) { + if (has_bits(v) && has_bits(w)) { + auto& a = get_bits(v); + auto& b = get_bits(w); + unsigned i = a.size(); + literal _eq = null_literal; + auto eq = [&]() { + if (_eq == null_literal) + _eq = th.mk_eq(th.get_expr(v), th.get_expr(w), false); + return _eq; + }; + for (; i-- > 0; ) { + lbool v1 = ctx().get_assignment(a[i]); + lbool v2 = ctx().get_assignment(b[i]); + if (v1 != l_undef && v2 != l_undef && v1 != v2) { + enforce_ackerman(v, w); + return; + } + if (v1 == l_true) + ctx().mk_th_axiom(th.get_id(), ~eq(), ~a[i], b[i]); + if (v1 == l_false) + ctx().mk_th_axiom(th.get_id(), ~eq(), a[i], ~b[i]); + if (v2 == l_true) + ctx().mk_th_axiom(th.get_id(), ~eq(), a[i], ~b[i]); + if (v2 == l_false) + ctx().mk_th_axiom(th.get_id(), ~eq(), ~a[i], b[i]); + } + } } // != on characters - void seq_unicode::new_diseq_eh(theory_var v1, theory_var v2) { - adapt_eq(v1, v2); + void seq_unicode::new_diseq_eh(theory_var v, theory_var w) { + if (has_bits(v) && has_bits(w)) { + auto& a = get_bits(v); + auto& b = get_bits(w); + for (unsigned i = a.size(); i-- > 0; ) { + lbool v1 = ctx().get_assignment(a[i]); + lbool v2 = ctx().get_assignment(b[i]); + if (v1 == l_undef || v2 == l_undef || v1 != v2) + return; + } + enforce_ackerman(v, w); + } } - bool seq_unicode::final_check() { - // ensure all variables are above 0 and less than zstring::max_char() - bool added_constraint = false; - // TBD: shift assignments on variables that are not lower-bounded, so that they are "nice" (have values 'a', 'b', ...) - // TBD: set "zero" to a zero value. - // TBD: ensure that unicode constants have the right values - arith_util a(m); - arith_value avalue(m); - avalue.init(&ctx()); - for (unsigned v = 0; !ctx().inconsistent() && v < th.get_num_vars(); ++v) { - if (!seq.is_char(th.get_expr(v))) - continue; - dl.init_var(v); - auto val = dl.get_assignment(v).get_int(); - if (val > static_cast(zstring::max_char())) { - expr_ref ch(seq.str.mk_char(zstring::max_char()), m); - enode* n = th.ensure_enode(ch); - theory_var v_max = n->get_th_var(th.get_id()); - assign_le(v, v_max, null_literal); - added_constraint = true; - continue; - } - if (val < 0) { - expr_ref ch(seq.str.mk_char(0), m); - enode* n = th.ensure_enode(ch); - theory_var v_min = n->get_th_var(th.get_id()); - assign_le(v_min, v, null_literal); - dl.init_var(v_min); - dl.set_to_zero(v_min); - added_constraint = true; - continue; - } - // ensure str.to_code(unit(v)) = val - expr_ref ch(m); - if (false) { - /// m_rewrite.coalesce_chars(); - ch = seq.str.mk_string(zstring(val)); - } - else { - ch = seq.str.mk_unit(seq.str.mk_char(val)); - } - expr_ref code(seq.str.mk_to_code(ch), m); - rational val2; - if (avalue.get_value(code, val2) && val2 == rational(val)) - continue; - add_axiom(th.mk_eq(code, a.mk_int(val), false)); - added_constraint = true; + void seq_unicode::new_const_char(theory_var v, unsigned c) { + auto& bits = get_bits(v); + for (auto b : bits) { + bool bit = (0 != (c & 1)); + ctx().assign(bit ? b : ~b, nullptr); + c >>= 1; } - if (added_constraint) - return false; - - // ensure equalities over shared symbols - if (th.assume_eqs(m_var_value_table)) + } + + /** + * 1. Extract values of roots. + * Check that values of roots are unique. + * Check that values assigned to non-roots align with root values. + * Enforce that values of roots are within max_char. + * 2. Assign values to other roots that haven't been assigned. + * 3. Assign values to non-roots using values of roots. + */ + bool seq_unicode::final_check() { + m_var2value.reset(); + m_var2value.resize(th.get_num_vars(), 0); + m_value2var.reset(); + + // extract the initial set of constants. + uint_set values; + unsigned c = 0, d = 0; + bool requires_fix = false; + for (unsigned v = th.get_num_vars(); v-- > 0; ) { + expr* e = th.get_expr(v); + if (seq.is_char(e) && th.get_enode(v)->is_root() && get_value(v, c)) { + if (c >= zstring::max_char()) { + enforce_value_bound(v); + requires_fix = true; + continue; + } + values.insert(c); + m_var2value[v] = c; + m_value2var.reserve(c + 1, null_theory_var); + theory_var w = m_value2var[c]; + if (w != null_theory_var && th.get_enode(v)->get_root() != th.get_enode(w)->get_root()) { + enforce_ackerman(v, w); + requires_fix = true; + } + else { + m_value2var[c] = v; + } + for (enode* n : *th.get_enode(v)) { + theory_var w = n->get_th_var(th.get_id()); + if (v != w && get_value(w, d) && d != c) { + enforce_ackerman(v, w); + requires_fix = true; + break; + } + } + } + } + if (requires_fix) return false; + // assign values to roots + c = 'a'; + for (unsigned v = th.get_num_vars(); v-- > 0; ) { + expr* e = th.get_expr(v); + if (!seq.is_char(e) || !th.get_enode(v)->is_root() || get_value(v, d)) + continue; + + d = c; + while (values.contains(c)) { + c = (c + 1) % zstring::max_char(); + if (d == c) { + enforce_bits(); + return false; + } + } + m_var2value[v] = c; + values.insert(c); + } + for (unsigned v = th.get_num_vars(); v-- > 0; ) { + expr* e = th.get_expr(v); + if (seq.is_char(e) && !th.get_enode(v)->is_root() && !get_value(v, d)) { + theory_var w = th.get_enode(v)->get_root()->get_th_var(th.get_id()); + SASSERT(w != v && w != null_theory_var); + m_var2value[v] = m_var2value[w]; + } + } return true; } - - void seq_unicode::propagate() { - return; - ctx().push_trail(value_trail(m_qhead)); - for (; m_qhead < m_asserted_edges.size() && !ctx().inconsistent(); ++m_qhead) { - propagate(m_asserted_edges[m_qhead]); + + void seq_unicode::enforce_bits() { + TRACE("seq", tout << "enforce bits\n";); + for (unsigned v = th.get_num_vars(); v-- > 0; ) { + expr* e = th.get_expr(v); + if (seq.is_char(e) && th.get_enode(v)->is_root() && !has_bits(v)) + init_bits(v); } } + + void seq_unicode::enforce_value_bound(theory_var v) { + TRACE("seq", tout << "enforce bound " << v << "\n";); + enode* n = th.ensure_enode(seq.mk_char(zstring::max_char())); + theory_var w = n->get_th_var(th.get_id()); + SASSERT(has_bits(w)); + auto const& mbits = get_ebits(w); + auto const& bits = get_ebits(v); + expr_ref le(m); + m_bb.mk_ule(bits.size(), bits.c_ptr(), mbits.c_ptr(), le); + ctx().assign(th.mk_literal(le), nullptr); + } + + void seq_unicode::enforce_ackerman(theory_var v, theory_var w) { + TRACE("seq", tout << "enforce ackerman " << v << " " << w << "\n";); + literal eq = th.mk_eq(th.get_expr(v), th.get_expr(w), false); + literal_vector lits; + auto& a = get_ebits(v); + auto& b = get_ebits(w); + for (unsigned i = a.size(); i-- > 0; ) { + literal beq = th.mk_eq(a[i], b[i], false); + lits.push_back(~beq); + // eq => a = b + ctx().mk_th_axiom(th.get_id(), ~eq, beq); + } + // a = b => eq + lits.push_back(eq); + ctx().mk_th_axiom(th.get_id(), lits.size(), lits.c_ptr()); + } - void seq_unicode::propagate(edge_id edge) { - return; - if (dl.enable_edge(edge)) - return; - dl.traverse_neg_cycle2(false, m_nc_functor); - literal_vector const & lits = m_nc_functor.get_lits(); - vector params; - if (m.proofs_enabled()) { - params.push_back(parameter(symbol("farkas"))); - for (unsigned i = 0; i <= lits.size(); ++i) { - params.push_back(parameter(rational(1))); - } - } - ctx().set_conflict( - ctx().mk_justification( - ext_theory_conflict_justification( - th.get_id(), ctx().get_region(), - lits.size(), lits.c_ptr(), - 0, nullptr, - params.size(), params.c_ptr())));; - } - unsigned seq_unicode::get_value(theory_var v) { - dl.init_var(v); - auto val = dl.get_assignment(v); - return val.get_int(); + return m_var2value[v]; } + bool seq_unicode::get_value(theory_var v, unsigned& c) { + if (!has_bits(v)) + return false; + auto const & b = get_bits(v); + c = 0; + unsigned p = 1; + for (literal lit : b) { + if (ctx().get_assignment(lit) == l_true) + c += p; + p *= 2; + } + return true; + } } diff --git a/src/smt/seq_unicode.h b/src/smt/seq_unicode.h index 349b4cda5..e2a9ba309 100644 --- a/src/smt/seq_unicode.h +++ b/src/smt/seq_unicode.h @@ -16,83 +16,59 @@ Author: --*/ #pragma once -#include "util/s_integer.h" #include "ast/seq_decl_plugin.h" +#include "ast/bv_decl_plugin.h" +#include "ast/rewriter/bit_blaster/bit_blaster.h" #include "smt/smt_theory.h" -#include "smt/diff_logic.h" namespace smt { class seq_unicode { - struct ext { - static const bool m_int_theory = true; - typedef s_integer numeral; - typedef s_integer fin_numeral; - numeral m_epsilon; - typedef literal explanation; - ext(): m_epsilon(1) {} - }; - - class nc_functor { - literal_vector m_literals; - public: - nc_functor() {} - void operator()(literal const& l) { - if (l != null_literal) m_literals.push_back(l); - } - literal_vector const& get_lits() const { return m_literals; } - void new_edge(dl_var s, dl_var d, unsigned num_edges, edge_id const* edges) {} - }; - - struct var_value_hash { - seq_unicode & m_th; - var_value_hash(seq_unicode & th):m_th(th) {} - unsigned operator()(theory_var v) const { return m_th.get_value(v); } - }; - - struct var_value_eq { - seq_unicode & m_th; - var_value_eq(seq_unicode & th):m_th(th) {} - bool operator()(theory_var v1, theory_var v2) const { - return m_th.get_value(v1) == m_th.get_value(v2); - } - }; - - typedef int_hashtable var_value_table; theory& th; ast_manager& m; seq_util seq; - dl_graph dl; - unsigned m_qhead; - svector m_asserted_edges; - nc_functor m_nc_functor; - var_value_hash m_var_value_hash; - var_value_eq m_var_value_eq; - var_value_table m_var_value_table; - std::function m_add_axiom; + bv_util bv; + vector m_bits; + vector m_ebits; + unsigned_vector m_var2value; + svector m_value2var; + bool m_enabled { false }; + bit_blaster m_bb; + + struct reset_bits; context& ctx() const { return th.get_context(); } - void propagate(edge_id edge); + literal_vector const& get_bits(theory_var v); - void add_axiom(literal a, literal b = null_literal, literal c = null_literal) { - m_add_axiom(a, b, c); - } + expr_ref_vector const& get_ebits(theory_var v); - void adapt_eq(theory_var v1, theory_var v2); + bool has_bits(theory_var v) const; - literal mk_literal(expr* e); + void init_bits(theory_var v); + + bool get_value(theory_var v, unsigned& c); + + void enforce_ackerman(theory_var v, theory_var w); + + void enforce_value_bound(theory_var v); + + void enforce_bits(); public: seq_unicode(theory& th); + bool enabled() const { return m_enabled; } + // <= atomic constraints on characters void assign_le(theory_var v1, theory_var v2, literal lit); // < atomic constraint on characters void assign_lt(theory_var v1, theory_var v2, literal lit); + + void new_const_char(theory_var v, unsigned c); // = on characters void new_eq_eh(theory_var v1, theory_var v2); @@ -105,9 +81,7 @@ namespace smt { unsigned get_value(theory_var v); - void propagate(); - - bool can_propagate() const { return m_qhead < m_asserted_edges.size(); } + void internalize_le(literal lit, app* term); }; diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index 16ed92546..560914881 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -354,7 +354,7 @@ final_check_status theory_seq::final_check_eh() { TRACEFIN("zero_length"); return FC_CONTINUE; } - if (ctx.get_fparams().m_seq_use_unicode && !m_unicode.final_check()) { + if (m_unicode.enabled() && !m_unicode.final_check()) { return FC_CONTINUE; } if (get_fparams().m_split_w_len && len_based_split()) { @@ -737,7 +737,6 @@ void theory_seq::propagate_lit(dependency* dep, unsigned n, literal const* _lits set_conflict(dep, lits); return; } - ctx.mark_as_relevant(lit); enode_pair_vector eqs; linearize(dep, eqs, lits); @@ -1519,6 +1518,11 @@ bool theory_seq::internalize_term(app* term) { bool_var bv = ctx.mk_bool_var(term); ctx.set_var_theory(bv, get_id()); ctx.mark_as_relevant(bv); + if (m_util.is_char_le(term) && m_unicode.enabled()) { + mk_var(ensure_enode(term->get_arg(0))); + mk_var(ensure_enode(term->get_arg(1))); + m_unicode.internalize_le(literal(bv, false), term); + } } enode* e = nullptr; @@ -1528,10 +1532,15 @@ bool theory_seq::internalize_term(app* term) { else { e = ctx.mk_enode(term, false, m.is_bool(term), true); } - mk_var(e); + theory_var v = mk_var(e); if (!ctx.relevancy()) { relevant_eh(term); } + + unsigned c = 0; + if (m_unicode.enabled() && m_util.is_const_char(term, c)) + m_unicode.new_const_char(v, c); + return true; } @@ -2015,6 +2024,10 @@ model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) { m_concat.shrink(start); return sv; } + else if (m_unicode.enabled() && m_util.is_char(e)) { + unsigned ch = m_unicode.get_value(n->get_th_var(get_id())); + return alloc(expr_wrapper_proc, m_util.str.mk_char(ch)); + } else { return alloc(expr_wrapper_proc, mk_value(e)); } @@ -2319,7 +2332,7 @@ void theory_seq::validate_fmls(enode_pair_vector const& eqs, literal_vector cons theory_var theory_seq::mk_var(enode* n) { expr* o = n->get_owner(); - if (!m_util.is_seq(o) && !m_util.is_re(o)) + if (!m_util.is_seq(o) && !m_util.is_re(o) && (!m_unicode.enabled() || !m_util.is_char(o))) return null_theory_var; if (is_attached_to_var(n)) @@ -2333,7 +2346,7 @@ theory_var theory_seq::mk_var(enode* n) { } bool theory_seq::can_propagate() { - return m_axioms_head < m_axioms.size() || !m_replay.empty() || m_new_solution || m_unicode.can_propagate() || m_regex.can_propagate(); + return m_axioms_head < m_axioms.size() || !m_replay.empty() || m_new_solution || m_regex.can_propagate(); } bool theory_seq::canonize(expr* e, dependency*& eqs, expr_ref& result) { @@ -2525,8 +2538,6 @@ void theory_seq::add_dependency(dependency*& dep, enode* a, enode* b) { void theory_seq::propagate() { - if (ctx.get_fparams().m_seq_use_unicode) - m_unicode.propagate(); if (m_regex.can_propagate()) m_regex.propagate(); while (m_axioms_head < m_axioms.size() && !ctx.inconsistent()) { @@ -2980,13 +2991,8 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { else if (m_util.str.is_nth_i(e) || m_util.str.is_nth_u(e)) { // no-op } - else if (ctx.get_fparams().m_seq_use_unicode && m_util.is_char_le(e, e1, e2)) { - theory_var v1 = get_th_var(ctx.get_enode(e1)); - theory_var v2 = get_th_var(ctx.get_enode(e2)); - if (is_true) - m_unicode.assign_le(v1, v2, lit); - else - m_unicode.assign_lt(v2, v1, lit); + else if (m_unicode.enabled() && m_util.is_char_le(e)) { + // no-op } else if (m_util.is_skolem(e)) { @@ -3006,7 +3012,7 @@ void theory_seq::new_eq_eh(theory_var v1, theory_var v2) { enode* n2 = get_enode(v2); expr* o1 = n1->get_owner(); expr* o2 = n2->get_owner(); - if (ctx.get_fparams().m_seq_use_unicode && m_util.is_char(o1)) { + if (m_unicode.enabled() && m_util.is_char(o1)) { m_unicode.new_eq_eh(v1, v2); return; } @@ -3057,7 +3063,7 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) { m_regex.propagate_ne(e1, e2); return; } - if (ctx.get_fparams().m_seq_use_unicode && m_util.is_char(n1->get_owner())) { + if (m_unicode.enabled() && m_util.is_char(n1->get_owner())) { m_unicode.new_diseq_eh(v1, v2); return; }