From d0f1d8f59e70843f6d1d21fe284a2ad94a585315 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 27 Jan 2021 05:46:45 -0800 Subject: [PATCH] move to unicode as stand-alone theory Signed-off-by: Nikolaj Bjorner --- src/ast/char_decl_plugin.cpp | 5 +++++ src/ast/char_decl_plugin.h | 17 +++++++++++++-- src/ast/reg_decl_plugins.cpp | 6 +++--- src/ast/seq_decl_plugin.cpp | 29 ++++++++++++++++++++----- src/ast/seq_decl_plugin.h | 6 ++++++ src/cmd_context/cmd_context.cpp | 4 ++-- src/model/char_factory.h | 4 ++++ src/model/seq_factory.h | 38 ++++++++++++++++----------------- src/smt/smt_setup.cpp | 4 ++-- src/smt/theory_char.cpp | 34 ++++++++++++++++++----------- src/smt/theory_char.h | 5 +++-- src/smt/theory_seq.cpp | 21 ++++++++++-------- 12 files changed, 115 insertions(+), 58 deletions(-) diff --git a/src/ast/char_decl_plugin.cpp b/src/ast/char_decl_plugin.cpp index 5453fd369..19914b17b 100644 --- a/src/ast/char_decl_plugin.cpp +++ b/src/ast/char_decl_plugin.cpp @@ -93,3 +93,8 @@ app* char_decl_plugin::mk_char(unsigned u) { expr* char_decl_plugin::get_some_value(sort* s) { return mk_char(0); } + +app* char_decl_plugin::mk_le(expr* a, expr* b) { + expr* es[2] = { a, b}; + return m_manager->mk_app(m_family_id, OP_CHAR_LE, 2, es); +} diff --git a/src/ast/char_decl_plugin.h b/src/ast/char_decl_plugin.h index 7f891b7d4..92e666721 100644 --- a/src/ast/char_decl_plugin.h +++ b/src/ast/char_decl_plugin.h @@ -23,7 +23,6 @@ Revision History: #pragma once #include "ast/ast.h" -#include "ast/bv_decl_plugin.h" #include enum char_sort_kind { @@ -67,7 +66,21 @@ public: bool are_distinct(app* a, app* b) const override; + expr* get_some_value(sort* s) override; + + sort* char_sort() const { return m_char; } + app* mk_char(unsigned u); - expr* get_some_value(sort* s) override; + app* mk_le(expr* a, expr* b); + + bool is_le(expr const* e) const { return is_app_of(e, m_family_id, OP_CHAR_LE); } + + bool is_const_char(expr const* e, unsigned& c) const { + return is_app_of(e, m_family_id, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true); + } + + + + }; diff --git a/src/ast/reg_decl_plugins.cpp b/src/ast/reg_decl_plugins.cpp index 7781bf829..8cb0bbe5a 100644 --- a/src/ast/reg_decl_plugins.cpp +++ b/src/ast/reg_decl_plugins.cpp @@ -49,12 +49,12 @@ void reg_decl_plugins(ast_manager & m) { if (!m.get_plugin(m.mk_family_id(symbol("datalog_relation")))) { m.register_plugin(symbol("datalog_relation"), alloc(datalog::dl_decl_plugin)); } - if (!m.get_plugin(m.mk_family_id(symbol("seq")))) { - m.register_plugin(symbol("seq"), alloc(seq_decl_plugin)); - } if (!m.get_plugin(m.mk_family_id(symbol("char")))) { m.register_plugin(symbol("char"), alloc(char_decl_plugin)); } + if (!m.get_plugin(m.mk_family_id(symbol("seq")))) { + m.register_plugin(symbol("seq"), alloc(seq_decl_plugin)); + } if (!m.get_plugin(m.mk_family_id(symbol("fpa")))) { m.register_plugin(symbol("fpa"), alloc(fpa_decl_plugin)); } diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 05cb4d8ab..7fb3623ad 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -24,6 +24,8 @@ Revision History: #include "ast/bv_decl_plugin.h" #include +#define _USE_CHAR_PLUGIN 1 + seq_decl_plugin::seq_decl_plugin(): m_init(false), m_stringc_sym("String"), @@ -283,8 +285,12 @@ void seq_decl_plugin::init() { void seq_decl_plugin::set_manager(ast_manager* m, family_id id) { decl_plugin::set_manager(m, id); bv_util bv(*m); - if (unicode()) - m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr)); + if (unicode()) +#if _USE_CHAR_PLUGIN + m_char = get_char_plugin().char_sort(); +#else + m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr)); +#endif else m_char = bv.mk_sort(8); m->inc_ref(m_char); @@ -641,9 +647,6 @@ void seq_decl_plugin::get_sort_names(svector & sort_names, symbol sort_names.push_back(builtin_name("Seq", SEQ_SORT)); sort_names.push_back(builtin_name("RegEx", RE_SORT)); - // TBD: - // sort_names.push_back(builtin_name("Unicode", CHAR_SORT)); - // SMTLIB 2.6 RegLan, String sort_names.push_back(builtin_name("RegLan", _REGLAN_SORT)); sort_names.push_back(builtin_name("String", _STRING_SORT)); @@ -671,9 +674,13 @@ app* seq_decl_plugin::mk_string(zstring const& s) { app* seq_decl_plugin::mk_char(unsigned u) { if (unicode()) { +#if _USE_CHAR_PLUGIN + return get_char_plugin().mk_char(u); +#else parameter param(u); func_decl* f = m_manager->mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, _OP_CHAR_CONST, 1, ¶m)); return m_manager->mk_const(f); +#endif } else { bv_util bv(*m_manager); @@ -813,7 +820,11 @@ unsigned seq_util::max_mul(unsigned x, unsigned y) const { bool seq_util::is_const_char(expr* e, unsigned& c) const { if (seq.unicode()) { +#if _USE_CHAR_PLUGIN + return ch.is_const_char(e, c); +#else return is_app_of(e, m_fid, _OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true); +#endif } else { rational r; @@ -824,7 +835,11 @@ bool seq_util::is_const_char(expr* e, unsigned& c) const { bool seq_util::is_char_le(expr const* e) const { if (seq.unicode()) +#if _USE_CHAR_PLUGIN + return ch.is_le(e); +#else return is_app_of(e, m_fid, _OP_CHAR_LE); +#endif else return bv().is_bv_ule(e) && is_char(to_app(e)->get_arg(0)); } @@ -840,8 +855,12 @@ app* seq_util::mk_le(expr* ch1, expr* ch2) const { expr_ref _ch1(ch1, m), _ch2(ch2, m); if (seq.unicode()) { +#if _USE_CHAR_PLUGIN + return ch.mk_le(ch1, ch2); +#else expr* es[2] = { ch1, ch2 }; return m.mk_app(m_fid, _OP_CHAR_LE, 2, es); +#endif } else { rational r1, r2; diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 3cc1abc8f..f2b2b2973 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -24,6 +24,7 @@ Revision History: #include "ast/ast.h" #include "ast/bv_decl_plugin.h" +#include "ast/char_decl_plugin.h" #include "util/lbool.h" #include "util/zstring.h" @@ -204,11 +205,15 @@ public: sort* char_sort() const { return m_char; } sort* string_sort() const { return m_string; } + + char_decl_plugin& get_char_plugin() { return *static_cast(m_manager->get_plugin(m_manager->mk_family_id("char"))); } + }; class seq_util { ast_manager& m; seq_decl_plugin& seq; + char_decl_plugin& ch; family_id m_fid; mutable scoped_ptr m_bv; bv_util& bv() const; @@ -571,6 +576,7 @@ public: seq_util(ast_manager& m): m(m), seq(*static_cast(m.get_plugin(m.mk_family_id("seq")))), + ch(seq.get_char_plugin()), m_fid(seq.get_family_id()), str(*this), re(*this) { diff --git a/src/cmd_context/cmd_context.cpp b/src/cmd_context/cmd_context.cpp index 65470bd23..d39e30233 100644 --- a/src/cmd_context/cmd_context.cpp +++ b/src/cmd_context/cmd_context.cpp @@ -701,8 +701,8 @@ void cmd_context::init_manager_core(bool new_manager) { register_plugin(symbol("array"), alloc(array_decl_plugin), logic_has_array()); register_plugin(symbol("datatype"), alloc(datatype_decl_plugin), logic_has_datatype()); register_plugin(symbol("recfun"), alloc(recfun::decl::plugin), logic_has_recfun()); - register_plugin(symbol("seq"), alloc(seq_decl_plugin), logic_has_seq()); register_plugin(symbol("char"), alloc(char_decl_plugin), logic_has_seq()); + register_plugin(symbol("seq"), alloc(seq_decl_plugin), logic_has_seq()); register_plugin(symbol("pb"), alloc(pb_decl_plugin), logic_has_pb()); register_plugin(symbol("fpa"), alloc(fpa_decl_plugin), logic_has_fpa()); register_plugin(symbol("datalog_relation"), alloc(datalog::dl_decl_plugin), !has_logic()); @@ -719,7 +719,7 @@ void cmd_context::init_manager_core(bool new_manager) { load_plugin(symbol("array"), logic_has_array(), fids); load_plugin(symbol("datatype"), logic_has_datatype(), fids); load_plugin(symbol("recfun"), logic_has_recfun(), fids); - load_plugin(symbol("char"), logic_has_seq(), fids); + load_plugin(symbol("char"), logic_has_seq(), fids); load_plugin(symbol("seq"), logic_has_seq(), fids); load_plugin(symbol("fpa"), logic_has_fpa(), fids); load_plugin(symbol("pb"), logic_has_pb(), fids); diff --git a/src/model/char_factory.h b/src/model/char_factory.h index 997d3c4e0..0e99ecd79 100644 --- a/src/model/char_factory.h +++ b/src/model/char_factory.h @@ -67,6 +67,10 @@ public: m_chars.insert(ch); } + void register_value(unsigned u) { + m_chars.insert(u); + } + void add_trail(expr* e) { m_trail.push_back(e); } diff --git a/src/model/seq_factory.h b/src/model/seq_factory.h index 8e952cc62..3928b6c25 100644 --- a/src/model/seq_factory.h +++ b/src/model/seq_factory.h @@ -57,13 +57,13 @@ public: } expr* get_some_value(sort* s) override { - if (u.is_seq(s)) { - return u.str.mk_empty(s); - } sort* seq = nullptr; - if (u.is_re(s, seq)) { + if (u.is_seq(s)) + return u.str.mk_empty(s); + if (u.is_re(s, seq)) return u.re.mk_to_re(u.str.mk_empty(seq)); - } + if (u.is_char(s)) + return u.mk_char('A'); UNREACHABLE(); return nullptr; } @@ -84,7 +84,11 @@ public: return false; } } - NOT_IMPLEMENTED_YET(); + if (u.is_char(s)) { + v1 = u.mk_char('a'); + v2 = u.mk_char('b'); + return true; + } return false; } expr* get_fresh_value(sort* s) override { @@ -104,9 +108,7 @@ public: return u.re.mk_to_re(v0); } if (u.is_char(s)) { - //char s[2] = { ++m_char, 0 }; - //return u.str.mk_char(zstring(s), 0); - return u.str.mk_char(zstring("a"), 0); + return u.mk_char('a'); } if (u.is_seq(s, ch)) { expr* v = m_model.get_fresh_value(ch); @@ -134,23 +136,19 @@ public: symbol sym; if (u.str.is_string(n, sym)) { m_strings.insert(sym); - if (sym.str().find(m_unique_delim) != std::string::npos) { + if (sym.str().find(m_unique_delim) != std::string::npos) add_new_delim(); - } } } + private: void add_new_delim() { - bool found = true; - while (found) { - found = false; - m_unique_delim += "!"; - symbol_set::iterator it = m_strings.begin(), end = m_strings.end(); - for (; it != end && !found; ++it) { - found = it->str().find(m_unique_delim) != std::string::npos; - } - } + try_again: + m_unique_delim += "!"; + for (auto const& s : m_strings) + if (s.str().find(m_unique_delim) != std::string::npos) + goto try_again; } }; diff --git a/src/smt/smt_setup.cpp b/src/smt/smt_setup.cpp index 8e757ed59..b503a35c4 100644 --- a/src/smt/smt_setup.cpp +++ b/src/smt/smt_setup.cpp @@ -940,8 +940,8 @@ namespace smt { sort* s = seq.mk_string_sort(); family_id ch_fid = ch->get_family_id(); if (s->get_family_id() != ch_fid) - m_context.register_plugin(alloc(smt::theory_char, m_context, ch_fid)); - + m_context.register_plugin(alloc(smt::theory_char, m_context, ch_fid, nullptr)); + } void setup::setup_special_relations() { diff --git a/src/smt/theory_char.cpp b/src/smt/theory_char.cpp index e8ec4a3fe..238d037ba 100644 --- a/src/smt/theory_char.cpp +++ b/src/smt/theory_char.cpp @@ -22,15 +22,18 @@ Author: namespace smt { - theory_char::theory_char(context& ctx, family_id fid): + theory_char::theory_char(context& ctx, family_id fid, theory* th): theory(ctx, fid), seq(m), - m_bb(m, ctx.get_fparams()) + m_bb(m, ctx.get_fparams()), + m_th(th) { bv_util bv(m); sort_ref b8(bv.mk_sort(8), m); m_enabled = !seq.is_char(b8); m_bits2char = symbol("bits2char"); + if (!m_th) + m_th = this; } struct theory_char::reset_bits : public trail { @@ -215,21 +218,22 @@ namespace smt { * 2. Assign values to characters that haven't been assigned. */ bool theory_char::final_check() { + TRACE("seq", tout << "final check " << m_th->get_num_vars() << "\n";); m_var2value.reset(); - m_var2value.resize(get_num_vars(), UINT_MAX); + m_var2value.resize(m_th->get_num_vars(), UINT_MAX); m_value2var.reset(); // extract the initial set of constants. uint_set values; unsigned c = 0, d = 0; - for (unsigned v = get_num_vars(); v-- > 0; ) { - expr* e = get_expr(v); + for (unsigned v = m_th->get_num_vars(); v-- > 0; ) { + expr* e = m_th->get_expr(v); if (seq.is_char(e) && m_var2value[v] == UINT_MAX && get_char_value(v, c)) { - CTRACE("seq", seq.is_char(e), tout << mk_pp(e, m) << " root: " << get_enode(v)->is_root() << " is_value: " << get_char_value(v, c) << "\n";); - enode* r = get_enode(v)->get_root(); + CTRACE("seq", seq.is_char(e), tout << mk_pp(e, m) << " root: " << m_th->get_enode(v)->is_root() << " is_value: " << get_char_value(v, c) << "\n";); + enode* r = m_th->get_enode(v)->get_root(); m_value2var.reserve(c + 1, null_theory_var); theory_var u = m_value2var[c]; - if (u != null_theory_var && r != get_enode(u)->get_root()) { + if (u != null_theory_var && r != m_th->get_enode(u)->get_root()) { enforce_ackerman(u, v); return false; } @@ -254,8 +258,8 @@ namespace smt { // assign values to other unassigned nodes c = 'A'; - for (unsigned v = get_num_vars(); v-- > 0; ) { - expr* e = get_expr(v); + for (unsigned v = m_th->get_num_vars(); v-- > 0; ) { + expr* e = m_th->get_expr(v); if (seq.is_char(e) && m_var2value[v] == UINT_MAX) { d = c; while (values.contains(c)) { @@ -266,7 +270,7 @@ namespace smt { } } TRACE("seq", tout << "fresh: " << mk_pp(e, m) << " := " << c << "\n";); - for (enode* n : *get_enode(v)) + for (enode* n : *m_th->get_enode(v)) m_var2value[n->get_th_var(get_id())] = c; m_value2var.reserve(c + 1, null_theory_var); m_value2var[c] = v; @@ -278,9 +282,9 @@ namespace smt { void theory_char::enforce_bits() { TRACE("seq", tout << "enforce bits\n";); - for (unsigned v = get_num_vars(); v-- > 0; ) { + for (unsigned v = m_th->get_num_vars(); v-- > 0; ) { expr* e = get_expr(v); - if (seq.is_char(e) && get_enode(v)->is_root() && !has_bits(v)) + if (seq.is_char(e) && m_th->get_enode(v)->is_root() && !has_bits(v)) init_bits(v); } } @@ -344,6 +348,10 @@ namespace smt { // a stand-alone theory. void theory_char::init_model(model_generator & mg) { m_factory = alloc(char_factory, get_manager(), get_family_id(), mg.get_model()); + mg.register_factory(m_factory); + for (auto val : m_var2value) + if (val != UINT_MAX) + m_factory->register_value(val); } model_value_proc * theory_char::mk_value(enode * n, model_generator & mg) { diff --git a/src/smt/theory_char.h b/src/smt/theory_char.h index 7f2f09343..ae4b22b29 100644 --- a/src/smt/theory_char.h +++ b/src/smt/theory_char.h @@ -44,6 +44,7 @@ namespace smt { stats m_stats; symbol m_bits2char; char_factory* m_factory { nullptr }; + theory* m_th; struct reset_bits; @@ -58,11 +59,11 @@ namespace smt { public: - theory_char(context& ctx, family_id fid); + theory_char(context& ctx, family_id fid, theory * th); void new_eq_eh(theory_var v1, theory_var v2) override; void new_diseq_eh(theory_var v1, theory_var v2) override; - theory * mk_fresh(context * new_ctx) override { return alloc(theory_char, *new_ctx, get_family_id()); } + theory * mk_fresh(context * new_ctx) override { return alloc(theory_char, *new_ctx, get_family_id(), nullptr); } bool internalize_atom(app * atom, bool gate_ctx) override; bool internalize_term(app * term) override; void display(std::ostream& out) const override {} diff --git a/src/smt/theory_seq.cpp b/src/smt/theory_seq.cpp index cce9a8bd3..037f0f344 100644 --- a/src/smt/theory_seq.cpp +++ b/src/smt/theory_seq.cpp @@ -108,6 +108,8 @@ Outline: #include "smt/theory_lra.h" #include "smt/smt_kernel.h" +#define _USE_CHAR (false && m_char.enabled()) + using namespace smt; void theory_seq::solution_map::update(expr* e, expr* r, dependency* d) { @@ -272,7 +274,7 @@ theory_seq::theory_seq(context& ctx): m_autil(m), m_sk(m, m_rewrite), m_ax(*this, m_rewrite), - m_char(ctx, get_family_id()), + m_char(ctx, get_family_id(), this), m_regex(*this), m_arith_value(m), m_trail_stack(*this), @@ -354,7 +356,7 @@ final_check_status theory_seq::final_check_eh() { TRACEFIN("zero_length"); return FC_CONTINUE; } - if (m_char.enabled() && !m_char.final_check()) { + if (_USE_CHAR && !m_char.final_check()) { return FC_CONTINUE; } if (get_fparams().m_split_w_len && len_based_split()) { @@ -1489,7 +1491,7 @@ bool theory_seq::internalize_term(app* term) { bool_var bv = ctx.mk_bool_var(term); ctx.set_var_theory(bv, get_id()); ctx.mark_as_relevant(bv); - if (m_util.is_char_le(term) && m_char.enabled()) { + if (m_util.is_char_le(term) && _USE_CHAR) { mk_var(ensure_enode(term->get_arg(0))); mk_var(ensure_enode(term->get_arg(1))); m_char.internalize_le(literal(bv, false), term); @@ -1509,7 +1511,7 @@ bool theory_seq::internalize_term(app* term) { } unsigned c = 0; - if (m_char.enabled() && m_util.is_const_char(term, c)) + if (_USE_CHAR && m_util.is_const_char(term, c)) m_char.new_const_char(v, c); return true; @@ -1854,6 +1856,7 @@ public: rational val; bool is_string = th.m_util.is_string(m_sort); expr_ref result(th.m); + if (is_string) { unsigned_vector sbuffer; unsigned ch; @@ -1991,7 +1994,7 @@ model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) { m_concat.shrink(start); return sv; } - else if (m_char.enabled() && m_util.is_char(e)) { + else if (_USE_CHAR && m_util.is_char(e)) { unsigned ch = m_char.get_char_value(n->get_th_var(get_id())); app* val = m_util.str.mk_char(ch); m_factory->add_trail(val); @@ -2301,7 +2304,7 @@ void theory_seq::validate_fmls(enode_pair_vector const& eqs, literal_vector cons theory_var theory_seq::mk_var(enode* n) { expr* o = n->get_owner(); - if (!m_util.is_seq(o) && !m_util.is_re(o) && (!m_char.enabled() || !m_util.is_char(o))) + if (!m_util.is_seq(o) && !m_util.is_re(o) && (!_USE_CHAR || !m_util.is_char(o))) return null_theory_var; if (is_attached_to_var(n)) @@ -2960,7 +2963,7 @@ void theory_seq::assign_eh(bool_var v, bool is_true) { else if (m_util.str.is_nth_i(e) || m_util.str.is_nth_u(e)) { // no-op } - else if (m_char.enabled() && m_util.is_char_le(e)) { + else if (_USE_CHAR && m_util.is_char_le(e)) { // no-op } else if (m_util.is_skolem(e)) { @@ -2981,7 +2984,7 @@ void theory_seq::new_eq_eh(theory_var v1, theory_var v2) { enode* n2 = get_enode(v2); expr* o1 = n1->get_owner(); expr* o2 = n2->get_owner(); - if (m_char.enabled() && m_util.is_char(o1)) { + if (_USE_CHAR && m_util.is_char(o1)) { m_char.new_eq_eh(v1, v2); return; } @@ -3032,7 +3035,7 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) { m_regex.propagate_ne(e1, e2); return; } - if (m_char.enabled() && m_util.is_char(n1->get_owner())) { + if (_USE_CHAR && m_util.is_char(n1->get_owner())) { m_char.new_diseq_eh(v1, v2); return; }