3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-10-26 01:14:36 +00:00

move to unicode as stand-alone theory

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2021-01-27 05:46:45 -08:00
parent ecba26beae
commit d0f1d8f59e
12 changed files with 115 additions and 58 deletions

View file

@ -93,3 +93,8 @@ app* char_decl_plugin::mk_char(unsigned u) {
expr* char_decl_plugin::get_some_value(sort* s) {
return mk_char(0);
}
app* char_decl_plugin::mk_le(expr* a, expr* b) {
expr* es[2] = { a, b};
return m_manager->mk_app(m_family_id, OP_CHAR_LE, 2, es);
}

View file

@ -23,7 +23,6 @@ Revision History:
#pragma once
#include "ast/ast.h"
#include "ast/bv_decl_plugin.h"
#include <string>
enum char_sort_kind {
@ -67,7 +66,21 @@ public:
bool are_distinct(app* a, app* b) const override;
expr* get_some_value(sort* s) override;
sort* char_sort() const { return m_char; }
app* mk_char(unsigned u);
expr* get_some_value(sort* s) override;
app* mk_le(expr* a, expr* b);
bool is_le(expr const* e) const { return is_app_of(e, m_family_id, OP_CHAR_LE); }
bool is_const_char(expr const* e, unsigned& c) const {
return is_app_of(e, m_family_id, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
}
};

View file

@ -49,12 +49,12 @@ void reg_decl_plugins(ast_manager & m) {
if (!m.get_plugin(m.mk_family_id(symbol("datalog_relation")))) {
m.register_plugin(symbol("datalog_relation"), alloc(datalog::dl_decl_plugin));
}
if (!m.get_plugin(m.mk_family_id(symbol("seq")))) {
m.register_plugin(symbol("seq"), alloc(seq_decl_plugin));
}
if (!m.get_plugin(m.mk_family_id(symbol("char")))) {
m.register_plugin(symbol("char"), alloc(char_decl_plugin));
}
if (!m.get_plugin(m.mk_family_id(symbol("seq")))) {
m.register_plugin(symbol("seq"), alloc(seq_decl_plugin));
}
if (!m.get_plugin(m.mk_family_id(symbol("fpa")))) {
m.register_plugin(symbol("fpa"), alloc(fpa_decl_plugin));
}

View file

@ -24,6 +24,8 @@ Revision History:
#include "ast/bv_decl_plugin.h"
#include <sstream>
#define _USE_CHAR_PLUGIN 1
seq_decl_plugin::seq_decl_plugin(): m_init(false),
m_stringc_sym("String"),
@ -284,7 +286,11 @@ void seq_decl_plugin::set_manager(ast_manager* m, family_id id) {
decl_plugin::set_manager(m, id);
bv_util bv(*m);
if (unicode())
#if _USE_CHAR_PLUGIN
m_char = get_char_plugin().char_sort();
#else
m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr));
#endif
else
m_char = bv.mk_sort(8);
m->inc_ref(m_char);
@ -641,9 +647,6 @@ void seq_decl_plugin::get_sort_names(svector<builtin_name> & sort_names, symbol
sort_names.push_back(builtin_name("Seq", SEQ_SORT));
sort_names.push_back(builtin_name("RegEx", RE_SORT));
// TBD:
// sort_names.push_back(builtin_name("Unicode", CHAR_SORT));
// SMTLIB 2.6 RegLan, String
sort_names.push_back(builtin_name("RegLan", _REGLAN_SORT));
sort_names.push_back(builtin_name("String", _STRING_SORT));
@ -671,9 +674,13 @@ app* seq_decl_plugin::mk_string(zstring const& s) {
app* seq_decl_plugin::mk_char(unsigned u) {
if (unicode()) {
#if _USE_CHAR_PLUGIN
return get_char_plugin().mk_char(u);
#else
parameter param(u);
func_decl* f = m_manager->mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, _OP_CHAR_CONST, 1, &param));
return m_manager->mk_const(f);
#endif
}
else {
bv_util bv(*m_manager);
@ -813,7 +820,11 @@ unsigned seq_util::max_mul(unsigned x, unsigned y) const {
bool seq_util::is_const_char(expr* e, unsigned& c) const {
if (seq.unicode()) {
#if _USE_CHAR_PLUGIN
return ch.is_const_char(e, c);
#else
return is_app_of(e, m_fid, _OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
#endif
}
else {
rational r;
@ -824,7 +835,11 @@ bool seq_util::is_const_char(expr* e, unsigned& c) const {
bool seq_util::is_char_le(expr const* e) const {
if (seq.unicode())
#if _USE_CHAR_PLUGIN
return ch.is_le(e);
#else
return is_app_of(e, m_fid, _OP_CHAR_LE);
#endif
else
return bv().is_bv_ule(e) && is_char(to_app(e)->get_arg(0));
}
@ -840,8 +855,12 @@ app* seq_util::mk_le(expr* ch1, expr* ch2) const {
expr_ref _ch1(ch1, m), _ch2(ch2, m);
if (seq.unicode()) {
#if _USE_CHAR_PLUGIN
return ch.mk_le(ch1, ch2);
#else
expr* es[2] = { ch1, ch2 };
return m.mk_app(m_fid, _OP_CHAR_LE, 2, es);
#endif
}
else {
rational r1, r2;

View file

@ -24,6 +24,7 @@ Revision History:
#include "ast/ast.h"
#include "ast/bv_decl_plugin.h"
#include "ast/char_decl_plugin.h"
#include "util/lbool.h"
#include "util/zstring.h"
@ -204,11 +205,15 @@ public:
sort* char_sort() const { return m_char; }
sort* string_sort() const { return m_string; }
char_decl_plugin& get_char_plugin() { return *static_cast<char_decl_plugin*>(m_manager->get_plugin(m_manager->mk_family_id("char"))); }
};
class seq_util {
ast_manager& m;
seq_decl_plugin& seq;
char_decl_plugin& ch;
family_id m_fid;
mutable scoped_ptr<bv_util> m_bv;
bv_util& bv() const;
@ -571,6 +576,7 @@ public:
seq_util(ast_manager& m):
m(m),
seq(*static_cast<seq_decl_plugin*>(m.get_plugin(m.mk_family_id("seq")))),
ch(seq.get_char_plugin()),
m_fid(seq.get_family_id()),
str(*this),
re(*this) {

View file

@ -701,8 +701,8 @@ void cmd_context::init_manager_core(bool new_manager) {
register_plugin(symbol("array"), alloc(array_decl_plugin), logic_has_array());
register_plugin(symbol("datatype"), alloc(datatype_decl_plugin), logic_has_datatype());
register_plugin(symbol("recfun"), alloc(recfun::decl::plugin), logic_has_recfun());
register_plugin(symbol("seq"), alloc(seq_decl_plugin), logic_has_seq());
register_plugin(symbol("char"), alloc(char_decl_plugin), logic_has_seq());
register_plugin(symbol("seq"), alloc(seq_decl_plugin), logic_has_seq());
register_plugin(symbol("pb"), alloc(pb_decl_plugin), logic_has_pb());
register_plugin(symbol("fpa"), alloc(fpa_decl_plugin), logic_has_fpa());
register_plugin(symbol("datalog_relation"), alloc(datalog::dl_decl_plugin), !has_logic());

View file

@ -67,6 +67,10 @@ public:
m_chars.insert(ch);
}
void register_value(unsigned u) {
m_chars.insert(u);
}
void add_trail(expr* e) {
m_trail.push_back(e);
}

View file

@ -57,13 +57,13 @@ public:
}
expr* get_some_value(sort* s) override {
if (u.is_seq(s)) {
return u.str.mk_empty(s);
}
sort* seq = nullptr;
if (u.is_re(s, seq)) {
if (u.is_seq(s))
return u.str.mk_empty(s);
if (u.is_re(s, seq))
return u.re.mk_to_re(u.str.mk_empty(seq));
}
if (u.is_char(s))
return u.mk_char('A');
UNREACHABLE();
return nullptr;
}
@ -84,7 +84,11 @@ public:
return false;
}
}
NOT_IMPLEMENTED_YET();
if (u.is_char(s)) {
v1 = u.mk_char('a');
v2 = u.mk_char('b');
return true;
}
return false;
}
expr* get_fresh_value(sort* s) override {
@ -104,9 +108,7 @@ public:
return u.re.mk_to_re(v0);
}
if (u.is_char(s)) {
//char s[2] = { ++m_char, 0 };
//return u.str.mk_char(zstring(s), 0);
return u.str.mk_char(zstring("a"), 0);
return u.mk_char('a');
}
if (u.is_seq(s, ch)) {
expr* v = m_model.get_fresh_value(ch);
@ -134,23 +136,19 @@ public:
symbol sym;
if (u.str.is_string(n, sym)) {
m_strings.insert(sym);
if (sym.str().find(m_unique_delim) != std::string::npos) {
if (sym.str().find(m_unique_delim) != std::string::npos)
add_new_delim();
}
}
}
private:
void add_new_delim() {
bool found = true;
while (found) {
found = false;
try_again:
m_unique_delim += "!";
symbol_set::iterator it = m_strings.begin(), end = m_strings.end();
for (; it != end && !found; ++it) {
found = it->str().find(m_unique_delim) != std::string::npos;
}
}
for (auto const& s : m_strings)
if (s.str().find(m_unique_delim) != std::string::npos)
goto try_again;
}
};

View file

@ -940,7 +940,7 @@ namespace smt {
sort* s = seq.mk_string_sort();
family_id ch_fid = ch->get_family_id();
if (s->get_family_id() != ch_fid)
m_context.register_plugin(alloc(smt::theory_char, m_context, ch_fid));
m_context.register_plugin(alloc(smt::theory_char, m_context, ch_fid, nullptr));
}

View file

@ -22,15 +22,18 @@ Author:
namespace smt {
theory_char::theory_char(context& ctx, family_id fid):
theory_char::theory_char(context& ctx, family_id fid, theory* th):
theory(ctx, fid),
seq(m),
m_bb(m, ctx.get_fparams())
m_bb(m, ctx.get_fparams()),
m_th(th)
{
bv_util bv(m);
sort_ref b8(bv.mk_sort(8), m);
m_enabled = !seq.is_char(b8);
m_bits2char = symbol("bits2char");
if (!m_th)
m_th = this;
}
struct theory_char::reset_bits : public trail<context> {
@ -215,21 +218,22 @@ namespace smt {
* 2. Assign values to characters that haven't been assigned.
*/
bool theory_char::final_check() {
TRACE("seq", tout << "final check " << m_th->get_num_vars() << "\n";);
m_var2value.reset();
m_var2value.resize(get_num_vars(), UINT_MAX);
m_var2value.resize(m_th->get_num_vars(), UINT_MAX);
m_value2var.reset();
// extract the initial set of constants.
uint_set values;
unsigned c = 0, d = 0;
for (unsigned v = get_num_vars(); v-- > 0; ) {
expr* e = get_expr(v);
for (unsigned v = m_th->get_num_vars(); v-- > 0; ) {
expr* e = m_th->get_expr(v);
if (seq.is_char(e) && m_var2value[v] == UINT_MAX && get_char_value(v, c)) {
CTRACE("seq", seq.is_char(e), tout << mk_pp(e, m) << " root: " << get_enode(v)->is_root() << " is_value: " << get_char_value(v, c) << "\n";);
enode* r = get_enode(v)->get_root();
CTRACE("seq", seq.is_char(e), tout << mk_pp(e, m) << " root: " << m_th->get_enode(v)->is_root() << " is_value: " << get_char_value(v, c) << "\n";);
enode* r = m_th->get_enode(v)->get_root();
m_value2var.reserve(c + 1, null_theory_var);
theory_var u = m_value2var[c];
if (u != null_theory_var && r != get_enode(u)->get_root()) {
if (u != null_theory_var && r != m_th->get_enode(u)->get_root()) {
enforce_ackerman(u, v);
return false;
}
@ -254,8 +258,8 @@ namespace smt {
// assign values to other unassigned nodes
c = 'A';
for (unsigned v = get_num_vars(); v-- > 0; ) {
expr* e = get_expr(v);
for (unsigned v = m_th->get_num_vars(); v-- > 0; ) {
expr* e = m_th->get_expr(v);
if (seq.is_char(e) && m_var2value[v] == UINT_MAX) {
d = c;
while (values.contains(c)) {
@ -266,7 +270,7 @@ namespace smt {
}
}
TRACE("seq", tout << "fresh: " << mk_pp(e, m) << " := " << c << "\n";);
for (enode* n : *get_enode(v))
for (enode* n : *m_th->get_enode(v))
m_var2value[n->get_th_var(get_id())] = c;
m_value2var.reserve(c + 1, null_theory_var);
m_value2var[c] = v;
@ -278,9 +282,9 @@ namespace smt {
void theory_char::enforce_bits() {
TRACE("seq", tout << "enforce bits\n";);
for (unsigned v = get_num_vars(); v-- > 0; ) {
for (unsigned v = m_th->get_num_vars(); v-- > 0; ) {
expr* e = get_expr(v);
if (seq.is_char(e) && get_enode(v)->is_root() && !has_bits(v))
if (seq.is_char(e) && m_th->get_enode(v)->is_root() && !has_bits(v))
init_bits(v);
}
}
@ -344,6 +348,10 @@ namespace smt {
// a stand-alone theory.
void theory_char::init_model(model_generator & mg) {
m_factory = alloc(char_factory, get_manager(), get_family_id(), mg.get_model());
mg.register_factory(m_factory);
for (auto val : m_var2value)
if (val != UINT_MAX)
m_factory->register_value(val);
}
model_value_proc * theory_char::mk_value(enode * n, model_generator & mg) {

View file

@ -44,6 +44,7 @@ namespace smt {
stats m_stats;
symbol m_bits2char;
char_factory* m_factory { nullptr };
theory* m_th;
struct reset_bits;
@ -58,11 +59,11 @@ namespace smt {
public:
theory_char(context& ctx, family_id fid);
theory_char(context& ctx, family_id fid, theory * th);
void new_eq_eh(theory_var v1, theory_var v2) override;
void new_diseq_eh(theory_var v1, theory_var v2) override;
theory * mk_fresh(context * new_ctx) override { return alloc(theory_char, *new_ctx, get_family_id()); }
theory * mk_fresh(context * new_ctx) override { return alloc(theory_char, *new_ctx, get_family_id(), nullptr); }
bool internalize_atom(app * atom, bool gate_ctx) override;
bool internalize_term(app * term) override;
void display(std::ostream& out) const override {}

View file

@ -108,6 +108,8 @@ Outline:
#include "smt/theory_lra.h"
#include "smt/smt_kernel.h"
#define _USE_CHAR (false && m_char.enabled())
using namespace smt;
void theory_seq::solution_map::update(expr* e, expr* r, dependency* d) {
@ -272,7 +274,7 @@ theory_seq::theory_seq(context& ctx):
m_autil(m),
m_sk(m, m_rewrite),
m_ax(*this, m_rewrite),
m_char(ctx, get_family_id()),
m_char(ctx, get_family_id(), this),
m_regex(*this),
m_arith_value(m),
m_trail_stack(*this),
@ -354,7 +356,7 @@ final_check_status theory_seq::final_check_eh() {
TRACEFIN("zero_length");
return FC_CONTINUE;
}
if (m_char.enabled() && !m_char.final_check()) {
if (_USE_CHAR && !m_char.final_check()) {
return FC_CONTINUE;
}
if (get_fparams().m_split_w_len && len_based_split()) {
@ -1489,7 +1491,7 @@ bool theory_seq::internalize_term(app* term) {
bool_var bv = ctx.mk_bool_var(term);
ctx.set_var_theory(bv, get_id());
ctx.mark_as_relevant(bv);
if (m_util.is_char_le(term) && m_char.enabled()) {
if (m_util.is_char_le(term) && _USE_CHAR) {
mk_var(ensure_enode(term->get_arg(0)));
mk_var(ensure_enode(term->get_arg(1)));
m_char.internalize_le(literal(bv, false), term);
@ -1509,7 +1511,7 @@ bool theory_seq::internalize_term(app* term) {
}
unsigned c = 0;
if (m_char.enabled() && m_util.is_const_char(term, c))
if (_USE_CHAR && m_util.is_const_char(term, c))
m_char.new_const_char(v, c);
return true;
@ -1854,6 +1856,7 @@ public:
rational val;
bool is_string = th.m_util.is_string(m_sort);
expr_ref result(th.m);
if (is_string) {
unsigned_vector sbuffer;
unsigned ch;
@ -1991,7 +1994,7 @@ model_value_proc * theory_seq::mk_value(enode * n, model_generator & mg) {
m_concat.shrink(start);
return sv;
}
else if (m_char.enabled() && m_util.is_char(e)) {
else if (_USE_CHAR && m_util.is_char(e)) {
unsigned ch = m_char.get_char_value(n->get_th_var(get_id()));
app* val = m_util.str.mk_char(ch);
m_factory->add_trail(val);
@ -2301,7 +2304,7 @@ void theory_seq::validate_fmls(enode_pair_vector const& eqs, literal_vector cons
theory_var theory_seq::mk_var(enode* n) {
expr* o = n->get_owner();
if (!m_util.is_seq(o) && !m_util.is_re(o) && (!m_char.enabled() || !m_util.is_char(o)))
if (!m_util.is_seq(o) && !m_util.is_re(o) && (!_USE_CHAR || !m_util.is_char(o)))
return null_theory_var;
if (is_attached_to_var(n))
@ -2960,7 +2963,7 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
else if (m_util.str.is_nth_i(e) || m_util.str.is_nth_u(e)) {
// no-op
}
else if (m_char.enabled() && m_util.is_char_le(e)) {
else if (_USE_CHAR && m_util.is_char_le(e)) {
// no-op
}
else if (m_util.is_skolem(e)) {
@ -2981,7 +2984,7 @@ void theory_seq::new_eq_eh(theory_var v1, theory_var v2) {
enode* n2 = get_enode(v2);
expr* o1 = n1->get_owner();
expr* o2 = n2->get_owner();
if (m_char.enabled() && m_util.is_char(o1)) {
if (_USE_CHAR && m_util.is_char(o1)) {
m_char.new_eq_eh(v1, v2);
return;
}
@ -3032,7 +3035,7 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) {
m_regex.propagate_ne(e1, e2);
return;
}
if (m_char.enabled() && m_util.is_char(n1->get_owner())) {
if (_USE_CHAR && m_util.is_char(n1->get_owner())) {
m_char.new_diseq_eh(v1, v2);
return;
}