From 1bfc12d6e6b1fce07a6b58ab973efa3d12f8b0ac Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sun, 17 May 2020 11:42:39 -0700 Subject: [PATCH] initial stab at independent unicode module Signed-off-by: Nikolaj Bjorner --- src/smt/CMakeLists.txt | 1 + src/smt/seq_axioms.cpp | 21 ++--- src/smt/seq_unicode.cpp | 168 ++++++++++++++++++++++++++++++++++++++++ src/smt/seq_unicode.h | 114 +++++++++++++++++++++++++++ 4 files changed, 290 insertions(+), 14 deletions(-) create mode 100644 src/smt/seq_unicode.cpp create mode 100644 src/smt/seq_unicode.h diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt index 441b7074b..5f55677fc 100644 --- a/src/smt/CMakeLists.txt +++ b/src/smt/CMakeLists.txt @@ -17,6 +17,7 @@ z3_add_component(smt seq_eq_solver.cpp seq_ne_solver.cpp seq_offset_eq.cpp + seq_unicode.cpp smt_almost_cg_table.cpp smt_arith_value.cpp smt_case_split_queue.cpp diff --git a/src/smt/seq_axioms.cpp b/src/smt/seq_axioms.cpp index dccf8c81f..3a5c57f3c 100644 --- a/src/smt/seq_axioms.cpp +++ b/src/smt/seq_axioms.cpp @@ -772,25 +772,18 @@ void seq_axioms::add_le_axiom(expr* n) { } /** - is_digit(e) <=> e = '0' or ... or e = '9' + is_digit(e) <=> to_code('0') <= to_code(e) <= to_code('9') */ void seq_axioms::add_is_digit_axiom(expr* n) { expr* e = nullptr; VERIFY(seq.str.is_is_digit(n, e)); literal is_digit = mk_literal(n); - literal_vector digits; - digits.push_back(~is_digit); - for (unsigned i = 0; i < 10; ++i) { - unsigned d = '0' + i; - zstring str(1, &d); - expr_ref s(seq.str.mk_string(str), m); - m_rewrite(s); // if the solver depends on unit normal form - literal digit_i = mk_eq(e, s); - digits.push_back(digit_i); - add_axiom(~digit_i, is_digit); - } - // literals are marked relevant by add_axiom of binary clauses - ctx().mk_th_axiom(th.get_id(), digits); + expr_ref to_code(seq.str.mk_to_code(e), m); + literal ge0 = mk_ge(to_code, (unsigned)'0'); + literal le9 = mk_le(to_code, (unsigned)'9'); + add_axiom(~is_digit, ge0); + add_axiom(~is_digit, le9); + add_axiom(is_digit, ~ge0, ~le9); } /** diff --git a/src/smt/seq_unicode.cpp b/src/smt/seq_unicode.cpp new file mode 100644 index 000000000..c15c1fe32 --- /dev/null +++ b/src/smt/seq_unicode.cpp @@ -0,0 +1,168 @@ +/*++ +Copyright (c) 2011 Microsoft Corporation + +Module Name: + + seq_unicode.cpp + +Abstract: + + Solver for unicode characters + +Author: + + Nikolaj Bjorner (nbjorner) 2020-5-16 + +--*/ + +#include "smt/seq_unicode.h" +#include "smt/smt_context.h" +#include "smt/smt_arith_value.h" +#include "util/trail.h" + +namespace smt { + + seq_unicode::seq_unicode(theory& th): + th(th), + m(th.get_manager()), + seq(m), + dl(), + m_qhead(0), + m_nc_functor(*this), + m_var_value_hash(*this), + m_var_value_eq(*this), + m_var_value_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_var_value_hash, m_var_value_eq) + {} + + // <= atomic constraints on characters + void seq_unicode::assign_le(theory_var v1, theory_var v2, literal lit) { + dl.init_var(v1); + dl.init_var(v2); + // dl.set_assignment(v1, 'a' + ctx().random(24)); + // dl.set_assignment(v2, 'a' + ctx().random(24)); + auto edge_id = dl.add_edge(v1, v2, s_integer(0), lit); + m_asserted_edges.push_back(edge_id); + ctx().push_trail(push_back_vector>(m_asserted_edges)); + } + + // < atomic constraint on characters + void seq_unicode::assign_lt(theory_var v1, theory_var v2, literal lit) { + dl.init_var(v1); + dl.init_var(v2); + auto edge_id = dl.add_edge(v1, v2, s_integer(1), lit); + m_asserted_edges.push_back(edge_id); + ctx().push_trail(push_back_vector>(m_asserted_edges)); + } + + // = on characters + void seq_unicode::new_eq_eh(theory_var v1, theory_var v2) { + expr* e1 = th.get_expr(v1); + expr* e2 = th.get_expr(v2); + literal eq = th.mk_eq(e1, e2, false); + literal le = mk_literal(seq.mk_le(e1, e2)); + literal ge = mk_literal(seq.mk_le(e2, e1)); + add_axiom(~eq, le); + add_axiom(~eq, ge); + add_axiom(le, ge, eq); + } + + // != on characters + void seq_unicode::new_diseq_eh(theory_var v1, theory_var v2) { + expr* e1 = th.get_expr(v1); + expr* e2 = th.get_expr(v2); + literal eq = th.mk_eq(e1, e2, false); + literal le = mk_literal(seq.mk_le(e1, e2)); + literal ge = mk_literal(seq.mk_le(e2, e1)); + add_axiom(~eq, le); + add_axiom(~eq, ge); + add_axiom(le, ge, eq); + } + + final_check_status seq_unicode::final_check() { + // ensure all variables are above 0 and less than zstring::max_char() + bool added_constraint = false; + // TBD: shift assignments on variables that are not lower-bounded + // TBD: set zero to a zero value. + arith_util a(m); + arith_value avalue(m); + avalue.init(&ctx()); + for (unsigned v = 0; v < th.get_num_vars(); ++v) { + if (!seq.is_char(th.get_expr(v))) + continue; + dl.init_var(v); + auto val = dl.get_assignment(v).get_int(); + if (val > static_cast(zstring::max_char())) { + expr_ref ch(seq.str.mk_char(zstring::max_char()), m); + enode* n = th.ensure_enode(ch); + theory_var v_max = n->get_th_var(th.get_id()); + assign_le(v, v_max, null_literal); + added_constraint = true; + continue; + } + if (val < 0) { + expr_ref ch(seq.str.mk_char(0), m); + enode* n = th.ensure_enode(ch); + theory_var v_min = n->get_th_var(th.get_id()); + assign_le(v_min, v, null_literal); + dl.init_var(v_min); + dl.set_to_zero(v_min); + added_constraint = true; + continue; + } + // ensure str.to_code(unit(v)) = val + expr_ref ch1(seq.str.mk_unit(seq.str.mk_char(val)), m); + // or + // expr_ref ch2(seq.str.mk_string(zstring(val)), m); + expr_ref code(seq.str.mk_to_code(ch1), m); + rational val2; + if (avalue.get_fixed(code, val2) && val2 == rational(val)) + continue; + add_axiom(th.mk_eq(code, a.mk_int(val), false)); + added_constraint = true; + } + if (added_constraint) + return FC_CONTINUE; + + // ensure equalities over shared symbols + if (th.assume_eqs(m_var_value_table)) + return FC_CONTINUE; + + return FC_DONE; + } + + void seq_unicode::propagate() { + ctx().push_trail(value_trail(m_qhead)); + for (; m_qhead < m_asserted_edges.size() && !ctx().inconsistent(); ++m_qhead) { + propagate(m_asserted_edges[m_qhead]); + } + } + + void seq_unicode::propagate(edge_id edge) { + if (dl.enable_edge(edge)) + return; + dl.traverse_neg_cycle2(false, m_nc_functor); + literal_vector const & lits = m_nc_functor.get_lits(); + vector params; + if (m.proofs_enabled()) { + params.push_back(parameter(symbol("farkas"))); + for (unsigned i = 0; i <= lits.size(); ++i) { + params.push_back(parameter(rational(1))); + } + } + ctx().set_conflict( + ctx().mk_justification( + ext_theory_conflict_justification( + th.get_id(), ctx().get_region(), + lits.size(), lits.c_ptr(), + 0, nullptr, + params.size(), params.c_ptr())));; + } + + unsigned seq_unicode::get_value(theory_var v) { + dl.init_var(v); + auto val = dl.get_assignment(v); + return val.get_int(); + } + +} + diff --git a/src/smt/seq_unicode.h b/src/smt/seq_unicode.h new file mode 100644 index 000000000..9f2361cdd --- /dev/null +++ b/src/smt/seq_unicode.h @@ -0,0 +1,114 @@ +/*++ +Copyright (c) 2011 Microsoft Corporation + +Module Name: + + seq_unicode.h + +Abstract: + + Solver for unicode characters + +Author: + + Nikolaj Bjorner (nbjorner) 2020-5-16 + +--*/ +#pragma once + +#include "util/s_integer.h" +#include "ast/seq_decl_plugin.h" +#include "smt/smt_theory.h" +#include "smt/diff_logic.h" + +namespace smt { + + class seq_unicode { + struct ext { + static const bool m_int_theory = true; + typedef s_integer numeral; + typedef s_integer fin_numeral; + numeral m_epsilon; + typedef literal explanation; + ext(): m_epsilon(1) {} + }; + + class nc_functor { + literal_vector m_literals; + seq_unicode& m_super; + public: + nc_functor(seq_unicode& s): m_super(s) {} + void operator()(literal const& l) { + if (l != null_literal) m_literals.push_back(l); + } + literal_vector const& get_lits() const { return m_literals; } + void new_edge(dl_var s, dl_var d, unsigned num_edges, edge_id const* edges) {} + }; + + struct var_value_hash { + seq_unicode & m_th; + var_value_hash(seq_unicode & th):m_th(th) {} + unsigned operator()(theory_var v) const { return m_th.get_value(v); } + }; + + struct var_value_eq { + seq_unicode & m_th; + var_value_eq(seq_unicode & th):m_th(th) {} + bool operator()(theory_var v1, theory_var v2) const { + return m_th.get_value(v1) == m_th.get_value(v2); + } + }; + + typedef int_hashtable var_value_table; + + theory& th; + ast_manager& m; + seq_util seq; + dl_graph dl; + unsigned m_qhead; + svector m_asserted_edges; + nc_functor m_nc_functor; + var_value_hash m_var_value_hash; + var_value_eq m_var_value_eq; + var_value_table m_var_value_table; + + context& ctx() const { return th.get_context(); } + + void propagate(edge_id edge); + + std::function m_add_axiom; + void add_axiom(literal a, literal b = null_literal, literal c = null_literal) { + m_add_axiom(a, b, c); + } + + literal mk_literal(expr* e) { return null_literal; } // TBD + + public: + + seq_unicode(theory& th); + + // <= atomic constraints on characters + void assign_le(theory_var v1, theory_var v2, literal lit); + + // < atomic constraint on characters + void assign_lt(theory_var v1, theory_var v2, literal lit); + + // = on characters + void new_eq_eh(theory_var v1, theory_var v2); + + // != on characters + void new_diseq_eh(theory_var v1, theory_var v2); + + // ensure coherence for character codes and equalities of shared symbols. + final_check_status final_check(); + + unsigned get_value(theory_var v); + + void propagate(); + + bool can_propagate() const { return m_qhead < m_asserted_edges.size(); } + + }; + +} +