3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-10 03:07:07 +00:00

optional unicode mode

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2020-05-17 19:06:34 -07:00
parent 30f17b1509
commit 1def58bc9f
7 changed files with 155 additions and 26 deletions

View file

@ -212,6 +212,7 @@ public:
void set_solver(expr_solver* solver) { m_re2aut.set_solver(solver); }
bool has_solver() { return m_re2aut.has_solver(); }
bool coalesce_chars() const { return m_coalesce_chars; }
br_status mk_app_core(func_decl * f, unsigned num_args, expr * const * args, expr_ref & result);
br_status mk_eq_core(expr * lhs, expr * rhs, expr_ref & result);

View file

@ -82,8 +82,7 @@ static bool is_escape_char(char const *& s, unsigned& result) {
return true;
}
#if 0
/* unicode */
#if _USE_UNICODE
if (*(s+1) == 'u' && *(s+2) == '{') {
result = 0;
for (unsigned i = 0; i < 5; ++i) {
@ -570,6 +569,7 @@ void seq_decl_plugin::init() {
sort* str2TintT[3] = { strT, strT, intT };
sort* seqAintT[2] = { seqA, intT };
sort* seq3A[3] = { seqA, seqA, seqA };
sort* charTcharT[2] = { m_char, m_char };
m_sigs.resize(LAST_SEQ_OP);
// TBD: have (par ..) construct and load parameterized signature from premable.
m_sigs[OP_SEQ_UNIT] = alloc(psig, m, "seq.unit", 1, 1, &A, seqA);
@ -607,6 +607,10 @@ void seq_decl_plugin::init() {
m_sigs[OP_SEQ_REPLACE_RE] = alloc(psig, m, "str.replace_re", 1, 3, seqAreAseqA, seqA);
m_sigs[OP_SEQ_REPLACE_ALL] = alloc(psig, m, "str.replace_all", 1, 3, seqAseqAseqA, seqA);
m_sigs[OP_STRING_CONST] = nullptr;
#if _USE_UNICODE
m_sigs[OP_CHAR_CONST] = nullptr;
m_sigs[OP_CHAR_LE] = alloc(psig, m, "char.<=", 0, 2, charTcharT, boolT);
#endif
m_sigs[_OP_STRING_STRIDOF] = alloc(psig, m, "str.indexof", 0, 3, str2TintT, intT);
m_sigs[_OP_STRING_STRREPL] = alloc(psig, m, "str.replace", 0, 3, str3T, strT);
m_sigs[OP_STRING_ITOS] = alloc(psig, m, "str.from_int", 0, 1, &intT, strT);
@ -632,8 +636,11 @@ void seq_decl_plugin::init() {
void seq_decl_plugin::set_manager(ast_manager* m, family_id id) {
decl_plugin::set_manager(m, id);
bv_util bv(*m);
// to be changed to Unicode (abstract sort)
#if _USE_UNICODE
m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr));
#else
m_char = bv.mk_sort(8);
#endif
m->inc_ref(m_char);
parameter param(m_char);
m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, SEQ_SORT, 1, &param));
@ -667,8 +674,10 @@ sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter
}
return m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, num_parameters, parameters));
}
#if _USE_UNICODE
case _CHAR_SORT:
return m_char;
#endif
case _STRING_SORT:
return m_string;
case _REGLAN_SORT:
@ -744,7 +753,6 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
case OP_STRING_IS_DIGIT:
case OP_STRING_TO_CODE:
case OP_STRING_FROM_CODE:
m.raise_exception("character function is not yet supported");
match(*m_sigs[k], arity, domain, range, rng);
return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
@ -831,7 +839,6 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
case OP_SEQ_REPLACE_RE:
m_has_re = true;
case OP_SEQ_REPLACE_ALL:
m.raise_exception("replace-all operation is not yet supported");
return mk_str_fun(k, arity, domain, range, k);
case OP_SEQ_CONCAT:
@ -895,6 +902,22 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
m_has_re = true;
return mk_str_fun(k, arity, domain, range, OP_SEQ_TO_RE);
#if _USE_UNICODE
case OP_CHAR_LE:
if (arity == 2 && domain[0] == m_char && domain[1] == m_char) {
return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, m.mk_bool_sort(), func_decl_info(m_family_id, k, 0, nullptr));
}
m.raise_exception("Incorrect parameters passed to character comparison");
case OP_CHAR_CONST:
if (!(num_parameters == 1 && arity == 0 &&
parameters[0].is_int() &&
0 <= parameters[0].get_int() &&
parameters[0].get_int() < static_cast<int>(zstring::max_char()))) {
m.raise_exception("invalid character declaration");
}
return m.mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, num_parameters, parameters));
#endif
case OP_SEQ_IN_RE:
m_has_re = true;
return mk_seq_fun(k, arity, domain, range, _OP_STRING_IN_REGEXP);
@ -983,6 +1006,17 @@ app* seq_decl_plugin::mk_string(zstring const& s) {
return m_manager->mk_const(f);
}
app* seq_decl_plugin::mk_char(unsigned u) {
#if _USE_UNICODE
parameter param(u);
func_decl* f = m_manager->mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, 1, &param));
return m_manager->mk_const(f);
#else
UNREACHABLE();
return nullptr;
#endif
}
bool seq_decl_plugin::is_considered_uninterpreted(func_decl * f) {
seq_util util(*m_manager);
@ -1045,6 +1079,12 @@ bool seq_decl_plugin::are_distinct(app* a, app* b) const {
is_app_of(a, m_family_id, OP_SEQ_UNIT)) {
return true;
}
#if _USE_UNICODE
if (is_app_of(a, m_family_id, OP_CHAR_CONST) &&
is_app_of(b, m_family_id, OP_CHAR_CONST)) {
return true;
}
#endif
return false;
}
@ -1074,13 +1114,11 @@ app* seq_util::str::mk_string(zstring const& s) const {
}
app* seq_util::str::mk_char(zstring const& s, unsigned idx) const {
// TBD for unicode
return u.bv().mk_numeral(s[idx], 8);
return u.mk_char(s[idx]);
}
app* seq_util::str::mk_char(char ch) const {
zstring s(ch);
return mk_char(s, 0);
app* seq_util::str::mk_char(unsigned ch) const {
return u.mk_char(ch);
}
bv_util& seq_util::bv() const {
@ -1089,21 +1127,34 @@ bv_util& seq_util::bv() const {
}
bool seq_util::is_const_char(expr* e, unsigned& c) const {
#if _USE_UNICODE
return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
#else
rational r;
unsigned sz;
return bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned() && (c = r.get_unsigned(), true);
#endif
}
app* seq_util::mk_char(unsigned ch) const {
#if _USE_UNICODE
return seq.mk_char(ch);
#else
return bv().mk_numeral(rational(ch), 8);
#endif
}
app* seq_util::mk_le(expr* ch1, expr* ch2) const {
#if _USE_UNICODE
expr* es[2] = { ch1, ch2 };
return m.mk_app(m_fid, OP_CHAR_LE, 2, es);
#else
return bv().mk_ule(ch1, ch2);
#endif
}
app* seq_util::mk_lt(expr* ch1, expr* ch2) const {
return m.mk_not(bv().mk_ule(ch2, ch1));
return m.mk_not(mk_le(ch2, ch1));
}
bool seq_util::str::is_string(func_decl const* f, zstring& s) const {

View file

@ -26,11 +26,14 @@ Revision History:
#include "ast/ast.h"
#include "ast/bv_decl_plugin.h"
#define _USE_UNICODE 0
enum seq_sort_kind {
SEQ_SORT,
RE_SORT,
#if _USE_UNICODE
_CHAR_SORT, // internal only
#endif
_STRING_SORT,
_REGLAN_SORT
};
@ -83,6 +86,11 @@ enum seq_op_kind {
OP_STRING_IS_DIGIT,
OP_STRING_TO_CODE,
OP_STRING_FROM_CODE,
#if _USE_UNICODE
OP_CHAR_CONST, // constant character
OP_CHAR_LE, // Unicode comparison
#endif
// internal only operators. Converted to SEQ variants.
_OP_STRING_STRREPL,
_OP_STRING_CONCAT,
@ -210,6 +218,7 @@ public:
app* mk_string(symbol const& s);
app* mk_string(zstring const& s);
app* mk_char(unsigned ch);
bool has_re() const { return m_has_re; }
bool has_seq() const { return m_has_seq; }
@ -238,6 +247,11 @@ public:
bool is_re(expr* e, sort*& seq) const { return is_re(m.get_sort(e), seq); }
bool is_char(expr* e) const { return is_char(m.get_sort(e)); }
bool is_const_char(expr* e, unsigned& c) const;
#if _USE_UNICODE
bool is_char_le(expr const* e) const { return is_app_of(e, m_fid, OP_CHAR_LE); }
#else
bool is_char_le(expr const* e) const { return false; }
#endif
app* mk_char(unsigned ch) const;
app* mk_le(expr* ch1, expr* ch2) const;
app* mk_lt(expr* ch1, expr* ch2) const;
@ -245,6 +259,8 @@ public:
app* mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range);
bool is_skolem(expr const* e) const { return is_app_of(e, m_fid, _OP_SEQ_SKOLEM); }
MATCH_BINARY(is_char_le);
bool has_re() const { return seq.has_re(); }
bool has_seq() const { return seq.has_seq(); }
@ -265,7 +281,7 @@ public:
app* mk_empty(sort* s) const { return m.mk_const(m.mk_func_decl(m_fid, OP_SEQ_EMPTY, 0, nullptr, 0, (expr*const*)nullptr, s)); }
app* mk_string(zstring const& s) const;
app* mk_string(symbol const& s) const { return u.seq.mk_string(s); }
app* mk_char(char ch) const;
app* mk_char(unsigned ch) const;
app* mk_concat(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONCAT, 2, es); }
app* mk_concat(expr* a, expr* b, expr* c) const { return mk_concat(a, mk_concat(b, c)); }
expr* mk_concat(unsigned n, expr* const* es, sort* s) const {
@ -323,6 +339,9 @@ public:
bool is_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_INDEX); }
bool is_last_index(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_LAST_INDEX); }
bool is_replace(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE); }
bool is_replace_re(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_RE); }
bool is_replace_re_all(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_RE_ALL); }
bool is_replace_all(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_REPLACE_ALL); }
bool is_prefix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_PREFIX); }
bool is_suffix(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_SUFFIX); }
bool is_itos(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_ITOS); }
@ -356,6 +375,9 @@ public:
MATCH_TERNARY(is_index);
MATCH_BINARY(is_last_index);
MATCH_TERNARY(is_replace);
MATCH_TERNARY(is_replace_re);
MATCH_TERNARY(is_replace_re_all);
MATCH_TERNARY(is_replace_all);
MATCH_BINARY(is_prefix);
MATCH_BINARY(is_suffix);
MATCH_BINARY(is_lt);

View file

@ -38,8 +38,6 @@ namespace smt {
void seq_unicode::assign_le(theory_var v1, theory_var v2, literal lit) {
dl.init_var(v1);
dl.init_var(v2);
// dl.set_assignment(v1, 'a' + ctx().random(24));
// dl.set_assignment(v2, 'a' + ctx().random(24));
ctx().push_trail(push_back_vector<context, svector<theory_var>>(m_asserted_edges));
m_asserted_edges.push_back(dl.add_edge(v1, v2, s_integer(0), lit));
}
@ -79,11 +77,12 @@ namespace smt {
adapt_eq(v1, v2);
}
final_check_status seq_unicode::final_check() {
bool seq_unicode::final_check() {
// ensure all variables are above 0 and less than zstring::max_char()
bool added_constraint = false;
// TBD: shift assignments on variables that are not lower-bounded
// TBD: set zero to a zero value.
// TBD: shift assignments on variables that are not lower-bounded, so that they are "nice" (have values 'a', 'b', ...)
// TBD: set "zero" to a zero value.
// TBD: ensure that unicode constants have the right values
arith_util a(m);
arith_value avalue(m);
avalue.init(&ctx());
@ -111,10 +110,15 @@ namespace smt {
continue;
}
// ensure str.to_code(unit(v)) = val
expr_ref ch1(seq.str.mk_unit(seq.str.mk_char(val)), m);
// or
// expr_ref ch2(seq.str.mk_string(zstring(val)), m);
expr_ref code(seq.str.mk_to_code(ch1), m);
expr_ref ch(m);
if (false) {
/// m_rewrite.coalesce_chars();
ch = seq.str.mk_string(zstring(val));
}
else {
ch = seq.str.mk_unit(seq.str.mk_char(val));
}
expr_ref code(seq.str.mk_to_code(ch), m);
rational val2;
if (avalue.get_value(code, val2) && val2 == rational(val))
continue;
@ -122,13 +126,13 @@ namespace smt {
added_constraint = true;
}
if (added_constraint)
return FC_CONTINUE;
return false;
// ensure equalities over shared symbols
if (th.assume_eqs(m_var_value_table))
return FC_CONTINUE;
return false;
return FC_DONE;
return true;
}
void seq_unicode::propagate() {

View file

@ -102,7 +102,7 @@ namespace smt {
void new_diseq_eh(theory_var v1, theory_var v2);
// ensure coherence for character codes and equalities of shared symbols.
final_check_status final_check();
bool final_check();
unsigned get_value(theory_var v);

View file

@ -298,12 +298,14 @@ theory_seq::theory_seq(context& ctx):
m_autil(m),
m_sk(m, m_rewrite),
m_ax(*this, m_rewrite),
m_unicode(*this),
m_arith_value(m),
m_trail_stack(*this),
m_ls(m), m_rs(m),
m_lhs(m), m_rhs(m),
m_new_eqs(m),
m_has_seq(m_util.has_seq()),
m_unhandled_expr(nullptr),
m_res(m),
m_max_unfolding_depth(1),
m_max_unfolding_lit(null_literal),
@ -346,6 +348,7 @@ final_check_status theory_seq::final_check_eh() {
if (!m_has_seq) {
return FC_DONE;
}
m_new_propagation = false;
TRACE("seq", display(tout << "level: " << ctx.get_scope_level() << "\n"););
TRACE("seq_verbose", ctx.display(tout););
@ -374,6 +377,9 @@ final_check_status theory_seq::final_check_eh() {
TRACEFIN("zero_length");
return FC_CONTINUE;
}
if (!m_unicode.final_check()) {
return FC_CONTINUE;
}
if (get_fparams().m_split_w_len && len_based_split()) {
++m_stats.m_branch_variable;
TRACEFIN("split_based_on_length");
@ -424,6 +430,9 @@ final_check_status theory_seq::final_check_eh() {
TRACEFIN("branch_ne");
return FC_CONTINUE;
}
if (m_unhandled_expr) {
return FC_GIVEUP;
}
if (is_solved()) {
//scoped_enable_trace _se;
TRACEFIN("is_solved");
@ -2309,7 +2318,7 @@ theory_var theory_seq::mk_var(enode* n) {
}
bool theory_seq::can_propagate() {
return m_axioms_head < m_axioms.size() || !m_replay.empty() || m_new_solution;
return m_axioms_head < m_axioms.size() || !m_replay.empty() || m_new_solution || m_unicode.can_propagate();
}
bool theory_seq::canonize(expr* e, dependency*& eqs, expr_ref& result) {
@ -2501,6 +2510,7 @@ void theory_seq::add_dependency(dependency*& dep, enode* a, enode* b) {
void theory_seq::propagate() {
m_unicode.propagate();
while (m_axioms_head < m_axioms.size() && !ctx.inconsistent()) {
expr_ref e(m);
e = m_axioms[m_axioms_head].get();
@ -2578,6 +2588,15 @@ void theory_seq::deque_axiom(expr* n) {
else if (m_util.str.is_unit(n)) {
m_ax.add_unit_axiom(n);
}
else if (m_util.str.is_is_digit(n)) {
m_ax.add_is_digit_axiom(n);
}
else if (m_util.str.is_from_code(n)) {
m_ax.add_str_from_code_axiom(n);
}
else if (m_util.str.is_to_code(n)) {
m_ax.add_str_to_code_axiom(n);
}
}
expr_ref theory_seq::add_elim_string_axiom(expr* n) {
@ -3053,7 +3072,16 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
else if (m_util.str.is_nth_i(e) || m_util.str.is_nth_u(e)) {
// no-op
}
else if (m_util.is_char_le(e, e1, e2)) {
theory_var v1 = get_th_var(ctx.get_enode(e1));
theory_var v2 = get_th_var(ctx.get_enode(e2));
if (is_true)
m_unicode.assign_le(v1, v2, lit);
else
m_unicode.assign_lt(v2, v1, lit);
}
else if (m_util.is_skolem(e)) {
// no-op
}
else {
@ -3065,6 +3093,10 @@ void theory_seq::assign_eh(bool_var v, bool is_true) {
void theory_seq::new_eq_eh(theory_var v1, theory_var v2) {
enode* n1 = get_enode(v1);
enode* n2 = get_enode(v2);
if (m_util.is_char(n1->get_owner())) {
m_unicode.new_eq_eh(v1, v2);
return;
}
dependency* deps = m_dm.mk_leaf(assumption(n1, n2));
new_eq_eh(deps, n1, n2);
}
@ -3156,6 +3188,10 @@ void theory_seq::new_diseq_eh(theory_var v1, theory_var v2) {
throw default_exception("convert regular expressions into automata");
}
}
if (m_util.is_char(n1->get_owner())) {
m_unicode.new_diseq_eh(v1, v2);
return;
}
m_exclude.update(e1, e2);
expr_ref eq(m.mk_eq(e1, e2), m);
TRACE("seq", tout << "new disequality " << ctx.get_scope_level() << ": " << mk_bounded_pp(eq, m, 2) << "\n";);
@ -3219,6 +3255,9 @@ void theory_seq::relevant_eh(app* n) {
m_util.str.is_itos(n) ||
m_util.str.is_stoi(n) ||
m_util.str.is_lt(n) ||
m_util.str.is_is_digit(n) ||
m_util.str.is_from_code(n) ||
m_util.str.is_to_code(n) ||
m_util.str.is_unit(n) ||
m_util.str.is_le(n)) {
enque_axiom(n);
@ -3237,6 +3276,15 @@ void theory_seq::relevant_eh(app* n) {
if (m_util.str.is_length(n, arg) && !has_length(arg) && ctx.e_internalized(arg)) {
add_length_to_eqc(arg);
}
if (m_util.str.is_replace_all(n) ||
m_util.str.is_replace_re(n) ||
m_util.str.is_replace_re_all(n)) {
if (!m_unhandled_expr) {
ctx.push_trail(value_trail<context, expr*>(m_unhandled_expr));
m_unhandled_expr = n;
}
}
}

View file

@ -33,6 +33,7 @@ Revision History:
#include "smt/theory_seq_empty.h"
#include "smt/seq_skolem.h"
#include "smt/seq_axioms.h"
#include "smt/seq_unicode.h"
#include "smt/seq_offset_eq.h"
namespace smt {
@ -402,6 +403,7 @@ namespace smt {
arith_util m_autil;
seq_skolem m_sk;
seq_axioms m_ax;
seq_unicode m_unicode;
arith_value m_arith_value;
th_trail_stack m_trail_stack;
stats m_stats;
@ -409,6 +411,7 @@ namespace smt {
expr_ref_vector m_ls, m_rs, m_lhs, m_rhs;
expr_ref_pair_vector m_new_eqs;
bool m_has_seq;
expr* m_unhandled_expr;
// maintain automata with regular expressions.
scoped_ptr_vector<eautomaton> m_automata;