3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-08-23 11:37:54 +00:00

reshuffle unicode support to use global parameter, and use bit-vectors on demand

This commit is contained in:
Nikolaj Bjorner 2021-01-21 14:24:26 -08:00
parent fb48481860
commit dafee71500
11 changed files with 367 additions and 267 deletions

View file

@ -719,6 +719,9 @@ br_status seq_rewriter::mk_app_core(func_decl * f, unsigned num_args, expr * con
SASSERT(num_args == 1);
st = mk_str_stoi(args[0], result);
break;
case OP_CHAR_LE:
case OP_CHAR_CONST:
break;
case _OP_STRING_CONCAT:
case _OP_STRING_PREFIX:
case _OP_STRING_SUFFIX:

View file

@ -16,6 +16,7 @@ Author:
Revision History:
--*/
#include "util/gparams.h"
#include "ast/seq_decl_plugin.h"
#include "ast/arith_decl_plugin.h"
#include "ast/array_decl_plugin.h"
@ -47,7 +48,7 @@ static bool is_octal_digit(char ch, unsigned& d) {
return false;
}
static bool is_escape_char(char const *& s, unsigned& result) {
bool zstring::is_escape_char(char const *& s, unsigned& result) {
unsigned d1, d2, d3;
if (*s != '\\' || *(s + 1) == 0) {
return false;
@ -89,10 +90,8 @@ static bool is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d1;
}
else if (*(s+3+i) == '}') {
#if !Z3_USE_UNICODE
if (result > 255)
if (result > 255 && !uses_unicode())
throw default_exception("unicode characters outside of byte range are not supported");
#endif
s += 4 + i;
return true;
}
@ -113,10 +112,8 @@ static bool is_escape_char(char const *& s, unsigned& result) {
break;
}
}
#if !Z3_USE_UNICODE
if (result > 255)
if (result > 255 && !uses_unicode())
throw default_exception("unicode characters outside of byte range are not supported");
#endif
s += 3 + i;
return true;
}
@ -177,14 +174,8 @@ zstring::zstring(char const* s) {
SASSERT(well_formed());
}
zstring::zstring(unsigned num_bits, bool const* ch) {
SASSERT(num_bits == 8); // TBD for unicode
unsigned n = 0;
for (unsigned i = 0; i < num_bits; ++i) {
n |= (((unsigned)ch[i]) << i);
}
m_buffer.push_back(n);
SASSERT(well_formed());
bool zstring::uses_unicode() const {
return gparams::get_value("unicode") == "true";
}
bool zstring::well_formed() const {
@ -402,7 +393,9 @@ seq_decl_plugin::seq_decl_plugin(): m_init(false),
m_char(nullptr),
m_reglan(nullptr),
m_has_re(false),
m_has_seq(false) {}
m_has_seq(false) {
m_unicode = gparams::get_value("unicode") == "true";
}
void seq_decl_plugin::finalize() {
for (psig* s : m_sigs) {
@ -623,11 +616,9 @@ void seq_decl_plugin::init() {
m_sigs[OP_SEQ_REPLACE_RE] = alloc(psig, m, "str.replace_re", 1, 3, seqAreAseqA, seqA);
m_sigs[OP_SEQ_REPLACE_ALL] = alloc(psig, m, "str.replace_all", 1, 3, seqAseqAseqA, seqA);
m_sigs[OP_STRING_CONST] = nullptr;
#if Z3_USE_UNICODE
m_sigs[OP_CHAR_CONST] = nullptr;
sort* charTcharT[2] = { m_char, m_char };
m_sigs[OP_CHAR_LE] = alloc(psig, m, "char.<=", 0, 2, charTcharT, boolT);
#endif
m_sigs[OP_CHAR_LE] = unicode() ? alloc(psig, m, "char.<=", 0, 2, charTcharT, boolT) : nullptr;
m_sigs[_OP_STRING_STRIDOF] = alloc(psig, m, "str.indexof", 0, 3, str2TintT, intT);
m_sigs[_OP_STRING_STRREPL] = alloc(psig, m, "str.replace", 0, 3, str3T, strT);
m_sigs[_OP_STRING_FROM_CHAR] = alloc(psig, m, "char", 1, 0, nullptr, strT);
@ -654,11 +645,10 @@ void seq_decl_plugin::init() {
void seq_decl_plugin::set_manager(ast_manager* m, family_id id) {
decl_plugin::set_manager(m, id);
bv_util bv(*m);
#if Z3_USE_UNICODE
m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr));
#else
m_char = bv.mk_sort(8);
#endif
if (unicode())
m_char = m->mk_sort(symbol("Unicode"), sort_info(m_family_id, _CHAR_SORT, 0, nullptr));
else
m_char = bv.mk_sort(8);
m->inc_ref(m_char);
parameter param(m_char);
m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, SEQ_SORT, 1, &param));
@ -692,10 +682,8 @@ sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter
}
return m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, num_parameters, parameters));
}
#if Z3_USE_UNICODE
case _CHAR_SORT:
return m_char;
#endif
case _STRING_SORT:
return m_string;
case _REGLAN_SORT:
@ -941,7 +929,6 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
m_has_re = true;
return mk_str_fun(k, arity, domain, range, OP_SEQ_TO_RE);
#if Z3_USE_UNICODE
case OP_CHAR_LE:
if (arity == 2 && domain[0] == m_char && domain[1] == m_char) {
return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, m.mk_bool_sort(), func_decl_info(m_family_id, k, 0, nullptr));
@ -955,7 +942,6 @@ func_decl * seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters,
m.raise_exception("invalid character declaration");
}
return m.mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, num_parameters, parameters));
#endif
case OP_SEQ_IN_RE:
m_has_re = true;
@ -1046,14 +1032,15 @@ app* seq_decl_plugin::mk_string(zstring const& s) {
}
app* seq_decl_plugin::mk_char(unsigned u) {
#if Z3_USE_UNICODE
parameter param(u);
func_decl* f = m_manager->mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, 1, &param));
return m_manager->mk_const(f);
#else
UNREACHABLE();
return nullptr;
#endif
if (unicode()) {
parameter param(u);
func_decl* f = m_manager->mk_const_decl(m_charc_sym, m_char, func_decl_info(m_family_id, OP_CHAR_CONST, 1, &param));
return m_manager->mk_const(f);
}
else {
UNREACHABLE();
return nullptr;
}
}
@ -1117,13 +1104,11 @@ bool seq_decl_plugin::are_distinct(app* a, app* b) const {
if (is_app_of(b, m_family_id, OP_SEQ_EMPTY) &&
is_app_of(a, m_family_id, OP_SEQ_UNIT)) {
return true;
}
#if Z3_USE_UNICODE
if (is_app_of(a, m_family_id, OP_CHAR_CONST) &&
}
if (unicode() && is_app_of(a, m_family_id, OP_CHAR_CONST) &&
is_app_of(b, m_family_id, OP_CHAR_CONST)) {
return true;
}
#endif
return false;
}
@ -1160,6 +1145,17 @@ app* seq_util::str::mk_char(unsigned ch) const {
return u.mk_char(ch);
}
app* seq_util::str::mk_char_bit(expr* e, unsigned idx) {
return u.mk_char_bit(e, idx);
}
app* seq_util::mk_char_bit(expr* e, unsigned i) {
parameter params[2] = { parameter(symbol("char.bit")), parameter(i) };
sort* range = m.mk_bool_sort();
func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 2, params, 1, &e, range);
return m.mk_app(f, 1, &e);
}
bv_util& seq_util::bv() const {
if (!m_bv) m_bv = alloc(bv_util, m);
return *m_bv.get();
@ -1178,36 +1174,44 @@ unsigned seq_util::max_mul(unsigned x, unsigned y) const {
bool seq_util::is_const_char(expr* e, unsigned& c) const {
#if Z3_USE_UNICODE
return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
#else
rational r;
unsigned sz;
return bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned() && (c = r.get_unsigned(), true);
#endif
if (seq.unicode()) {
return is_app_of(e, m_fid, OP_CHAR_CONST) && (c = to_app(e)->get_parameter(0).get_int(), true);
}
else {
rational r;
unsigned sz;
return bv().is_numeral(e, r, sz) && sz == 8 && r.is_unsigned() && (c = r.get_unsigned(), true);
}
}
bool seq_util::is_char_le(expr const* e) const {
if (seq.unicode())
return is_app_of(e, m_fid, OP_CHAR_LE);
else
return bv().is_bv_ule(e) && is_char(to_app(e)->get_arg(0));
}
app* seq_util::mk_char(unsigned ch) const {
#if Z3_USE_UNICODE
return seq.mk_char(ch);
#else
return bv().mk_numeral(rational(ch), 8);
#endif
if (seq.unicode())
return seq.mk_char(ch);
else
return bv().mk_numeral(rational(ch), 8);
}
app* seq_util::mk_le(expr* ch1, expr* ch2) const {
expr_ref _ch1(ch1, m), _ch2(ch2, m);
#if Z3_USE_UNICODE
expr* es[2] = { ch1, ch2 };
return m.mk_app(m_fid, OP_CHAR_LE, 2, es);
#else
rational r1, r2;
if (bv().is_numeral(ch1, r1) && bv().is_numeral(ch2, r2)) {
return m.mk_bool_val(r1 <= r2);
if (seq.unicode()) {
expr* es[2] = { ch1, ch2 };
return m.mk_app(m_fid, OP_CHAR_LE, 2, es);
}
else {
rational r1, r2;
if (bv().is_numeral(ch1, r1) && bv().is_numeral(ch2, r2)) {
return m.mk_bool_val(r1 <= r2);
}
return bv().mk_ule(ch1, ch2);
}
return bv().mk_ule(ch1, ch2);
#endif
}
app* seq_util::mk_lt(expr* ch1, expr* ch2) const {

View file

@ -27,14 +27,11 @@ Revision History:
#include <string>
#include "util/lbool.h"
#define Z3_USE_UNICODE 0
enum seq_sort_kind {
SEQ_SORT,
RE_SORT,
#if Z3_USE_UNICODE
_CHAR_SORT, // internal only
#endif
_STRING_SORT,
_REGLAN_SORT
};
@ -90,10 +87,8 @@ enum seq_op_kind {
OP_STRING_TO_CODE,
OP_STRING_FROM_CODE,
#if Z3_USE_UNICODE
OP_CHAR_CONST, // constant character
OP_CHAR_LE, // Unicode comparison
#endif
// internal only operators. Converted to SEQ variants.
_OP_STRING_FROM_CHAR,
_OP_STRING_STRREPL,
@ -120,13 +115,15 @@ class zstring {
private:
buffer<unsigned> m_buffer;
bool well_formed() const;
bool uses_unicode() const;
bool is_escape_char(char const *& s, unsigned& result);
public:
static unsigned max_char() { return 196607; }
static unsigned num_bits() { return 16; }
zstring() {}
zstring(char const* s);
zstring(const std::string &str) : zstring(str.c_str()) {}
zstring(unsigned sz, unsigned const* s) { m_buffer.append(sz, s); SASSERT(well_formed()); }
zstring(unsigned num_bits, bool const* ch);
zstring(unsigned ch);
zstring replace(zstring const& src, zstring const& dst) const;
zstring reverse() const;
@ -174,6 +171,7 @@ class seq_decl_plugin : public decl_plugin {
sort* m_reglan;
bool m_has_re;
bool m_has_seq;
bool m_unicode { false };
void match(psig& sig, unsigned dsz, sort* const* dom, sort* range, sort_ref& rng);
@ -202,6 +200,8 @@ public:
~seq_decl_plugin() override {}
void finalize() override;
bool unicode() const { return m_unicode; }
decl_plugin * mk_fresh() override { return alloc(seq_decl_plugin); }
sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) override;
@ -260,14 +260,12 @@ public:
bool is_re(expr* e, sort*& seq) const { return is_re(m.get_sort(e), seq); }
bool is_char(expr* e) const { return is_char(m.get_sort(e)); }
bool is_const_char(expr* e, unsigned& c) const;
#if Z3_USE_UNICODE
bool is_char_le(expr const* e) const { return is_app_of(e, m_fid, OP_CHAR_LE); }
#else
bool is_char_le(expr const* e) const { return bv().is_bv_ule(e) && is_char(to_app(e)->get_arg(0)); }
#endif
bool is_const_char(expr* e) const { unsigned c; return is_const_char(e, c); }
bool is_char_le(expr const* e) const;
app* mk_char_bit(expr* e, unsigned i);
app* mk_char(unsigned ch) const;
app* mk_le(expr* ch1, expr* ch2) const;
app* mk_lt(expr* ch1, expr* ch2) const;
app* mk_lt(expr* ch1, expr* ch2) const;
app* mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range);
bool is_skolem(expr const* e) const { return is_app_of(e, m_fid, _OP_SEQ_SKOLEM); }
@ -317,6 +315,7 @@ public:
app* mk_replace(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c}; return m.mk_app(m_fid, OP_SEQ_REPLACE, 3, es); }
app* mk_unit(expr* u) const { return m.mk_app(m_fid, OP_SEQ_UNIT, 1, &u); }
app* mk_char(zstring const& s, unsigned idx) const;
app* mk_char_bit(expr* e, unsigned i);
app* mk_itos(expr* i) const { return m.mk_app(m_fid, OP_STRING_ITOS, 1, &i); }
app* mk_stoi(expr* s) const { return m.mk_app(m_fid, OP_STRING_STOI, 1, &s); }
app* mk_is_empty(expr* s) const;