3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-11 03:33:35 +00:00

Added 16 bit string-encoding (#5540)

This commit is contained in:
CEisenhofer 2021-09-09 11:35:16 +02:00 committed by GitHub
parent e70f501932
commit 47fdd6c060
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 74 additions and 35 deletions

View file

@ -22,7 +22,6 @@ Author:
char_decl_plugin::char_decl_plugin():
m_charc_sym("Char") {
m_unicode = gparams::get_value("unicode") != "false";
}
char_decl_plugin::~char_decl_plugin() {

View file

@ -40,7 +40,6 @@ enum char_op_kind {
class char_decl_plugin : public decl_plugin {
sort* m_char { nullptr };
symbol m_charc_sym;
bool m_unicode { true };
void set_manager(ast_manager * m, family_id id) override;
@ -96,8 +95,7 @@ public:
MATCH_UNARY(is_to_int);
MATCH_BINARY(is_le);
bool unicode() const { return m_unicode; }
unsigned max_char() const { return m_unicode ? zstring::unicode_max_char() : zstring::ascii_max_char(); }
unsigned num_bits() const { return m_unicode ? zstring::unicode_num_bits() : zstring::ascii_num_bits(); }
static unsigned max_char() { return zstring::max_char(); }
static unsigned num_bits() { return zstring::num_bits(); }
};

View file

@ -166,8 +166,6 @@ public:
void finalize() override;
bool unicode() const { return get_char_plugin().unicode(); }
decl_plugin * mk_fresh() override { return alloc(seq_decl_plugin); }
sort * mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) override;

View file

@ -106,8 +106,15 @@ void context_params::set(char const * param, char const * value) {
else if (p == "smtlib2_compliant") {
set_bool(m_smtlib2_compliant, param, value);
}
else if (p == "unicode") {
set_bool(m_unicode, param, value);
else if (p == "encoding") {
if (strcmp(value, "unicode") == 0 || strcmp(value, "bmp") == 0 || strcmp(value, "ascii") == 0) {
m_encoding = value;
}
else {
std::stringstream strm;
strm << "invalid value '" << value << "' for parameter '" << param << "' (supported: unicode, bmp, ascii)";
throw default_exception(strm.str());
}
}
else {
param_descrs d;
@ -140,7 +147,7 @@ void context_params::updt_params(params_ref const & p) {
m_debug_ref_count = p.get_bool("debug_ref_count", m_debug_ref_count);
m_smtlib2_compliant = p.get_bool("smtlib2_compliant", m_smtlib2_compliant);
m_statistics = p.get_bool("stats", m_statistics);
m_unicode = p.get_bool("unicode", m_unicode);
m_encoding = p.get_str("encoding", m_encoding.c_str());
}
void context_params::collect_param_descrs(param_descrs & d) {
@ -157,7 +164,7 @@ void context_params::collect_param_descrs(param_descrs & d) {
d.insert("debug_ref_count", CPK_BOOL, "debug support for AST reference counting", "false");
d.insert("smtlib2_compliant", CPK_BOOL, "enable/disable SMT-LIB 2.0 compliance", "false");
d.insert("stats", CPK_BOOL, "enable/disable statistics", "false");
d.insert("unicode", CPK_BOOL, "use unicode strings instead of ASCII strings");
d.insert("encoding", CPK_STRING, "string encoding used internally: unicode|bmp|ascii", "unicode");
// statistics are hidden as they are controlled by the /st option.
collect_solver_param_descrs(d);
}

View file

@ -20,6 +20,7 @@ Notes:
#pragma once
#include "util/params.h"
#include "util/zstring.h"
class context_params {
void set_bool(bool & opt, char const * param, char const * value);
@ -28,21 +29,21 @@ class context_params {
unsigned m_rlimit { 0 };
public:
unsigned m_timeout { UINT_MAX } ;
std::string m_dot_proof_file;
std::string m_trace_file_name;
bool m_auto_config { true };
bool m_proof { false };
bool m_debug_ref_count { false };
bool m_trace { false };
bool m_well_sorted_check { false };
bool m_model { true };
bool m_model_validate { false };
bool m_dump_models { false };
bool m_unsat_core { false };
bool m_smtlib2_compliant { false }; // it must be here because it enable/disable the use of coercions in the ast_manager.
bool m_statistics { false };
bool m_unicode { true };
unsigned m_timeout { UINT_MAX } ;
std::string m_dot_proof_file;
std::string m_trace_file_name;
bool m_auto_config { true };
bool m_proof { false };
bool m_debug_ref_count { false };
bool m_trace { false };
bool m_well_sorted_check { false };
bool m_model { true };
bool m_model_validate { false };
bool m_dump_models { false };
bool m_unsat_core { false };
bool m_smtlib2_compliant { false }; // it must be here because it enable/disable the use of coercions in the ast_manager.
bool m_statistics { false };
std::string m_encoding { "unicode" };
unsigned rlimit() const { return m_rlimit; }
context_params();

View file

@ -42,9 +42,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d;
}
else if (*(s+3+i) == '}') {
if (result > 255 && !uses_unicode())
return false;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 4 + i;
return true;
@ -65,7 +63,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d2;
result = 16*result + d3;
result = 16*result + d4;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 6;
return true;
@ -87,14 +85,22 @@ zstring::zstring(char const* s) {
SASSERT(well_formed());
}
bool zstring::uses_unicode() const {
return gparams::get_value("unicode") != "false";
string_encoding zstring::get_encoding() {
if (gparams::get_value("encoding") == "unicode") {
return unicode;
}
if (gparams::get_value("encoding") == "bmp") {
return bmp;
}
if (gparams::get_value("encoding") == "ascii") {
return ascii;
}
return unicode;
}
bool zstring::well_formed() const {
for (unsigned ch : m_buffer) {
if (ch > unicode_max_char()) {
if (ch > max_char()) {
IF_VERBOSE(0, verbose_stream() << "large character: " << ch << "\n";);
return false;
}

View file

@ -21,17 +21,47 @@ Author:
#include "util/buffer.h"
#include "util/rational.h"
enum string_encoding {
ascii, // exactly 8 bits
unicode,
bmp // basic multilingual plane; exactly 16 bits
};
class zstring {
private:
buffer<uint32_t> m_buffer;
bool well_formed() const;
bool uses_unicode() const;
bool is_escape_char(char const *& s, unsigned& result);
public:
static unsigned unicode_max_char() { return 196607; }
static unsigned unicode_num_bits() { return 18; }
static unsigned bmp_max_char() { return 65535; }
static unsigned bmp_num_bits() { return 16; }
static unsigned ascii_max_char() { return 255; }
static unsigned ascii_num_bits() { return 8; }
static unsigned max_char() {
switch (get_encoding()) {
case unicode:
return unicode_max_char();
case bmp:
return bmp_max_char();
case ascii:
return ascii_max_char();
}
return unicode_max_char();
}
static unsigned num_bits() {
switch (get_encoding()) {
case unicode:
return unicode_num_bits();
case bmp:
return bmp_num_bits();
case ascii:
return ascii_num_bits();
}
return unicode_num_bits();
}
static string_encoding get_encoding();
zstring() = default;
zstring(char const* s);
zstring(const std::string &str) : zstring(str.c_str()) {}