3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-23 17:15:31 +00:00

Added 16 bit string-encoding (#5540)

This commit is contained in:
CEisenhofer 2021-09-09 11:35:16 +02:00 committed by GitHub
parent e70f501932
commit 47fdd6c060
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 74 additions and 35 deletions

View file

@ -42,9 +42,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d;
}
else if (*(s+3+i) == '}') {
if (result > 255 && !uses_unicode())
return false;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 4 + i;
return true;
@ -65,7 +63,7 @@ bool zstring::is_escape_char(char const *& s, unsigned& result) {
result = 16*result + d2;
result = 16*result + d3;
result = 16*result + d4;
if (result > unicode_max_char())
if (result > max_char())
return false;
s += 6;
return true;
@ -87,14 +85,22 @@ zstring::zstring(char const* s) {
SASSERT(well_formed());
}
bool zstring::uses_unicode() const {
return gparams::get_value("unicode") != "false";
// Maps the global "encoding" parameter to a string_encoding value.
// "bmp" and "ascii" select the corresponding encodings; "unicode" and
// every other value select unicode.
string_encoding zstring::get_encoding() {
    // Fetch the parameter a single time; the previous version repeated the
    // lookup for every comparison, and this function is called from
    // max_char() / num_bits() on each of their invocations.
    auto const encoding = gparams::get_value("encoding");
    if (encoding == "bmp")
        return bmp;
    if (encoding == "ascii")
        return ascii;
    // "unicode" and any unrecognized setting both resolve to unicode.
    return unicode;
}
bool zstring::well_formed() const {
for (unsigned ch : m_buffer) {
if (ch > unicode_max_char()) {
if (ch > max_char()) {
IF_VERBOSE(0, verbose_stream() << "large character: " << ch << "\n";);
return false;
}

View file

@ -21,17 +21,47 @@ Author:
#include "util/buffer.h"
#include "util/rational.h"
// Character encodings that zstring can be configured to use.
enum string_encoding {
    ascii   = 0, // 8-bit characters
    unicode = 1, // unicode code points
    bmp     = 2  // basic multilingual plane: 16-bit characters
};
class zstring {
private:
buffer<uint32_t> m_buffer;
bool well_formed() const;
bool uses_unicode() const;
bool is_escape_char(char const *& s, unsigned& result);
public:
// Per-encoding limits: the largest admissible character value and the
// bit-width associated with the encoding.
static unsigned unicode_max_char() { return 0x2FFFF; } // 196607
static unsigned unicode_num_bits() { return 18; }      // 0x2FFFF fits in 18 bits
static unsigned bmp_max_char()     { return 0xFFFF; }  // 65535
static unsigned bmp_num_bits()     { return 16; }
static unsigned ascii_max_char()   { return 0xFF; }    // 255
static unsigned ascii_num_bits()   { return 8; }
// Largest character value allowed under the encoding reported by
// get_encoding().
static unsigned max_char() {
    auto const enc = get_encoding();
    if (enc == bmp)
        return bmp_max_char();
    if (enc == ascii)
        return ascii_max_char();
    // unicode, as well as any value outside the enum, uses the unicode limit.
    return unicode_max_char();
}
// Bit-width of a character under the encoding reported by get_encoding().
static unsigned num_bits() {
    switch (get_encoding()) {
    case ascii:
        return ascii_num_bits();
    case bmp:
        return bmp_num_bits();
    case unicode:
    default:
        // unicode is also the fallback for any value outside the enum.
        return unicode_num_bits();
    }
}
static string_encoding get_encoding();
zstring() = default;
zstring(char const* s);
zstring(const std::string &str) : zstring(str.c_str()) {}