From 20332c6d3e548b971a98870f7165f2a4a4578fe6 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 26 Jan 2021 16:28:44 -0800 Subject: [PATCH] adding char decl plugin for separate theory treatment Signed-off-by: Nikolaj Bjorner --- src/ast/CMakeLists.txt | 1 + src/ast/seq_decl_plugin.cpp | 363 ---------------------------------- src/ast/seq_decl_plugin.h | 38 +--- src/smt/theory_char.h | 29 +-- src/util/CMakeLists.txt | 1 + src/util/zstring.cpp | 383 ++++++++++++++++++++++++++++++++++++ src/util/zstring.h | 57 ++++++ 7 files changed, 452 insertions(+), 420 deletions(-) create mode 100644 src/util/zstring.cpp create mode 100644 src/util/zstring.h diff --git a/src/ast/CMakeLists.txt b/src/ast/CMakeLists.txt index d4c7291f4..b0c9f7724 100644 --- a/src/ast/CMakeLists.txt +++ b/src/ast/CMakeLists.txt @@ -14,6 +14,7 @@ z3_add_component(ast ast_translation.cpp ast_util.cpp bv_decl_plugin.cpp + char_decl_plugin.cpp datatype_decl_plugin.cpp decl_collector.cpp display_dimacs.cpp diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 1273dba85..05cb4d8ab 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -24,369 +24,6 @@ Revision History: #include "ast/bv_decl_plugin.h" #include -static bool is_hex_digit(char ch, unsigned& d) { - if ('0' <= ch && ch <= '9') { - d = ch - '0'; - return true; - } - if ('A' <= ch && ch <= 'F') { - d = 10 + ch - 'A'; - return true; - } - if ('a' <= ch && ch <= 'f') { - d = 10 + ch - 'a'; - return true; - } - return false; -} - -static bool is_octal_digit(char ch, unsigned& d) { - if ('0' <= ch && ch <= '7') { - d = ch - '0'; - return true; - } - return false; -} - -bool zstring::is_escape_char(char const *& s, unsigned& result) { - unsigned d1, d2, d3; - if (*s != '\\' || *(s + 1) == 0) { - return false; - } - if (*(s + 1) == 'x' && - is_hex_digit(*(s + 2), d1) && is_hex_digit(*(s + 3), d2)) { - result = d1*16 + d2; - s += 4; - return true; - } - /* C-standard octal escapes: either 1, 2, or 3 octal digits, - * stopping either at 3 digits or at the first non-digit character. - */ - /* 1 octal digit */ - if (is_octal_digit(*(s + 1), d1) && !is_octal_digit(*(s + 2), d2)) { - result = d1; - s += 2; - return true; - } - /* 2 octal digits */ - if (is_octal_digit(*(s + 1), d1) && is_octal_digit(*(s + 2), d2) && - !is_octal_digit(*(s + 3), d3)) { - result = d1 * 8 + d2; - s += 3; - return true; - } - /* 3 octal digits */ - if (is_octal_digit(*(s + 1), d1) && is_octal_digit(*(s + 2), d2) && - is_octal_digit(*(s + 3), d3)) { - result = d1*64 + d2*8 + d3; - s += 4; - return true; - } - - if (*(s+1) == 'u' && *(s+2) == '{') { - result = 0; - for (unsigned i = 0; i < 5; ++i) { - if (is_hex_digit(*(s+3+i), d1)) { - result = 16*result + d1; - } - else if (*(s+3+i) == '}') { - if (result > 255 && !uses_unicode()) - throw default_exception("unicode characters outside of byte range are not supported"); - s += 4 + i; - return true; - } - else { - break; - } - } - return false; - } - if (*(s+1) == 'u' && is_hex_digit(*(s+2), d1)) { - result = d1; - unsigned i = 0; - for (; i < 4; ++i) { - if (is_hex_digit(*(s+3+i), d1)) { - result = 16*result + d1; - } - else { - break; - } - } - if (result > 255 && !uses_unicode()) - throw default_exception("unicode characters outside of byte range are not supported"); - s += 3 + i; - return true; - } - switch (*(s + 1)) { - case 'a': - result = '\a'; - s += 2; - return true; - case 'b': - result = '\b'; - s += 2; - return true; -#if 0 - case 'e': - result = '\e'; - s += 2; - return true; -#endif - case 'f': - result = '\f'; - s += 2; - return true; - case 'n': - result = '\n'; - s += 2; - return true; - case 'r': - result = '\r'; - s += 2; - return true; - case 't': - result = '\t'; - s += 2; - return true; - case 'v': - result = '\v'; - s += 2; - return true; - default: - result = *(s + 1); - s += 2; - return true; - } - return false; -} - -zstring::zstring(char const* s) { - while (*s) { - unsigned ch = 0; - if (is_escape_char(s, ch)) { - m_buffer.push_back(ch); - } - else { - m_buffer.push_back(*s); - ++s; - } - } - SASSERT(well_formed()); -} - -bool zstring::uses_unicode() const { - return gparams::get_value("unicode") == "true"; -} - -bool zstring::well_formed() const { - for (unsigned ch : m_buffer) { - if (ch > unicode_max_char()) { - IF_VERBOSE(0, verbose_stream() << "large character: " << ch << "\n";); - return false; - } - } - return true; -} - -zstring::zstring(unsigned ch) { - m_buffer.push_back(ch); -} - -zstring zstring::reverse() const { - zstring result; - for (unsigned i = length(); i-- > 0; ) { - result.m_buffer.push_back(m_buffer[i]); - } - return result; -} - -zstring zstring::replace(zstring const& src, zstring const& dst) const { - zstring result; - if (length() < src.length()) { - return zstring(*this); - } - if (src.length() == 0) { - return dst + zstring(*this); - } - bool found = false; - for (unsigned i = 0; i < length(); ++i) { - bool eq = !found && i + src.length() <= length(); - for (unsigned j = 0; eq && j < src.length(); ++j) { - eq = m_buffer[i+j] == src[j]; - } - if (eq) { - result.m_buffer.append(dst.m_buffer); - found = true; - i += src.length() - 1; - } - else { - result.m_buffer.push_back(m_buffer[i]); - } - } - return result; -} - -static const char esc_table[32][6] = - { "\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07", "\\x08", "\\x09", "\\n", "\\v", "\\f", "\\r", "\\x0E", "\\x0F", - "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F" -}; - -std::string zstring::encode() const { - std::ostringstream strm; - char buffer[100]; - unsigned offset = 0; -#define _flush() if (offset > 0) { buffer[offset] = 0; strm << buffer; offset = 0; } - for (unsigned i = 0; i < m_buffer.size(); ++i) { - unsigned ch = m_buffer[i]; - if (0 <= ch && ch < 32) { - _flush(); - strm << esc_table[ch]; - } - else if (ch == '\\') { - _flush(); - strm << "\\\\"; - } - else if (ch >= 256) { - _flush(); - strm << "\\u{" << std::hex << ch << std::dec << "}"; - } - else if (ch >= 128) { - _flush(); - strm << "\\x" << std::hex << ch << std::dec; - } - else { - if (offset == 99) { - _flush(); - } - buffer[offset++] = (char)ch; - } - } - _flush(); - return strm.str(); -} - -bool zstring::suffixof(zstring const& other) const { - if (length() > other.length()) return false; - bool suffix = true; - for (unsigned i = 0; suffix && i < length(); ++i) { - suffix = m_buffer[length()-i-1] == other[other.length()-i-1]; - } - return suffix; -} - -bool zstring::prefixof(zstring const& other) const { - if (length() > other.length()) return false; - bool prefix = true; - for (unsigned i = 0; prefix && i < length(); ++i) { - prefix = m_buffer[i] == other[i]; - } - return prefix; -} - -bool zstring::contains(zstring const& other) const { - if (other.length() > length()) return false; - unsigned last = length() - other.length(); - bool cont = false; - for (unsigned i = 0; !cont && i <= last; ++i) { - cont = true; - for (unsigned j = 0; cont && j < other.length(); ++j) { - cont = other[j] == m_buffer[j+i]; - } - } - return cont; -} - -int zstring::indexofu(zstring const& other, unsigned offset) const { - if (offset <= length() && other.length() == 0) return offset; - if (offset == length()) return -1; - if (offset > other.length() + offset) return -1; - if (other.length() + offset > length()) return -1; - unsigned last = length() - other.length(); - for (unsigned i = offset; i <= last; ++i) { - bool prefix = true; - for (unsigned j = 0; prefix && j < other.length(); ++j) { - prefix = m_buffer[i + j] == other[j]; - } - if (prefix) { - return static_cast(i); - } - } - return -1; -} - -int zstring::last_indexof(zstring const& other) const { - if (other.length() == 0) return length(); - if (other.length() > length()) return -1; - for (unsigned last = length() - other.length(); last-- > 0; ) { - bool suffix = true; - for (unsigned j = 0; suffix && j < other.length(); ++j) { - suffix = m_buffer[last + j] == other[j]; - } - if (suffix) { - return static_cast(last); - } - } - return -1; -} - -zstring zstring::extract(unsigned offset, unsigned len) const { - zstring result; - if (offset + len < offset) return result; - int last = std::min(offset+len, length()); - for (int i = offset; i < last; ++i) { - result.m_buffer.push_back(m_buffer[i]); - } - return result; -} - -zstring zstring::operator+(zstring const& other) const { - zstring result(*this); - result.m_buffer.append(other.m_buffer); - return result; -} - -bool zstring::operator==(const zstring& other) const { - // two strings are equal iff they have the same length and characters - if (length() != other.length()) { - return false; - } - for (unsigned i = 0; i < length(); ++i) { - if (m_buffer[i] != other[i]) { - return false; - } - } - return true; -} - -bool zstring::operator!=(const zstring& other) const { - return !(*this == other); -} - -std::ostream& operator<<(std::ostream &os, const zstring &str) { - return os << str.encode(); -} - -bool operator<(const zstring& lhs, const zstring& rhs) { - // This has the same semantics as strcmp() - unsigned len = lhs.length(); - if (rhs.length() < len) { - len = rhs.length(); - } - for (unsigned i = 0; i < len; ++i) { - unsigned Li = lhs[i]; - unsigned Ri = rhs[i]; - if (Li < Ri) { - return true; - } - else if (Li > Ri) { - return false; - } - } - // at this point, all compared characters are equal, - // so decide based on the relative lengths - return lhs.length() < rhs.length(); -} - seq_decl_plugin::seq_decl_plugin(): m_init(false), m_stringc_sym("String"), diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 4fdc8a065..3cc1abc8f 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -24,8 +24,8 @@ Revision History: #include "ast/ast.h" #include "ast/bv_decl_plugin.h" -#include #include "util/lbool.h" +#include "util/zstring.h" enum seq_sort_kind { SEQ_SORT, @@ -110,42 +110,6 @@ enum seq_op_kind { }; -class zstring { -private: - buffer m_buffer; - bool well_formed() const; - bool uses_unicode() const; - bool is_escape_char(char const *& s, unsigned& result); -public: - static unsigned unicode_max_char() { return 196607; } - static unsigned unicode_num_bits() { return 18; } - static unsigned ascii_max_char() { return 255; } - static unsigned ascii_num_bits() { return 8; } - zstring() {} - zstring(char const* s); - zstring(const std::string &str) : zstring(str.c_str()) {} - zstring(unsigned sz, unsigned const* s) { m_buffer.append(sz, s); SASSERT(well_formed()); } - zstring(unsigned ch); - zstring replace(zstring const& src, zstring const& dst) const; - zstring reverse() const; - std::string encode() const; - unsigned length() const { return m_buffer.size(); } - unsigned operator[](unsigned i) const { return m_buffer[i]; } - bool empty() const { return m_buffer.empty(); } - bool suffixof(zstring const& other) const; - bool prefixof(zstring const& other) const; - bool contains(zstring const& other) const; - int indexofu(zstring const& other, unsigned offset) const; - int last_indexof(zstring const& other) const; - zstring extract(unsigned lo, unsigned hi) const; - zstring operator+(zstring const& other) const; - bool operator==(const zstring& other) const; - bool operator!=(const zstring& other) const; - - friend std::ostream& operator<<(std::ostream &os, const zstring &str); - friend bool operator<(const zstring& lhs, const zstring& rhs); -}; - class seq_decl_plugin : public decl_plugin { struct psig { symbol m_name; diff --git a/src/smt/theory_char.h b/src/smt/theory_char.h index 6a829487a..9e1664c27 100644 --- a/src/smt/theory_char.h +++ b/src/smt/theory_char.h @@ -34,34 +34,26 @@ namespace smt { void reset() { memset(this, 0, sizeof(*this)); } }; - seq_util seq; + seq_util seq; vector m_bits; vector m_ebits; - unsigned_vector m_var2value; - svector m_value2var; - bool m_enabled { false }; - bit_blaster m_bb; - stats m_stats; - symbol m_bits2char; - seq_factory* m_factory { nullptr }; + unsigned_vector m_var2value; + svector m_value2var; + bool m_enabled { false }; + bit_blaster m_bb; + stats m_stats; + symbol m_bits2char; + seq_factory* m_factory { nullptr }; struct reset_bits; - literal_vector const& get_bits(theory_var v); - expr_ref_vector const& get_ebits(theory_var v); - bool has_bits(theory_var v) const; - void init_bits(theory_var v); - - bool get_char_value(theory_var v, unsigned& c); - + bool get_char_value(theory_var v, unsigned& c); void enforce_ackerman(theory_var v, theory_var w); - void enforce_value_bound(theory_var v); - void enforce_bits(); public: @@ -83,9 +75,6 @@ namespace smt { // ensure coherence for character codes and equalities of shared symbols. bool enabled() const { return m_enabled; } bool final_check(); - // <= atomic constraints on characters - void assign_le(theory_var v1, theory_var v2, literal lit); - void assign_lt(theory_var v1, theory_var v2, literal lit); void new_const_char(theory_var v, unsigned c); unsigned get_char_value(theory_var v); diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 2b09230b9..35727bce2 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -60,6 +60,7 @@ z3_add_component(util util.cpp warning.cpp z3_exception.cpp + zstring.cpp EXTRA_REGISTER_MODULE_HEADERS env_params.h MEMORY_INIT_FINALIZER_HEADERS diff --git a/src/util/zstring.cpp b/src/util/zstring.cpp new file mode 100644 index 000000000..73e195a3c --- /dev/null +++ b/src/util/zstring.cpp @@ -0,0 +1,383 @@ +/*++ +Copyright (c) 2011 Microsoft Corporation + +Module Name: + + zstring.cpp + +Abstract: + + String wrapper for unicode/ascii internal strings as vectors + +Author: + + Nikolaj Bjorner (nbjorner) 2021-01-26 + +--*/ +#include "util/gparams.h" +#include "util/zstring.h" + +static bool is_hex_digit(char ch, unsigned& d) { + if ('0' <= ch && ch <= '9') { + d = ch - '0'; + return true; + } + if ('A' <= ch && ch <= 'F') { + d = 10 + ch - 'A'; + return true; + } + if ('a' <= ch && ch <= 'f') { + d = 10 + ch - 'a'; + return true; + } + return false; +} + +static bool is_octal_digit(char ch, unsigned& d) { + if ('0' <= ch && ch <= '7') { + d = ch - '0'; + return true; + } + return false; +} + +bool zstring::is_escape_char(char const *& s, unsigned& result) { + unsigned d1, d2, d3; + if (*s != '\\' || *(s + 1) == 0) { + return false; + } + if (*(s + 1) == 'x' && + is_hex_digit(*(s + 2), d1) && is_hex_digit(*(s + 3), d2)) { + result = d1*16 + d2; + s += 4; + return true; + } + /* C-standard octal escapes: either 1, 2, or 3 octal digits, + * stopping either at 3 digits or at the first non-digit character. + */ + /* 1 octal digit */ + if (is_octal_digit(*(s + 1), d1) && !is_octal_digit(*(s + 2), d2)) { + result = d1; + s += 2; + return true; + } + /* 2 octal digits */ + if (is_octal_digit(*(s + 1), d1) && is_octal_digit(*(s + 2), d2) && + !is_octal_digit(*(s + 3), d3)) { + result = d1 * 8 + d2; + s += 3; + return true; + } + /* 3 octal digits */ + if (is_octal_digit(*(s + 1), d1) && is_octal_digit(*(s + 2), d2) && + is_octal_digit(*(s + 3), d3)) { + result = d1*64 + d2*8 + d3; + s += 4; + return true; + } + + if (*(s+1) == 'u' && *(s+2) == '{') { + result = 0; + for (unsigned i = 0; i < 5; ++i) { + if (is_hex_digit(*(s+3+i), d1)) { + result = 16*result + d1; + } + else if (*(s+3+i) == '}') { + if (result > 255 && !uses_unicode()) + throw default_exception("unicode characters outside of byte range are not supported"); + s += 4 + i; + return true; + } + else { + break; + } + } + return false; + } + if (*(s+1) == 'u' && is_hex_digit(*(s+2), d1)) { + result = d1; + unsigned i = 0; + for (; i < 4; ++i) { + if (is_hex_digit(*(s+3+i), d1)) { + result = 16*result + d1; + } + else { + break; + } + } + if (result > 255 && !uses_unicode()) + throw default_exception("unicode characters outside of byte range are not supported"); + s += 3 + i; + return true; + } + switch (*(s + 1)) { + case 'a': + result = '\a'; + s += 2; + return true; + case 'b': + result = '\b'; + s += 2; + return true; +#if 0 + case 'e': + result = '\e'; + s += 2; + return true; +#endif + case 'f': + result = '\f'; + s += 2; + return true; + case 'n': + result = '\n'; + s += 2; + return true; + case 'r': + result = '\r'; + s += 2; + return true; + case 't': + result = '\t'; + s += 2; + return true; + case 'v': + result = '\v'; + s += 2; + return true; + default: + result = *(s + 1); + s += 2; + return true; + } + return false; +} + +zstring::zstring(char const* s) { + while (*s) { + unsigned ch = 0; + if (is_escape_char(s, ch)) { + m_buffer.push_back(ch); + } + else { + m_buffer.push_back(*s); + ++s; + } + } + SASSERT(well_formed()); +} + +bool zstring::uses_unicode() const { + return gparams::get_value("unicode") == "true"; +} + +bool zstring::well_formed() const { + for (unsigned ch : m_buffer) { + if (ch > unicode_max_char()) { + IF_VERBOSE(0, verbose_stream() << "large character: " << ch << "\n";); + return false; + } + } + return true; +} + +zstring::zstring(unsigned ch) { + m_buffer.push_back(ch); +} + +zstring zstring::reverse() const { + zstring result; + for (unsigned i = length(); i-- > 0; ) { + result.m_buffer.push_back(m_buffer[i]); + } + return result; +} + +zstring zstring::replace(zstring const& src, zstring const& dst) const { + zstring result; + if (length() < src.length()) { + return zstring(*this); + } + if (src.length() == 0) { + return dst + zstring(*this); + } + bool found = false; + for (unsigned i = 0; i < length(); ++i) { + bool eq = !found && i + src.length() <= length(); + for (unsigned j = 0; eq && j < src.length(); ++j) { + eq = m_buffer[i+j] == src[j]; + } + if (eq) { + result.m_buffer.append(dst.m_buffer); + found = true; + i += src.length() - 1; + } + else { + result.m_buffer.push_back(m_buffer[i]); + } + } + return result; +} + +static const char esc_table[32][6] = + { "\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07", "\\x08", "\\x09", "\\n", "\\v", "\\f", "\\r", "\\x0E", "\\x0F", + "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F" +}; + +std::string zstring::encode() const { + std::ostringstream strm; + char buffer[100]; + unsigned offset = 0; +#define _flush() if (offset > 0) { buffer[offset] = 0; strm << buffer; offset = 0; } + for (unsigned i = 0; i < m_buffer.size(); ++i) { + unsigned ch = m_buffer[i]; + if (0 <= ch && ch < 32) { + _flush(); + strm << esc_table[ch]; + } + else if (ch == '\\') { + _flush(); + strm << "\\\\"; + } + else if (ch >= 256) { + _flush(); + strm << "\\u{" << std::hex << ch << std::dec << "}"; + } + else if (ch >= 128) { + _flush(); + strm << "\\x" << std::hex << ch << std::dec; + } + else { + if (offset == 99) { + _flush(); + } + buffer[offset++] = (char)ch; + } + } + _flush(); + return strm.str(); +} + +bool zstring::suffixof(zstring const& other) const { + if (length() > other.length()) return false; + bool suffix = true; + for (unsigned i = 0; suffix && i < length(); ++i) { + suffix = m_buffer[length()-i-1] == other[other.length()-i-1]; + } + return suffix; +} + +bool zstring::prefixof(zstring const& other) const { + if (length() > other.length()) return false; + bool prefix = true; + for (unsigned i = 0; prefix && i < length(); ++i) { + prefix = m_buffer[i] == other[i]; + } + return prefix; +} + +bool zstring::contains(zstring const& other) const { + if (other.length() > length()) return false; + unsigned last = length() - other.length(); + bool cont = false; + for (unsigned i = 0; !cont && i <= last; ++i) { + cont = true; + for (unsigned j = 0; cont && j < other.length(); ++j) { + cont = other[j] == m_buffer[j+i]; + } + } + return cont; +} + +int zstring::indexofu(zstring const& other, unsigned offset) const { + if (offset <= length() && other.length() == 0) return offset; + if (offset == length()) return -1; + if (offset > other.length() + offset) return -1; + if (other.length() + offset > length()) return -1; + unsigned last = length() - other.length(); + for (unsigned i = offset; i <= last; ++i) { + bool prefix = true; + for (unsigned j = 0; prefix && j < other.length(); ++j) { + prefix = m_buffer[i + j] == other[j]; + } + if (prefix) { + return static_cast(i); + } + } + return -1; +} + +int zstring::last_indexof(zstring const& other) const { + if (other.length() == 0) return length(); + if (other.length() > length()) return -1; + for (unsigned last = length() - other.length(); last-- > 0; ) { + bool suffix = true; + for (unsigned j = 0; suffix && j < other.length(); ++j) { + suffix = m_buffer[last + j] == other[j]; + } + if (suffix) { + return static_cast(last); + } + } + return -1; +} + +zstring zstring::extract(unsigned offset, unsigned len) const { + zstring result; + if (offset + len < offset) return result; + int last = std::min(offset+len, length()); + for (int i = offset; i < last; ++i) { + result.m_buffer.push_back(m_buffer[i]); + } + return result; +} + +zstring zstring::operator+(zstring const& other) const { + zstring result(*this); + result.m_buffer.append(other.m_buffer); + return result; +} + +bool zstring::operator==(const zstring& other) const { + // two strings are equal iff they have the same length and characters + if (length() != other.length()) { + return false; + } + for (unsigned i = 0; i < length(); ++i) { + if (m_buffer[i] != other[i]) { + return false; + } + } + return true; +} + +bool zstring::operator!=(const zstring& other) const { + return !(*this == other); +} + +std::ostream& operator<<(std::ostream &os, const zstring &str) { + return os << str.encode(); +} + +bool operator<(const zstring& lhs, const zstring& rhs) { + // This has the same semantics as strcmp() + unsigned len = lhs.length(); + if (rhs.length() < len) { + len = rhs.length(); + } + for (unsigned i = 0; i < len; ++i) { + unsigned Li = lhs[i]; + unsigned Ri = rhs[i]; + if (Li < Ri) { + return true; + } + else if (Li > Ri) { + return false; + } + } + // at this point, all compared characters are equal, + // so decide based on the relative lengths + return lhs.length() < rhs.length(); +} + + diff --git a/src/util/zstring.h b/src/util/zstring.h new file mode 100644 index 000000000..836792252 --- /dev/null +++ b/src/util/zstring.h @@ -0,0 +1,57 @@ +/*++ +Copyright (c) 2011 Microsoft Corporation + +Module Name: + + zstring.h + +Abstract: + + String wrapper for unicode/ascii internal strings as vectors + +Author: + + Nikolaj Bjorner (nbjorner) 2021-01-26 + +--*/ +#pragma once + +#include +#include "util/vector.h" +#include "util/buffer.h" + +class zstring { +private: + buffer m_buffer; + bool well_formed() const; + bool uses_unicode() const; + bool is_escape_char(char const *& s, unsigned& result); +public: + static unsigned unicode_max_char() { return 196607; } + static unsigned unicode_num_bits() { return 18; } + static unsigned ascii_max_char() { return 255; } + static unsigned ascii_num_bits() { return 8; } + zstring() {} + zstring(char const* s); + zstring(const std::string &str) : zstring(str.c_str()) {} + zstring(unsigned sz, unsigned const* s) { m_buffer.append(sz, s); SASSERT(well_formed()); } + zstring(unsigned ch); + zstring replace(zstring const& src, zstring const& dst) const; + zstring reverse() const; + std::string encode() const; + unsigned length() const { return m_buffer.size(); } + unsigned operator[](unsigned i) const { return m_buffer[i]; } + bool empty() const { return m_buffer.empty(); } + bool suffixof(zstring const& other) const; + bool prefixof(zstring const& other) const; + bool contains(zstring const& other) const; + int indexofu(zstring const& other, unsigned offset) const; + int last_indexof(zstring const& other) const; + zstring extract(unsigned lo, unsigned hi) const; + zstring operator+(zstring const& other) const; + bool operator==(const zstring& other) const; + bool operator!=(const zstring& other) const; + + friend std::ostream& operator<<(std::ostream &os, const zstring &str); + friend bool operator<(const zstring& lhs, const zstring& rhs); +};