mirror of
https://github.com/Z3Prover/z3
synced 2026-03-11 07:40:31 +00:00
Merge pull request #8923 from Z3Prover/copilot/host-char-range-and-set
Move char_range and char_set from seq_nielsen to zstring
This commit is contained in:
commit
3f0e821207
4 changed files with 212 additions and 216 deletions
|
|
@ -95,149 +95,6 @@ namespace seq {
|
|||
return true;
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// char_set
|
||||
// -----------------------------------------------
|
||||
|
||||
unsigned char_set::char_count() const {
|
||||
unsigned count = 0;
|
||||
for (auto const& r : m_ranges)
|
||||
count += r.length();
|
||||
return count;
|
||||
}
|
||||
|
||||
bool char_set::contains(unsigned c) const {
|
||||
// binary search over sorted non-overlapping ranges
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void char_set::add(unsigned c) {
|
||||
if (m_ranges.empty()) {
|
||||
m_ranges.push_back(char_range(c));
|
||||
return;
|
||||
}
|
||||
// binary search for insertion point
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return; // already contained
|
||||
}
|
||||
// lo is the insertion point
|
||||
unsigned idx = static_cast<unsigned>(lo);
|
||||
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
|
||||
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
|
||||
if (merge_left && merge_right) {
|
||||
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
|
||||
m_ranges.erase(m_ranges.begin() + idx);
|
||||
} else if (merge_left) {
|
||||
m_ranges[idx - 1].m_hi = c + 1;
|
||||
} else if (merge_right) {
|
||||
m_ranges[idx].m_lo = c;
|
||||
} else {
|
||||
// positional insert: shift elements right and place new element
|
||||
m_ranges.push_back(char_range());
|
||||
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
|
||||
m_ranges[k] = m_ranges[k - 1];
|
||||
m_ranges[idx] = char_range(c);
|
||||
}
|
||||
}
|
||||
|
||||
void char_set::add(char_set const& other) {
|
||||
for (auto const& r : other.m_ranges) {
|
||||
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
|
||||
add(c);
|
||||
}
|
||||
}
|
||||
|
||||
char_set char_set::intersect_with(char_set const& other) const {
|
||||
char_set result;
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
|
||||
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
|
||||
if (lo < hi)
|
||||
result.m_ranges.push_back(char_range(lo, hi));
|
||||
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
|
||||
++i;
|
||||
else
|
||||
++j;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
char_set char_set::complement(unsigned max_char) const {
|
||||
char_set result;
|
||||
if (m_ranges.empty()) {
|
||||
result.m_ranges.push_back(char_range(0, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
unsigned from = 0;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (from < r.m_lo)
|
||||
result.m_ranges.push_back(char_range(from, r.m_lo));
|
||||
from = r.m_hi;
|
||||
}
|
||||
if (from <= max_char)
|
||||
result.m_ranges.push_back(char_range(from, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
|
||||
bool char_set::is_disjoint(char_set const& other) const {
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
|
||||
++i;
|
||||
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
|
||||
++j;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream& char_set::display(std::ostream& out) const {
|
||||
if (m_ranges.empty()) {
|
||||
out << "{}";
|
||||
return out;
|
||||
}
|
||||
out << "{ ";
|
||||
bool first = true;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (!first) out << ", ";
|
||||
first = false;
|
||||
if (r.is_unit()) {
|
||||
unsigned c = r.m_lo;
|
||||
if (c >= 'a' && c <= 'z')
|
||||
out << (char)c;
|
||||
else if (c >= 'A' && c <= 'Z')
|
||||
out << (char)c;
|
||||
else if (c >= '0' && c <= '9')
|
||||
out << (char)c;
|
||||
else
|
||||
out << "#[" << c << "]";
|
||||
} else {
|
||||
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
|
||||
}
|
||||
}
|
||||
out << " }";
|
||||
return out;
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// nielsen_edge
|
||||
// -----------------------------------------------
|
||||
|
|
|
|||
|
|
@ -276,79 +276,6 @@ namespace seq {
|
|||
// mirrors ZIPT's DependencyTracker
|
||||
using dep_tracker = uint_set;
|
||||
|
||||
// -----------------------------------------------
|
||||
// character range and set types
|
||||
// mirrors ZIPT's CharacterRange and CharacterSet
|
||||
// -----------------------------------------------
|
||||
|
||||
// half-open character interval [lo, hi)
|
||||
// mirrors ZIPT's CharacterRange
|
||||
struct char_range {
|
||||
unsigned m_lo;
|
||||
unsigned m_hi; // exclusive
|
||||
|
||||
char_range(): m_lo(0), m_hi(0) {}
|
||||
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
|
||||
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
|
||||
|
||||
bool is_empty() const { return m_lo == m_hi; }
|
||||
bool is_unit() const { return m_hi == m_lo + 1; }
|
||||
unsigned length() const { return m_hi - m_lo; }
|
||||
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
|
||||
|
||||
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
|
||||
bool operator<(char_range const& o) const {
|
||||
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
|
||||
}
|
||||
};
|
||||
|
||||
// sorted list of non-overlapping character intervals
|
||||
// mirrors ZIPT's CharacterSet
|
||||
class char_set {
|
||||
svector<char_range> m_ranges;
|
||||
public:
|
||||
char_set() = default;
|
||||
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
|
||||
|
||||
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
|
||||
|
||||
bool is_empty() const { return m_ranges.empty(); }
|
||||
bool is_full(unsigned max_char) const {
|
||||
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
|
||||
}
|
||||
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
|
||||
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
|
||||
|
||||
svector<char_range> const& ranges() const { return m_ranges; }
|
||||
|
||||
// total number of characters in the set
|
||||
unsigned char_count() const;
|
||||
|
||||
// membership test via binary search
|
||||
bool contains(unsigned c) const;
|
||||
|
||||
// add a single character
|
||||
void add(unsigned c);
|
||||
|
||||
// union with another char_set
|
||||
void add(char_set const& other);
|
||||
|
||||
// intersect with another char_set, returns the result
|
||||
char_set intersect_with(char_set const& other) const;
|
||||
|
||||
// complement relative to [0, max_char]
|
||||
char_set complement(unsigned max_char) const;
|
||||
|
||||
// check if two sets are disjoint
|
||||
bool is_disjoint(char_set const& other) const;
|
||||
|
||||
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
|
||||
|
||||
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
|
||||
|
||||
std::ostream& display(std::ostream& out) const;
|
||||
};
|
||||
|
||||
// -----------------------------------------------
|
||||
// character-level substitution
|
||||
// mirrors ZIPT's CharSubst
|
||||
|
|
|
|||
|
|
@ -286,3 +286,146 @@ bool operator<(const zstring& lhs, const zstring& rhs) {
|
|||
// so decide based on the relative lengths
|
||||
return lhs.length() < rhs.length();
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// char_set
|
||||
// -----------------------------------------------
|
||||
|
||||
unsigned char_set::char_count() const {
|
||||
unsigned count = 0;
|
||||
for (auto const& r : m_ranges)
|
||||
count += r.length();
|
||||
return count;
|
||||
}
|
||||
|
||||
bool char_set::contains(unsigned c) const {
|
||||
// binary search over sorted non-overlapping ranges
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void char_set::add(unsigned c) {
|
||||
if (m_ranges.empty()) {
|
||||
m_ranges.push_back(char_range(c));
|
||||
return;
|
||||
}
|
||||
// binary search for insertion point
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return; // already contained
|
||||
}
|
||||
// lo is the insertion point
|
||||
unsigned idx = static_cast<unsigned>(lo);
|
||||
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
|
||||
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
|
||||
if (merge_left && merge_right) {
|
||||
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
|
||||
m_ranges.erase(m_ranges.begin() + idx);
|
||||
} else if (merge_left) {
|
||||
m_ranges[idx - 1].m_hi = c + 1;
|
||||
} else if (merge_right) {
|
||||
m_ranges[idx].m_lo = c;
|
||||
} else {
|
||||
// positional insert: shift elements right and place new element
|
||||
m_ranges.push_back(char_range());
|
||||
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
|
||||
m_ranges[k] = m_ranges[k - 1];
|
||||
m_ranges[idx] = char_range(c);
|
||||
}
|
||||
}
|
||||
|
||||
void char_set::add(char_set const& other) {
|
||||
for (auto const& r : other.m_ranges) {
|
||||
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
|
||||
add(c);
|
||||
}
|
||||
}
|
||||
|
||||
char_set char_set::intersect_with(char_set const& other) const {
|
||||
char_set result;
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
|
||||
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
|
||||
if (lo < hi)
|
||||
result.m_ranges.push_back(char_range(lo, hi));
|
||||
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
|
||||
++i;
|
||||
else
|
||||
++j;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
char_set char_set::complement(unsigned max_char) const {
|
||||
char_set result;
|
||||
if (m_ranges.empty()) {
|
||||
result.m_ranges.push_back(char_range(0, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
unsigned from = 0;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (from < r.m_lo)
|
||||
result.m_ranges.push_back(char_range(from, r.m_lo));
|
||||
from = r.m_hi;
|
||||
}
|
||||
if (from <= max_char)
|
||||
result.m_ranges.push_back(char_range(from, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
|
||||
bool char_set::is_disjoint(char_set const& other) const {
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
|
||||
++i;
|
||||
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
|
||||
++j;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream& char_set::display(std::ostream& out) const {
|
||||
if (m_ranges.empty()) {
|
||||
out << "{}";
|
||||
return out;
|
||||
}
|
||||
out << "{ ";
|
||||
bool first = true;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (!first) out << ", ";
|
||||
first = false;
|
||||
if (r.is_unit()) {
|
||||
unsigned c = r.m_lo;
|
||||
if (c >= 'a' && c <= 'z')
|
||||
out << (char)c;
|
||||
else if (c >= 'A' && c <= 'Z')
|
||||
out << (char)c;
|
||||
else if (c >= '0' && c <= '9')
|
||||
out << (char)c;
|
||||
else
|
||||
out << "#[" << c << "]";
|
||||
} else {
|
||||
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
|
||||
}
|
||||
}
|
||||
out << " }";
|
||||
return out;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ Author:
|
|||
#include <string>
|
||||
#include "util/buffer.h"
|
||||
#include "util/rational.h"
|
||||
#include "util/vector.h"
|
||||
|
||||
enum class string_encoding {
|
||||
ascii, // exactly 8 bits
|
||||
|
|
@ -93,3 +94,71 @@ public:
|
|||
friend std::ostream& operator<<(std::ostream &os, const zstring &str);
|
||||
friend bool operator<(const zstring& lhs, const zstring& rhs);
|
||||
};
|
||||
|
||||
// half-open character interval [lo, hi)
|
||||
// mirrors ZIPT's CharacterRange
|
||||
struct char_range {
|
||||
unsigned m_lo;
|
||||
unsigned m_hi; // exclusive
|
||||
|
||||
char_range(): m_lo(0), m_hi(0) {}
|
||||
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
|
||||
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
|
||||
|
||||
bool is_empty() const { return m_lo == m_hi; }
|
||||
bool is_unit() const { return m_hi == m_lo + 1; }
|
||||
unsigned length() const { return m_hi - m_lo; }
|
||||
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
|
||||
|
||||
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
|
||||
bool operator<(char_range const& o) const {
|
||||
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
|
||||
}
|
||||
};
|
||||
|
||||
// sorted list of non-overlapping character intervals
|
||||
// mirrors ZIPT's CharacterSet
|
||||
class char_set {
|
||||
svector<char_range> m_ranges;
|
||||
public:
|
||||
char_set() = default;
|
||||
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
|
||||
|
||||
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
|
||||
|
||||
bool is_empty() const { return m_ranges.empty(); }
|
||||
bool is_full(unsigned max_char) const {
|
||||
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
|
||||
}
|
||||
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
|
||||
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
|
||||
|
||||
svector<char_range> const& ranges() const { return m_ranges; }
|
||||
|
||||
// total number of characters in the set
|
||||
unsigned char_count() const;
|
||||
|
||||
// membership test via binary search
|
||||
bool contains(unsigned c) const;
|
||||
|
||||
// add a single character
|
||||
void add(unsigned c);
|
||||
|
||||
// union with another char_set
|
||||
void add(char_set const& other);
|
||||
|
||||
// intersect with another char_set, returns the result
|
||||
char_set intersect_with(char_set const& other) const;
|
||||
|
||||
// complement relative to [0, max_char]
|
||||
char_set complement(unsigned max_char) const;
|
||||
|
||||
// check if two sets are disjoint
|
||||
bool is_disjoint(char_set const& other) const;
|
||||
|
||||
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
|
||||
|
||||
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
|
||||
|
||||
std::ostream& display(std::ostream& out) const;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue