3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-11 07:40:31 +00:00

Merge pull request #8923 from Z3Prover/copilot/host-char-range-and-set

Move char_range and char_set from seq_nielsen to zstring
This commit is contained in:
Nikolaj Bjorner 2026-03-10 10:18:58 -07:00 committed by GitHub
commit 3f0e821207
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 212 additions and 216 deletions

View file

@ -95,149 +95,6 @@ namespace seq {
return true;
}
// -----------------------------------------------
// char_set
// -----------------------------------------------
unsigned char_set::char_count() const {
unsigned count = 0;
for (auto const& r : m_ranges)
count += r.length();
return count;
}
bool char_set::contains(unsigned c) const {
// binary search over sorted non-overlapping ranges
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return true;
}
return false;
}
void char_set::add(unsigned c) {
if (m_ranges.empty()) {
m_ranges.push_back(char_range(c));
return;
}
// binary search for insertion point
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return; // already contained
}
// lo is the insertion point
unsigned idx = static_cast<unsigned>(lo);
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
if (merge_left && merge_right) {
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
m_ranges.erase(m_ranges.begin() + idx);
} else if (merge_left) {
m_ranges[idx - 1].m_hi = c + 1;
} else if (merge_right) {
m_ranges[idx].m_lo = c;
} else {
// positional insert: shift elements right and place new element
m_ranges.push_back(char_range());
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
m_ranges[k] = m_ranges[k - 1];
m_ranges[idx] = char_range(c);
}
}
void char_set::add(char_set const& other) {
for (auto const& r : other.m_ranges) {
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
add(c);
}
}
char_set char_set::intersect_with(char_set const& other) const {
char_set result;
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
if (lo < hi)
result.m_ranges.push_back(char_range(lo, hi));
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
++i;
else
++j;
}
return result;
}
char_set char_set::complement(unsigned max_char) const {
char_set result;
if (m_ranges.empty()) {
result.m_ranges.push_back(char_range(0, max_char + 1));
return result;
}
unsigned from = 0;
for (auto const& r : m_ranges) {
if (from < r.m_lo)
result.m_ranges.push_back(char_range(from, r.m_lo));
from = r.m_hi;
}
if (from <= max_char)
result.m_ranges.push_back(char_range(from, max_char + 1));
return result;
}
bool char_set::is_disjoint(char_set const& other) const {
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
++i;
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
++j;
else
return false;
}
return true;
}
std::ostream& char_set::display(std::ostream& out) const {
if (m_ranges.empty()) {
out << "{}";
return out;
}
out << "{ ";
bool first = true;
for (auto const& r : m_ranges) {
if (!first) out << ", ";
first = false;
if (r.is_unit()) {
unsigned c = r.m_lo;
if (c >= 'a' && c <= 'z')
out << (char)c;
else if (c >= 'A' && c <= 'Z')
out << (char)c;
else if (c >= '0' && c <= '9')
out << (char)c;
else
out << "#[" << c << "]";
} else {
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
}
}
out << " }";
return out;
}
// -----------------------------------------------
// nielsen_edge
// -----------------------------------------------

View file

@ -276,79 +276,6 @@ namespace seq {
// mirrors ZIPT's DependencyTracker
using dep_tracker = uint_set;
// -----------------------------------------------
// character range and set types
// mirrors ZIPT's CharacterRange and CharacterSet
// -----------------------------------------------
// half-open character interval [lo, hi)
// mirrors ZIPT's CharacterRange
struct char_range {
unsigned m_lo;
unsigned m_hi; // exclusive
char_range(): m_lo(0), m_hi(0) {}
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
bool is_empty() const { return m_lo == m_hi; }
bool is_unit() const { return m_hi == m_lo + 1; }
unsigned length() const { return m_hi - m_lo; }
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
bool operator<(char_range const& o) const {
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
}
};
// sorted list of non-overlapping character intervals
// mirrors ZIPT's CharacterSet
class char_set {
svector<char_range> m_ranges;
public:
char_set() = default;
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
bool is_empty() const { return m_ranges.empty(); }
bool is_full(unsigned max_char) const {
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
}
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
svector<char_range> const& ranges() const { return m_ranges; }
// total number of characters in the set
unsigned char_count() const;
// membership test via binary search
bool contains(unsigned c) const;
// add a single character
void add(unsigned c);
// union with another char_set
void add(char_set const& other);
// intersect with another char_set, returns the result
char_set intersect_with(char_set const& other) const;
// complement relative to [0, max_char]
char_set complement(unsigned max_char) const;
// check if two sets are disjoint
bool is_disjoint(char_set const& other) const;
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
std::ostream& display(std::ostream& out) const;
};
// -----------------------------------------------
// character-level substitution
// mirrors ZIPT's CharSubst

View file

@ -286,3 +286,146 @@ bool operator<(const zstring& lhs, const zstring& rhs) {
// so decide based on the relative lengths
return lhs.length() < rhs.length();
}
// -----------------------------------------------
// char_set
// -----------------------------------------------
unsigned char_set::char_count() const {
unsigned count = 0;
for (auto const& r : m_ranges)
count += r.length();
return count;
}
bool char_set::contains(unsigned c) const {
// binary search over sorted non-overlapping ranges
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return true;
}
return false;
}
void char_set::add(unsigned c) {
if (m_ranges.empty()) {
m_ranges.push_back(char_range(c));
return;
}
// binary search for insertion point
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return; // already contained
}
// lo is the insertion point
unsigned idx = static_cast<unsigned>(lo);
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
if (merge_left && merge_right) {
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
m_ranges.erase(m_ranges.begin() + idx);
} else if (merge_left) {
m_ranges[idx - 1].m_hi = c + 1;
} else if (merge_right) {
m_ranges[idx].m_lo = c;
} else {
// positional insert: shift elements right and place new element
m_ranges.push_back(char_range());
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
m_ranges[k] = m_ranges[k - 1];
m_ranges[idx] = char_range(c);
}
}
void char_set::add(char_set const& other) {
for (auto const& r : other.m_ranges) {
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
add(c);
}
}
char_set char_set::intersect_with(char_set const& other) const {
char_set result;
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
if (lo < hi)
result.m_ranges.push_back(char_range(lo, hi));
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
++i;
else
++j;
}
return result;
}
char_set char_set::complement(unsigned max_char) const {
char_set result;
if (m_ranges.empty()) {
result.m_ranges.push_back(char_range(0, max_char + 1));
return result;
}
unsigned from = 0;
for (auto const& r : m_ranges) {
if (from < r.m_lo)
result.m_ranges.push_back(char_range(from, r.m_lo));
from = r.m_hi;
}
if (from <= max_char)
result.m_ranges.push_back(char_range(from, max_char + 1));
return result;
}
bool char_set::is_disjoint(char_set const& other) const {
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
++i;
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
++j;
else
return false;
}
return true;
}
std::ostream& char_set::display(std::ostream& out) const {
if (m_ranges.empty()) {
out << "{}";
return out;
}
out << "{ ";
bool first = true;
for (auto const& r : m_ranges) {
if (!first) out << ", ";
first = false;
if (r.is_unit()) {
unsigned c = r.m_lo;
if (c >= 'a' && c <= 'z')
out << (char)c;
else if (c >= 'A' && c <= 'Z')
out << (char)c;
else if (c >= '0' && c <= '9')
out << (char)c;
else
out << "#[" << c << "]";
} else {
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
}
}
out << " }";
return out;
}

View file

@ -20,6 +20,7 @@ Author:
#include <string>
#include "util/buffer.h"
#include "util/rational.h"
#include "util/vector.h"
enum class string_encoding {
ascii, // exactly 8 bits
@ -93,3 +94,71 @@ public:
friend std::ostream& operator<<(std::ostream &os, const zstring &str);
friend bool operator<(const zstring& lhs, const zstring& rhs);
};
// half-open character interval [lo, hi)
// mirrors ZIPT's CharacterRange
struct char_range {
unsigned m_lo;
unsigned m_hi; // exclusive
char_range(): m_lo(0), m_hi(0) {}
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
bool is_empty() const { return m_lo == m_hi; }
bool is_unit() const { return m_hi == m_lo + 1; }
unsigned length() const { return m_hi - m_lo; }
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
bool operator<(char_range const& o) const {
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
}
};
// sorted list of non-overlapping character intervals
// mirrors ZIPT's CharacterSet
class char_set {
svector<char_range> m_ranges;
public:
char_set() = default;
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
bool is_empty() const { return m_ranges.empty(); }
bool is_full(unsigned max_char) const {
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
}
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
svector<char_range> const& ranges() const { return m_ranges; }
// total number of characters in the set
unsigned char_count() const;
// membership test via binary search
bool contains(unsigned c) const;
// add a single character
void add(unsigned c);
// union with another char_set
void add(char_set const& other);
// intersect with another char_set, returns the result
char_set intersect_with(char_set const& other) const;
// complement relative to [0, max_char]
char_set complement(unsigned max_char) const;
// check if two sets are disjoint
bool is_disjoint(char_set const& other) const;
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
std::ostream& display(std::ostream& out) const;
};