From e3922f9f2bb1d9a6dae5609a20854119545de25a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:33:45 +0000 Subject: [PATCH 1/2] Initial plan From 430a43045a12b3116642821bb74cea82421e5c1c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:57:43 +0000 Subject: [PATCH 2/2] Move char_range and char_set from seq_nielsen to zstring Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/seq/seq_nielsen.cpp | 143 ------------------------------------ src/smt/seq/seq_nielsen.h | 73 ------------------ src/util/zstring.cpp | 143 ++++++++++++++++++++++++++++++++++++ src/util/zstring.h | 69 +++++++++++++++++ 4 files changed, 212 insertions(+), 216 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index cb5e6aa56..9b1669589 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -148,149 +148,6 @@ namespace seq { return true; } - // ----------------------------------------------- - // char_set - // ----------------------------------------------- - - unsigned char_set::char_count() const { - unsigned count = 0; - for (auto const& r : m_ranges) - count += r.length(); - return count; - } - - bool char_set::contains(unsigned c) const { - // binary search over sorted non-overlapping ranges - int lo = 0, hi = static_cast(m_ranges.size()) - 1; - while (lo <= hi) { - int mid = lo + (hi - lo) / 2; - if (c < m_ranges[mid].m_lo) - hi = mid - 1; - else if (c >= m_ranges[mid].m_hi) - lo = mid + 1; - else - return true; - } - return false; - } - - void char_set::add(unsigned c) { - if (m_ranges.empty()) { - m_ranges.push_back(char_range(c)); - return; - } - // binary search for insertion point - int lo = 0, hi = static_cast(m_ranges.size()) - 1; - while (lo <= hi) { - int mid = lo + (hi - lo) / 2; - if (c < m_ranges[mid].m_lo) - hi = mid - 1; - else if (c >= m_ranges[mid].m_hi) - lo = mid + 1; - else - return; // already contained - } - // lo is the insertion point - unsigned idx = static_cast(lo); - bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c; - bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1; - if (merge_left && merge_right) { - m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi; - m_ranges.erase(m_ranges.begin() + idx); - } else if (merge_left) { - m_ranges[idx - 1].m_hi = c + 1; - } else if (merge_right) { - m_ranges[idx].m_lo = c; - } else { - // positional insert: shift elements right and place new element - m_ranges.push_back(char_range()); - for (unsigned k = m_ranges.size() - 1; k > idx; --k) - m_ranges[k] = m_ranges[k - 1]; - m_ranges[idx] = char_range(c); - } - } - - void char_set::add(char_set const& other) { - for (auto const& r : other.m_ranges) { - for (unsigned c = r.m_lo; c < r.m_hi; ++c) - add(c); - } - } - - char_set char_set::intersect_with(char_set const& other) const { - char_set result; - unsigned i = 0, j = 0; - while (i < m_ranges.size() && j < other.m_ranges.size()) { - unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo); - unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi); - if (lo < hi) - result.m_ranges.push_back(char_range(lo, hi)); - if (m_ranges[i].m_hi < other.m_ranges[j].m_hi) - ++i; - else - ++j; - } - return result; - } - - char_set char_set::complement(unsigned max_char) const { - char_set result; - if (m_ranges.empty()) { - result.m_ranges.push_back(char_range(0, max_char + 1)); - return result; - } - unsigned from = 0; - for (auto const& r : m_ranges) { - if (from < r.m_lo) - result.m_ranges.push_back(char_range(from, r.m_lo)); - from = r.m_hi; - } - if (from <= max_char) - result.m_ranges.push_back(char_range(from, max_char + 1)); - return result; - } - - bool char_set::is_disjoint(char_set const& other) const { - unsigned i = 0, j = 0; - while (i < m_ranges.size() && j < other.m_ranges.size()) { - if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo) - ++i; - else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo) - ++j; - else - return false; - } - return true; - } - - std::ostream& char_set::display(std::ostream& out) const { - if (m_ranges.empty()) { - out << "{}"; - return out; - } - out << "{ "; - bool first = true; - for (auto const& r : m_ranges) { - if (!first) out << ", "; - first = false; - if (r.is_unit()) { - unsigned c = r.m_lo; - if (c >= 'a' && c <= 'z') - out << (char)c; - else if (c >= 'A' && c <= 'Z') - out << (char)c; - else if (c >= '0' && c <= '9') - out << (char)c; - else - out << "#[" << c << "]"; - } else { - out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]"; - } - } - out << " }"; - return out; - } - // ----------------------------------------------- // nielsen_edge // ----------------------------------------------- diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 5b7ac2b21..61e938983 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -294,79 +294,6 @@ namespace seq { bool operator!=(dep_tracker const& other) const { return !(*this == other); } }; - // ----------------------------------------------- - // character range and set types - // mirrors ZIPT's CharacterRange and CharacterSet - // ----------------------------------------------- - - // half-open character interval [lo, hi) - // mirrors ZIPT's CharacterRange - struct char_range { - unsigned m_lo; - unsigned m_hi; // exclusive - - char_range(): m_lo(0), m_hi(0) {} - char_range(unsigned c): m_lo(c), m_hi(c + 1) {} - char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); } - - bool is_empty() const { return m_lo == m_hi; } - bool is_unit() const { return m_hi == m_lo + 1; } - unsigned length() const { return m_hi - m_lo; } - bool contains(unsigned c) const { return c >= m_lo && c < m_hi; } - - bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; } - bool operator<(char_range const& o) const { - return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi); - } - }; - - // sorted list of non-overlapping character intervals - // mirrors ZIPT's CharacterSet - class char_set { - svector m_ranges; - public: - char_set() = default; - explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); } - - static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); } - - bool is_empty() const { return m_ranges.empty(); } - bool is_full(unsigned max_char) const { - return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1; - } - bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); } - unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; } - - svector const& ranges() const { return m_ranges; } - - // total number of characters in the set - unsigned char_count() const; - - // membership test via binary search - bool contains(unsigned c) const; - - // add a single character - void add(unsigned c); - - // union with another char_set - void add(char_set const& other); - - // intersect with another char_set, returns the result - char_set intersect_with(char_set const& other) const; - - // complement relative to [0, max_char] - char_set complement(unsigned max_char) const; - - // check if two sets are disjoint - bool is_disjoint(char_set const& other) const; - - bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; } - - char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; } - - std::ostream& display(std::ostream& out) const; - }; - // ----------------------------------------------- // character-level substitution // mirrors ZIPT's CharSubst diff --git a/src/util/zstring.cpp b/src/util/zstring.cpp index f60b8d946..fa6dde45a 100644 --- a/src/util/zstring.cpp +++ b/src/util/zstring.cpp @@ -286,3 +286,146 @@ bool operator<(const zstring& lhs, const zstring& rhs) { // so decide based on the relative lengths return lhs.length() < rhs.length(); } + +// ----------------------------------------------- +// char_set +// ----------------------------------------------- + +unsigned char_set::char_count() const { + unsigned count = 0; + for (auto const& r : m_ranges) + count += r.length(); + return count; +} + +bool char_set::contains(unsigned c) const { + // binary search over sorted non-overlapping ranges + int lo = 0, hi = static_cast(m_ranges.size()) - 1; + while (lo <= hi) { + int mid = lo + (hi - lo) / 2; + if (c < m_ranges[mid].m_lo) + hi = mid - 1; + else if (c >= m_ranges[mid].m_hi) + lo = mid + 1; + else + return true; + } + return false; +} + +void char_set::add(unsigned c) { + if (m_ranges.empty()) { + m_ranges.push_back(char_range(c)); + return; + } + // binary search for insertion point + int lo = 0, hi = static_cast(m_ranges.size()) - 1; + while (lo <= hi) { + int mid = lo + (hi - lo) / 2; + if (c < m_ranges[mid].m_lo) + hi = mid - 1; + else if (c >= m_ranges[mid].m_hi) + lo = mid + 1; + else + return; // already contained + } + // lo is the insertion point + unsigned idx = static_cast(lo); + bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c; + bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1; + if (merge_left && merge_right) { + m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi; + m_ranges.erase(m_ranges.begin() + idx); + } else if (merge_left) { + m_ranges[idx - 1].m_hi = c + 1; + } else if (merge_right) { + m_ranges[idx].m_lo = c; + } else { + // positional insert: shift elements right and place new element + m_ranges.push_back(char_range()); + for (unsigned k = m_ranges.size() - 1; k > idx; --k) + m_ranges[k] = m_ranges[k - 1]; + m_ranges[idx] = char_range(c); + } +} + +void char_set::add(char_set const& other) { + for (auto const& r : other.m_ranges) { + for (unsigned c = r.m_lo; c < r.m_hi; ++c) + add(c); + } +} + +char_set char_set::intersect_with(char_set const& other) const { + char_set result; + unsigned i = 0, j = 0; + while (i < m_ranges.size() && j < other.m_ranges.size()) { + unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo); + unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi); + if (lo < hi) + result.m_ranges.push_back(char_range(lo, hi)); + if (m_ranges[i].m_hi < other.m_ranges[j].m_hi) + ++i; + else + ++j; + } + return result; +} + +char_set char_set::complement(unsigned max_char) const { + char_set result; + if (m_ranges.empty()) { + result.m_ranges.push_back(char_range(0, max_char + 1)); + return result; + } + unsigned from = 0; + for (auto const& r : m_ranges) { + if (from < r.m_lo) + result.m_ranges.push_back(char_range(from, r.m_lo)); + from = r.m_hi; + } + if (from <= max_char) + result.m_ranges.push_back(char_range(from, max_char + 1)); + return result; +} + +bool char_set::is_disjoint(char_set const& other) const { + unsigned i = 0, j = 0; + while (i < m_ranges.size() && j < other.m_ranges.size()) { + if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo) + ++i; + else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo) + ++j; + else + return false; + } + return true; +} + +std::ostream& char_set::display(std::ostream& out) const { + if (m_ranges.empty()) { + out << "{}"; + return out; + } + out << "{ "; + bool first = true; + for (auto const& r : m_ranges) { + if (!first) out << ", "; + first = false; + if (r.is_unit()) { + unsigned c = r.m_lo; + if (c >= 'a' && c <= 'z') + out << (char)c; + else if (c >= 'A' && c <= 'Z') + out << (char)c; + else if (c >= '0' && c <= '9') + out << (char)c; + else + out << "#[" << c << "]"; + } else { + out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]"; + } + } + out << " }"; + return out; +} diff --git a/src/util/zstring.h b/src/util/zstring.h index 44dfcd36c..f7df66455 100644 --- a/src/util/zstring.h +++ b/src/util/zstring.h @@ -20,6 +20,7 @@ Author: #include #include "util/buffer.h" #include "util/rational.h" +#include "util/vector.h" enum class string_encoding { ascii, // exactly 8 bits @@ -93,3 +94,71 @@ public: friend std::ostream& operator<<(std::ostream &os, const zstring &str); friend bool operator<(const zstring& lhs, const zstring& rhs); }; + +// half-open character interval [lo, hi) +// mirrors ZIPT's CharacterRange +struct char_range { + unsigned m_lo; + unsigned m_hi; // exclusive + + char_range(): m_lo(0), m_hi(0) {} + char_range(unsigned c): m_lo(c), m_hi(c + 1) {} + char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); } + + bool is_empty() const { return m_lo == m_hi; } + bool is_unit() const { return m_hi == m_lo + 1; } + unsigned length() const { return m_hi - m_lo; } + bool contains(unsigned c) const { return c >= m_lo && c < m_hi; } + + bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; } + bool operator<(char_range const& o) const { + return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi); + } +}; + +// sorted list of non-overlapping character intervals +// mirrors ZIPT's CharacterSet +class char_set { + svector m_ranges; +public: + char_set() = default; + explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); } + + static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); } + + bool is_empty() const { return m_ranges.empty(); } + bool is_full(unsigned max_char) const { + return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1; + } + bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); } + unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; } + + svector const& ranges() const { return m_ranges; } + + // total number of characters in the set + unsigned char_count() const; + + // membership test via binary search + bool contains(unsigned c) const; + + // add a single character + void add(unsigned c); + + // union with another char_set + void add(char_set const& other); + + // intersect with another char_set, returns the result + char_set intersect_with(char_set const& other) const; + + // complement relative to [0, max_char] + char_set complement(unsigned max_char) const; + + // check if two sets are disjoint + bool is_disjoint(char_set const& other) const; + + bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; } + + char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; } + + std::ostream& display(std::ostream& out) const; +};