3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-11 15:50:29 +00:00

Merge branch 'c3' into copilot/replace-lp-solver-with-abstract-solver

This commit is contained in:
Nikolaj Bjorner 2026-03-10 10:35:44 -07:00 committed by GitHub
commit 2e636ec25d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 270 additions and 291 deletions

View file

@ -15,8 +15,8 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/

View file

@ -32,8 +32,8 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/

View file

@ -11,8 +11,8 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/

View file

@ -49,8 +49,8 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/

View file

@ -16,8 +16,8 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/

View file

@ -14,8 +14,8 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2026-03-02
Clemens Eisenhofer 2026-03-02
Nikolaj Bjorner (nbjorner) 2026-03-02
--*/
@ -23,7 +23,6 @@ Author:
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
#include "math/lp/lar_solver.h"
#include "util/bit_util.h"
#include "util/hashtable.h"
#include <algorithm>
#include <cstdlib>
@ -321,149 +320,6 @@ namespace seq {
return true;
}
// -----------------------------------------------
// char_set
// -----------------------------------------------
unsigned char_set::char_count() const {
unsigned count = 0;
for (auto const& r : m_ranges)
count += r.length();
return count;
}
bool char_set::contains(unsigned c) const {
// binary search over sorted non-overlapping ranges
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return true;
}
return false;
}
void char_set::add(unsigned c) {
if (m_ranges.empty()) {
m_ranges.push_back(char_range(c));
return;
}
// binary search for insertion point
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return; // already contained
}
// lo is the insertion point
unsigned idx = static_cast<unsigned>(lo);
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
if (merge_left && merge_right) {
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
m_ranges.erase(m_ranges.begin() + idx);
} else if (merge_left) {
m_ranges[idx - 1].m_hi = c + 1;
} else if (merge_right) {
m_ranges[idx].m_lo = c;
} else {
// positional insert: shift elements right and place new element
m_ranges.push_back(char_range());
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
m_ranges[k] = m_ranges[k - 1];
m_ranges[idx] = char_range(c);
}
}
void char_set::add(char_set const& other) {
for (auto const& r : other.m_ranges) {
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
add(c);
}
}
char_set char_set::intersect_with(char_set const& other) const {
char_set result;
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
if (lo < hi)
result.m_ranges.push_back(char_range(lo, hi));
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
++i;
else
++j;
}
return result;
}
char_set char_set::complement(unsigned max_char) const {
char_set result;
if (m_ranges.empty()) {
result.m_ranges.push_back(char_range(0, max_char + 1));
return result;
}
unsigned from = 0;
for (auto const& r : m_ranges) {
if (from < r.m_lo)
result.m_ranges.push_back(char_range(from, r.m_lo));
from = r.m_hi;
}
if (from <= max_char)
result.m_ranges.push_back(char_range(from, max_char + 1));
return result;
}
bool char_set::is_disjoint(char_set const& other) const {
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
++i;
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
++j;
else
return false;
}
return true;
}
std::ostream& char_set::display(std::ostream& out) const {
if (m_ranges.empty()) {
out << "{}";
return out;
}
out << "{ ";
bool first = true;
for (auto const& r : m_ranges) {
if (!first) out << ", ";
first = false;
if (r.is_unit()) {
unsigned c = r.m_lo;
if (c >= 'a' && c <= 'z')
out << (char)c;
else if (c >= 'A' && c <= 'Z')
out << (char)c;
else if (c >= '0' && c <= '9')
out << (char)c;
else
out << "#[" << c << "]";
} else {
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
}
}
out << " }";
return out;
}
// -----------------------------------------------
// nielsen_edge
// -----------------------------------------------
@ -511,7 +367,7 @@ namespace seq {
str_eq& eq = m_str_eq[i];
eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement);
eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement);
eq.m_dep.merge(s.m_dep);
eq.m_dep |= s.m_dep;
eq.sort();
}
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
@ -519,7 +375,7 @@ namespace seq {
mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement);
// regex is typically ground, but apply subst for generality
mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement);
mem.m_dep.merge(s.m_dep);
mem.m_dep |= s.m_dep;
}
}
@ -654,8 +510,8 @@ namespace seq {
void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) {
if (!m_root)
m_root = mk_node();
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
m_root->str_eqs().size());
dep_tracker dep;
dep.insert(m_root->str_eqs().size());
str_eq eq(lhs, rhs, dep);
eq.sort();
m_root->add_str_eq(eq);
@ -665,8 +521,8 @@ namespace seq {
void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) {
if (!m_root)
m_root = mk_node();
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
m_root->str_eqs().size() + m_root->str_mems().size());
dep_tracker dep;
dep.insert(m_root->str_eqs().size() + m_root->str_mems().size());
euf::snode* history = m_sg.mk_empty();
unsigned id = next_mem_id();
m_root->add_str_mem(str_mem(str, regex, history, id, dep));
@ -2574,9 +2430,9 @@ namespace seq {
if (!n->is_currently_conflict())
continue;
for (str_eq const& eq : n->str_eqs())
deps.merge(eq.m_dep);
deps |= eq.m_dep;
for (str_mem const& mem : n->str_mems())
deps.merge(mem.m_dep);
deps |= mem.m_dep;
}
}
@ -2584,9 +2440,7 @@ namespace seq {
SASSERT(m_root);
dep_tracker deps;
collect_conflict_deps(deps);
unsigned_vector bits;
deps.get_set_bits(bits);
for (unsigned b : bits) {
for (unsigned b : deps) {
if (b < m_num_input_eqs)
eq_indices.push_back(b);
else

View file

@ -98,10 +98,8 @@ Abstract:
------------------------------------
1. dep_tracker (DependencyTracker): ZIPT's DependencyTracker is a .NET
class using a BitArray-like structure for tracking constraint origins.
Z3's dep_tracker uses a dense bitvector stored as svector<unsigned>
(32-bit words). The merge/is_superset/empty semantics are equivalent,
but the representation is more cache-friendly and avoids managed-heap
allocation.
Z3 uses uint_set (a dense bitvector from util/uint_set.h) for the same
purpose. The |=/subset_of/empty semantics are equivalent.
2. Substitution application (nielsen_node::apply_subst): ZIPT uses an
immutable, functional style -- Apply() returns a new constraint if
@ -223,8 +221,8 @@ Abstract:
Author:
Nikolaj Bjorner (nbjorner) 2026-03-02
Clemens Eisenhofer 2026-03-02
Nikolaj Bjorner (nbjorner) 2026-03-02
--*/
@ -293,96 +291,7 @@ namespace seq {
// dependency tracker: bitvector tracking which input constraints
// contributed to deriving a given constraint
// mirrors ZIPT's DependencyTracker
class dep_tracker {
svector<unsigned> m_bits;
public:
dep_tracker() = default;
explicit dep_tracker(unsigned num_bits);
dep_tracker(unsigned num_bits, unsigned set_bit);
void merge(dep_tracker const& other);
bool is_superset(dep_tracker const& other) const;
bool empty() const;
// collect indices of all set bits into 'indices'
void get_set_bits(unsigned_vector& indices) const;
bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; }
bool operator!=(dep_tracker const& other) const { return !(*this == other); }
};
// -----------------------------------------------
// character range and set types
// mirrors ZIPT's CharacterRange and CharacterSet
// -----------------------------------------------
// half-open character interval [lo, hi)
// mirrors ZIPT's CharacterRange
struct char_range {
unsigned m_lo;
unsigned m_hi; // exclusive
char_range(): m_lo(0), m_hi(0) {}
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
bool is_empty() const { return m_lo == m_hi; }
bool is_unit() const { return m_hi == m_lo + 1; }
unsigned length() const { return m_hi - m_lo; }
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
bool operator<(char_range const& o) const {
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
}
};
// sorted list of non-overlapping character intervals
// mirrors ZIPT's CharacterSet
class char_set {
svector<char_range> m_ranges;
public:
char_set() = default;
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
bool is_empty() const { return m_ranges.empty(); }
bool is_full(unsigned max_char) const {
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
}
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
svector<char_range> const& ranges() const { return m_ranges; }
// total number of characters in the set
unsigned char_count() const;
// membership test via binary search
bool contains(unsigned c) const;
// add a single character
void add(unsigned c);
// union with another char_set
void add(char_set const& other);
// intersect with another char_set, returns the result
char_set intersect_with(char_set const& other) const;
// complement relative to [0, max_char]
char_set complement(unsigned max_char) const;
// check if two sets are disjoint
bool is_disjoint(char_set const& other) const;
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
std::ostream& display(std::ostream& out) const;
};
using dep_tracker = uint_set;
// -----------------------------------------------
// character-level substitution

View file

@ -450,10 +450,8 @@ namespace smt {
void theory_nseq::deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits) {
context& ctx = get_context();
unsigned_vector bits;
deps.get_set_bits(bits);
unsigned num_input_eqs = m_nielsen.num_input_eqs();
for (unsigned b : bits) {
for (unsigned b : deps) {
if (b < num_input_eqs) {
eq_source const& src = m_state.get_eq_source(b);
if (src.m_n1->get_root() == src.m_n2->get_root())

View file

@ -22,7 +22,7 @@ Abstract:
#include "ast/ast_pp.h"
#include <iostream>
// test dep_tracker basic operations
// test dep_tracker (uint_set) basic operations
static void test_dep_tracker() {
std::cout << "test_dep_tracker\n";
@ -31,23 +31,26 @@ static void test_dep_tracker() {
SASSERT(d0.empty());
// tracker with one bit set
seq::dep_tracker d1(8, 3);
seq::dep_tracker d1;
d1.insert(3);
SASSERT(!d1.empty());
// tracker with another bit
seq::dep_tracker d2(8, 5);
seq::dep_tracker d2;
d2.insert(5);
SASSERT(!d2.empty());
// merge
seq::dep_tracker d3 = d1;
d3.merge(d2);
d3 |= d2;
SASSERT(!d3.empty());
SASSERT(d3.is_superset(d1));
SASSERT(d3.is_superset(d2));
SASSERT(!d1.is_superset(d2));
SASSERT(d1.subset_of(d3));
SASSERT(d2.subset_of(d3));
SASSERT(!d2.subset_of(d1));
// equality
seq::dep_tracker d4(8, 3);
seq::dep_tracker d4;
d4.insert(3);
SASSERT(d1 == d4);
SASSERT(d1 != d2);
}
@ -65,7 +68,7 @@ static void test_str_eq() {
euf::snode* a = sg.mk_char('A');
euf::snode* e = sg.mk_empty();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
// basic equality
seq::str_eq eq1(x, y, dep);
@ -112,7 +115,7 @@ static void test_str_mem() {
expr_ref star_fc(seq.re.mk_full_seq(str_sort), m);
euf::snode* regex = sg.mk(star_fc);
seq::dep_tracker dep(4, 1);
seq::dep_tracker dep; dep.insert(1);
seq::str_mem mem(x, regex, e, 0, dep);
// x in regex is primitive (x is a single variable)
@ -1322,37 +1325,40 @@ static void test_solve_conflict_deps() {
SASSERT(!deps.empty());
}
// test dep_tracker::get_set_bits
// test dep_tracker (uint_set) iteration
static void test_dep_tracker_get_set_bits() {
std::cout << "test_dep_tracker_get_set_bits\n";
// empty tracker has no bits
seq::dep_tracker d0;
unsigned_vector bits0;
d0.get_set_bits(bits0);
for (unsigned b : d0) bits0.push_back(b);
SASSERT(bits0.empty());
// single bit set at position 5
seq::dep_tracker d1(16, 5);
seq::dep_tracker d1;
d1.insert(5);
unsigned_vector bits1;
d1.get_set_bits(bits1);
for (unsigned b : d1) bits1.push_back(b);
SASSERT(bits1.size() == 1);
SASSERT(bits1[0] == 5);
// two bits merged
seq::dep_tracker d2(16, 3);
d2.merge(seq::dep_tracker(16, 11));
seq::dep_tracker d2;
d2.insert(3);
d2.insert(11);
unsigned_vector bits2;
d2.get_set_bits(bits2);
for (unsigned b : d2) bits2.push_back(b);
SASSERT(bits2.size() == 2);
SASSERT(bits2[0] == 3);
SASSERT(bits2[1] == 11);
// test across word boundary (bit 31 and 32)
seq::dep_tracker d3(64, 31);
d3.merge(seq::dep_tracker(64, 32));
seq::dep_tracker d3;
d3.insert(31);
d3.insert(32);
unsigned_vector bits3;
d3.get_set_bits(bits3);
for (unsigned b : d3) bits3.push_back(b);
SASSERT(bits3.size() == 2);
SASSERT(bits3[0] == 31);
SASSERT(bits3[1] == 32);
@ -1543,7 +1549,7 @@ static void test_simplify_prefix_cancel() {
euf::snode* aby = sg.mk_concat(a, sg.mk_concat(b, y));
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_eq(seq::str_eq(abx, aby, dep));
auto sr = node->simplify_and_init(ng);
@ -1573,7 +1579,7 @@ static void test_simplify_symbol_clash() {
euf::snode* by = sg.mk_concat(b, y);
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_eq(seq::str_eq(ax, by, dep));
auto sr = node->simplify_and_init(ng);
@ -1598,7 +1604,7 @@ static void test_simplify_empty_propagation() {
// ε = x·y → forces x=ε, y=ε → all trivial → satisfied
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_eq(seq::str_eq(e, xy, dep));
auto sr = node->simplify_and_init(ng);
@ -1619,7 +1625,7 @@ static void test_simplify_empty_vs_char() {
// ε = A → rhs has non-variable token → conflict
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_eq(seq::str_eq(e, a, dep));
auto sr = node->simplify_and_init(ng);
@ -1645,7 +1651,7 @@ static void test_simplify_multi_pass_clash() {
euf::snode* ac = sg.mk_concat(a, c);
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_eq(seq::str_eq(ab, ac, dep));
auto sr = node->simplify_and_init(ng);
@ -1667,7 +1673,7 @@ static void test_simplify_trivial_removal() {
euf::snode* e = sg.mk_empty();
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_eq(seq::str_eq(e, e, dep)); // trivial
node->add_str_eq(seq::str_eq(x, y, dep)); // non-trivial
@ -1689,7 +1695,7 @@ static void test_simplify_all_trivial_satisfied() {
euf::snode* e = sg.mk_empty();
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_eq(seq::str_eq(x, x, dep)); // trivial: same pointer
node->add_str_eq(seq::str_eq(e, e, dep)); // trivial: both empty
@ -1716,7 +1722,7 @@ static void test_simplify_regex_infeasible() {
// ε ∈ to_re("A") → non-nullable → conflict
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_mem(seq::str_mem(e, regex, e, 0, dep));
auto sr = node->simplify_and_init(ng);
@ -1744,7 +1750,7 @@ static void test_simplify_nullable_removal() {
// ε ∈ star(to_re("A")) → nullable → satisfied, mem removed
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_mem(seq::str_mem(e, regex, e, 0, dep));
auto sr = node->simplify_and_init(ng);
@ -1772,7 +1778,7 @@ static void test_simplify_brzozowski_sat() {
// "A" ∈ to_re("A") → derivative consumes 'A' → ε ∈ ε-regex → satisfied
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(4, 0);
seq::dep_tracker dep; dep.insert(0);
node->add_str_mem(seq::str_mem(a, regex, e, 0, dep));
auto sr = node->simplify_and_init(ng);
@ -1795,7 +1801,7 @@ static void test_simplify_multiple_eqs() {
euf::snode* e = sg.mk_empty();
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep(8, 0);
seq::dep_tracker dep; dep.insert(0);
// eq1: ε = ε (trivial → removed)
node->add_str_eq(seq::str_eq(e, e, dep));
@ -2234,7 +2240,7 @@ static void test_length_constraints_deps() {
for (auto const& c : constraints) {
SASSERT(!c.m_dep.empty());
unsigned_vector bits;
c.m_dep.get_set_bits(bits);
for (unsigned b : c.m_dep) bits.push_back(b);
SASSERT(bits.size() == 1);
SASSERT(bits[0] == 0);
}

View file

@ -286,3 +286,146 @@ bool operator<(const zstring& lhs, const zstring& rhs) {
// so decide based on the relative lengths
return lhs.length() < rhs.length();
}
// -----------------------------------------------
// char_set
// -----------------------------------------------
unsigned char_set::char_count() const {
unsigned count = 0;
for (auto const& r : m_ranges)
count += r.length();
return count;
}
bool char_set::contains(unsigned c) const {
// binary search over sorted non-overlapping ranges
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return true;
}
return false;
}
void char_set::add(unsigned c) {
if (m_ranges.empty()) {
m_ranges.push_back(char_range(c));
return;
}
// binary search for insertion point
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
if (c < m_ranges[mid].m_lo)
hi = mid - 1;
else if (c >= m_ranges[mid].m_hi)
lo = mid + 1;
else
return; // already contained
}
// lo is the insertion point
unsigned idx = static_cast<unsigned>(lo);
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
if (merge_left && merge_right) {
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
m_ranges.erase(m_ranges.begin() + idx);
} else if (merge_left) {
m_ranges[idx - 1].m_hi = c + 1;
} else if (merge_right) {
m_ranges[idx].m_lo = c;
} else {
// positional insert: shift elements right and place new element
m_ranges.push_back(char_range());
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
m_ranges[k] = m_ranges[k - 1];
m_ranges[idx] = char_range(c);
}
}
void char_set::add(char_set const& other) {
for (auto const& r : other.m_ranges) {
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
add(c);
}
}
char_set char_set::intersect_with(char_set const& other) const {
char_set result;
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
if (lo < hi)
result.m_ranges.push_back(char_range(lo, hi));
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
++i;
else
++j;
}
return result;
}
char_set char_set::complement(unsigned max_char) const {
char_set result;
if (m_ranges.empty()) {
result.m_ranges.push_back(char_range(0, max_char + 1));
return result;
}
unsigned from = 0;
for (auto const& r : m_ranges) {
if (from < r.m_lo)
result.m_ranges.push_back(char_range(from, r.m_lo));
from = r.m_hi;
}
if (from <= max_char)
result.m_ranges.push_back(char_range(from, max_char + 1));
return result;
}
bool char_set::is_disjoint(char_set const& other) const {
unsigned i = 0, j = 0;
while (i < m_ranges.size() && j < other.m_ranges.size()) {
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
++i;
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
++j;
else
return false;
}
return true;
}
std::ostream& char_set::display(std::ostream& out) const {
if (m_ranges.empty()) {
out << "{}";
return out;
}
out << "{ ";
bool first = true;
for (auto const& r : m_ranges) {
if (!first) out << ", ";
first = false;
if (r.is_unit()) {
unsigned c = r.m_lo;
if (c >= 'a' && c <= 'z')
out << (char)c;
else if (c >= 'A' && c <= 'Z')
out << (char)c;
else if (c >= '0' && c <= '9')
out << (char)c;
else
out << "#[" << c << "]";
} else {
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
}
}
out << " }";
return out;
}

View file

@ -20,6 +20,7 @@ Author:
#include <string>
#include "util/buffer.h"
#include "util/rational.h"
#include "util/vector.h"
enum class string_encoding {
ascii, // exactly 8 bits
@ -93,3 +94,71 @@ public:
friend std::ostream& operator<<(std::ostream &os, const zstring &str);
friend bool operator<(const zstring& lhs, const zstring& rhs);
};
// half-open character interval [lo, hi)
// mirrors ZIPT's CharacterRange
struct char_range {
unsigned m_lo;
unsigned m_hi; // exclusive
char_range(): m_lo(0), m_hi(0) {}
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
bool is_empty() const { return m_lo == m_hi; }
bool is_unit() const { return m_hi == m_lo + 1; }
unsigned length() const { return m_hi - m_lo; }
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
bool operator<(char_range const& o) const {
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
}
};
// sorted list of non-overlapping character intervals
// mirrors ZIPT's CharacterSet
class char_set {
svector<char_range> m_ranges;
public:
char_set() = default;
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
bool is_empty() const { return m_ranges.empty(); }
bool is_full(unsigned max_char) const {
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
}
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
svector<char_range> const& ranges() const { return m_ranges; }
// total number of characters in the set
unsigned char_count() const;
// membership test via binary search
bool contains(unsigned c) const;
// add a single character
void add(unsigned c);
// union with another char_set
void add(char_set const& other);
// intersect with another char_set, returns the result
char_set intersect_with(char_set const& other) const;
// complement relative to [0, max_char]
char_set complement(unsigned max_char) const;
// check if two sets are disjoint
bool is_disjoint(char_set const& other) const;
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
std::ostream& display(std::ostream& out) const;
};