mirror of
https://github.com/Z3Prover/z3
synced 2026-03-11 15:50:29 +00:00
Merge branch 'c3' into copilot/replace-lp-solver-with-abstract-solver
This commit is contained in:
commit
2e636ec25d
11 changed files with 270 additions and 291 deletions
|
|
@ -15,8 +15,8 @@ Abstract:
|
|||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ Abstract:
|
|||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ Abstract:
|
|||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
|
|
|
|||
|
|
@ -49,8 +49,8 @@ Abstract:
|
|||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ Abstract:
|
|||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ Abstract:
|
|||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-02
|
||||
Clemens Eisenhofer 2026-03-02
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-02
|
||||
|
||||
--*/
|
||||
|
||||
|
|
@ -23,7 +23,6 @@ Author:
|
|||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "math/lp/lar_solver.h"
|
||||
#include "util/bit_util.h"
|
||||
#include "util/hashtable.h"
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
|
|
@ -321,149 +320,6 @@ namespace seq {
|
|||
return true;
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// char_set
|
||||
// -----------------------------------------------
|
||||
|
||||
unsigned char_set::char_count() const {
|
||||
unsigned count = 0;
|
||||
for (auto const& r : m_ranges)
|
||||
count += r.length();
|
||||
return count;
|
||||
}
|
||||
|
||||
bool char_set::contains(unsigned c) const {
|
||||
// binary search over sorted non-overlapping ranges
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void char_set::add(unsigned c) {
|
||||
if (m_ranges.empty()) {
|
||||
m_ranges.push_back(char_range(c));
|
||||
return;
|
||||
}
|
||||
// binary search for insertion point
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return; // already contained
|
||||
}
|
||||
// lo is the insertion point
|
||||
unsigned idx = static_cast<unsigned>(lo);
|
||||
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
|
||||
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
|
||||
if (merge_left && merge_right) {
|
||||
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
|
||||
m_ranges.erase(m_ranges.begin() + idx);
|
||||
} else if (merge_left) {
|
||||
m_ranges[idx - 1].m_hi = c + 1;
|
||||
} else if (merge_right) {
|
||||
m_ranges[idx].m_lo = c;
|
||||
} else {
|
||||
// positional insert: shift elements right and place new element
|
||||
m_ranges.push_back(char_range());
|
||||
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
|
||||
m_ranges[k] = m_ranges[k - 1];
|
||||
m_ranges[idx] = char_range(c);
|
||||
}
|
||||
}
|
||||
|
||||
void char_set::add(char_set const& other) {
|
||||
for (auto const& r : other.m_ranges) {
|
||||
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
|
||||
add(c);
|
||||
}
|
||||
}
|
||||
|
||||
char_set char_set::intersect_with(char_set const& other) const {
|
||||
char_set result;
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
|
||||
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
|
||||
if (lo < hi)
|
||||
result.m_ranges.push_back(char_range(lo, hi));
|
||||
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
|
||||
++i;
|
||||
else
|
||||
++j;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
char_set char_set::complement(unsigned max_char) const {
|
||||
char_set result;
|
||||
if (m_ranges.empty()) {
|
||||
result.m_ranges.push_back(char_range(0, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
unsigned from = 0;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (from < r.m_lo)
|
||||
result.m_ranges.push_back(char_range(from, r.m_lo));
|
||||
from = r.m_hi;
|
||||
}
|
||||
if (from <= max_char)
|
||||
result.m_ranges.push_back(char_range(from, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
|
||||
bool char_set::is_disjoint(char_set const& other) const {
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
|
||||
++i;
|
||||
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
|
||||
++j;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream& char_set::display(std::ostream& out) const {
|
||||
if (m_ranges.empty()) {
|
||||
out << "{}";
|
||||
return out;
|
||||
}
|
||||
out << "{ ";
|
||||
bool first = true;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (!first) out << ", ";
|
||||
first = false;
|
||||
if (r.is_unit()) {
|
||||
unsigned c = r.m_lo;
|
||||
if (c >= 'a' && c <= 'z')
|
||||
out << (char)c;
|
||||
else if (c >= 'A' && c <= 'Z')
|
||||
out << (char)c;
|
||||
else if (c >= '0' && c <= '9')
|
||||
out << (char)c;
|
||||
else
|
||||
out << "#[" << c << "]";
|
||||
} else {
|
||||
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
|
||||
}
|
||||
}
|
||||
out << " }";
|
||||
return out;
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// nielsen_edge
|
||||
// -----------------------------------------------
|
||||
|
|
@ -511,7 +367,7 @@ namespace seq {
|
|||
str_eq& eq = m_str_eq[i];
|
||||
eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement);
|
||||
eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement);
|
||||
eq.m_dep.merge(s.m_dep);
|
||||
eq.m_dep |= s.m_dep;
|
||||
eq.sort();
|
||||
}
|
||||
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
|
||||
|
|
@ -519,7 +375,7 @@ namespace seq {
|
|||
mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement);
|
||||
// regex is typically ground, but apply subst for generality
|
||||
mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement);
|
||||
mem.m_dep.merge(s.m_dep);
|
||||
mem.m_dep |= s.m_dep;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -654,8 +510,8 @@ namespace seq {
|
|||
void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) {
|
||||
if (!m_root)
|
||||
m_root = mk_node();
|
||||
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
|
||||
m_root->str_eqs().size());
|
||||
dep_tracker dep;
|
||||
dep.insert(m_root->str_eqs().size());
|
||||
str_eq eq(lhs, rhs, dep);
|
||||
eq.sort();
|
||||
m_root->add_str_eq(eq);
|
||||
|
|
@ -665,8 +521,8 @@ namespace seq {
|
|||
void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) {
|
||||
if (!m_root)
|
||||
m_root = mk_node();
|
||||
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
|
||||
m_root->str_eqs().size() + m_root->str_mems().size());
|
||||
dep_tracker dep;
|
||||
dep.insert(m_root->str_eqs().size() + m_root->str_mems().size());
|
||||
euf::snode* history = m_sg.mk_empty();
|
||||
unsigned id = next_mem_id();
|
||||
m_root->add_str_mem(str_mem(str, regex, history, id, dep));
|
||||
|
|
@ -2574,9 +2430,9 @@ namespace seq {
|
|||
if (!n->is_currently_conflict())
|
||||
continue;
|
||||
for (str_eq const& eq : n->str_eqs())
|
||||
deps.merge(eq.m_dep);
|
||||
deps |= eq.m_dep;
|
||||
for (str_mem const& mem : n->str_mems())
|
||||
deps.merge(mem.m_dep);
|
||||
deps |= mem.m_dep;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2584,9 +2440,7 @@ namespace seq {
|
|||
SASSERT(m_root);
|
||||
dep_tracker deps;
|
||||
collect_conflict_deps(deps);
|
||||
unsigned_vector bits;
|
||||
deps.get_set_bits(bits);
|
||||
for (unsigned b : bits) {
|
||||
for (unsigned b : deps) {
|
||||
if (b < m_num_input_eqs)
|
||||
eq_indices.push_back(b);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -98,10 +98,8 @@ Abstract:
|
|||
------------------------------------
|
||||
1. dep_tracker (DependencyTracker): ZIPT's DependencyTracker is a .NET
|
||||
class using a BitArray-like structure for tracking constraint origins.
|
||||
Z3's dep_tracker uses a dense bitvector stored as svector<unsigned>
|
||||
(32-bit words). The merge/is_superset/empty semantics are equivalent,
|
||||
but the representation is more cache-friendly and avoids managed-heap
|
||||
allocation.
|
||||
Z3 uses uint_set (a dense bitvector from util/uint_set.h) for the same
|
||||
purpose. The |=/subset_of/empty semantics are equivalent.
|
||||
|
||||
2. Substitution application (nielsen_node::apply_subst): ZIPT uses an
|
||||
immutable, functional style -- Apply() returns a new constraint if
|
||||
|
|
@ -223,8 +221,8 @@ Abstract:
|
|||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-02
|
||||
Clemens Eisenhofer 2026-03-02
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-02
|
||||
|
||||
--*/
|
||||
|
||||
|
|
@ -293,96 +291,7 @@ namespace seq {
|
|||
// dependency tracker: bitvector tracking which input constraints
|
||||
// contributed to deriving a given constraint
|
||||
// mirrors ZIPT's DependencyTracker
|
||||
class dep_tracker {
|
||||
svector<unsigned> m_bits;
|
||||
public:
|
||||
dep_tracker() = default;
|
||||
explicit dep_tracker(unsigned num_bits);
|
||||
dep_tracker(unsigned num_bits, unsigned set_bit);
|
||||
|
||||
void merge(dep_tracker const& other);
|
||||
bool is_superset(dep_tracker const& other) const;
|
||||
bool empty() const;
|
||||
|
||||
// collect indices of all set bits into 'indices'
|
||||
void get_set_bits(unsigned_vector& indices) const;
|
||||
|
||||
bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; }
|
||||
bool operator!=(dep_tracker const& other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
// -----------------------------------------------
|
||||
// character range and set types
|
||||
// mirrors ZIPT's CharacterRange and CharacterSet
|
||||
// -----------------------------------------------
|
||||
|
||||
// half-open character interval [lo, hi)
|
||||
// mirrors ZIPT's CharacterRange
|
||||
struct char_range {
|
||||
unsigned m_lo;
|
||||
unsigned m_hi; // exclusive
|
||||
|
||||
char_range(): m_lo(0), m_hi(0) {}
|
||||
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
|
||||
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
|
||||
|
||||
bool is_empty() const { return m_lo == m_hi; }
|
||||
bool is_unit() const { return m_hi == m_lo + 1; }
|
||||
unsigned length() const { return m_hi - m_lo; }
|
||||
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
|
||||
|
||||
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
|
||||
bool operator<(char_range const& o) const {
|
||||
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
|
||||
}
|
||||
};
|
||||
|
||||
// sorted list of non-overlapping character intervals
|
||||
// mirrors ZIPT's CharacterSet
|
||||
class char_set {
|
||||
svector<char_range> m_ranges;
|
||||
public:
|
||||
char_set() = default;
|
||||
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
|
||||
|
||||
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
|
||||
|
||||
bool is_empty() const { return m_ranges.empty(); }
|
||||
bool is_full(unsigned max_char) const {
|
||||
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
|
||||
}
|
||||
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
|
||||
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
|
||||
|
||||
svector<char_range> const& ranges() const { return m_ranges; }
|
||||
|
||||
// total number of characters in the set
|
||||
unsigned char_count() const;
|
||||
|
||||
// membership test via binary search
|
||||
bool contains(unsigned c) const;
|
||||
|
||||
// add a single character
|
||||
void add(unsigned c);
|
||||
|
||||
// union with another char_set
|
||||
void add(char_set const& other);
|
||||
|
||||
// intersect with another char_set, returns the result
|
||||
char_set intersect_with(char_set const& other) const;
|
||||
|
||||
// complement relative to [0, max_char]
|
||||
char_set complement(unsigned max_char) const;
|
||||
|
||||
// check if two sets are disjoint
|
||||
bool is_disjoint(char_set const& other) const;
|
||||
|
||||
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
|
||||
|
||||
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
|
||||
|
||||
std::ostream& display(std::ostream& out) const;
|
||||
};
|
||||
using dep_tracker = uint_set;
|
||||
|
||||
// -----------------------------------------------
|
||||
// character-level substitution
|
||||
|
|
|
|||
|
|
@ -450,10 +450,8 @@ namespace smt {
|
|||
|
||||
void theory_nseq::deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits) {
|
||||
context& ctx = get_context();
|
||||
unsigned_vector bits;
|
||||
deps.get_set_bits(bits);
|
||||
unsigned num_input_eqs = m_nielsen.num_input_eqs();
|
||||
for (unsigned b : bits) {
|
||||
for (unsigned b : deps) {
|
||||
if (b < num_input_eqs) {
|
||||
eq_source const& src = m_state.get_eq_source(b);
|
||||
if (src.m_n1->get_root() == src.m_n2->get_root())
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ Abstract:
|
|||
#include "ast/ast_pp.h"
|
||||
#include <iostream>
|
||||
|
||||
// test dep_tracker basic operations
|
||||
// test dep_tracker (uint_set) basic operations
|
||||
static void test_dep_tracker() {
|
||||
std::cout << "test_dep_tracker\n";
|
||||
|
||||
|
|
@ -31,23 +31,26 @@ static void test_dep_tracker() {
|
|||
SASSERT(d0.empty());
|
||||
|
||||
// tracker with one bit set
|
||||
seq::dep_tracker d1(8, 3);
|
||||
seq::dep_tracker d1;
|
||||
d1.insert(3);
|
||||
SASSERT(!d1.empty());
|
||||
|
||||
// tracker with another bit
|
||||
seq::dep_tracker d2(8, 5);
|
||||
seq::dep_tracker d2;
|
||||
d2.insert(5);
|
||||
SASSERT(!d2.empty());
|
||||
|
||||
// merge
|
||||
seq::dep_tracker d3 = d1;
|
||||
d3.merge(d2);
|
||||
d3 |= d2;
|
||||
SASSERT(!d3.empty());
|
||||
SASSERT(d3.is_superset(d1));
|
||||
SASSERT(d3.is_superset(d2));
|
||||
SASSERT(!d1.is_superset(d2));
|
||||
SASSERT(d1.subset_of(d3));
|
||||
SASSERT(d2.subset_of(d3));
|
||||
SASSERT(!d2.subset_of(d1));
|
||||
|
||||
// equality
|
||||
seq::dep_tracker d4(8, 3);
|
||||
seq::dep_tracker d4;
|
||||
d4.insert(3);
|
||||
SASSERT(d1 == d4);
|
||||
SASSERT(d1 != d2);
|
||||
}
|
||||
|
|
@ -65,7 +68,7 @@ static void test_str_eq() {
|
|||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* e = sg.mk_empty();
|
||||
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
|
||||
// basic equality
|
||||
seq::str_eq eq1(x, y, dep);
|
||||
|
|
@ -112,7 +115,7 @@ static void test_str_mem() {
|
|||
expr_ref star_fc(seq.re.mk_full_seq(str_sort), m);
|
||||
euf::snode* regex = sg.mk(star_fc);
|
||||
|
||||
seq::dep_tracker dep(4, 1);
|
||||
seq::dep_tracker dep; dep.insert(1);
|
||||
seq::str_mem mem(x, regex, e, 0, dep);
|
||||
|
||||
// x in regex is primitive (x is a single variable)
|
||||
|
|
@ -1322,37 +1325,40 @@ static void test_solve_conflict_deps() {
|
|||
SASSERT(!deps.empty());
|
||||
}
|
||||
|
||||
// test dep_tracker::get_set_bits
|
||||
// test dep_tracker (uint_set) iteration
|
||||
static void test_dep_tracker_get_set_bits() {
|
||||
std::cout << "test_dep_tracker_get_set_bits\n";
|
||||
|
||||
// empty tracker has no bits
|
||||
seq::dep_tracker d0;
|
||||
unsigned_vector bits0;
|
||||
d0.get_set_bits(bits0);
|
||||
for (unsigned b : d0) bits0.push_back(b);
|
||||
SASSERT(bits0.empty());
|
||||
|
||||
// single bit set at position 5
|
||||
seq::dep_tracker d1(16, 5);
|
||||
seq::dep_tracker d1;
|
||||
d1.insert(5);
|
||||
unsigned_vector bits1;
|
||||
d1.get_set_bits(bits1);
|
||||
for (unsigned b : d1) bits1.push_back(b);
|
||||
SASSERT(bits1.size() == 1);
|
||||
SASSERT(bits1[0] == 5);
|
||||
|
||||
// two bits merged
|
||||
seq::dep_tracker d2(16, 3);
|
||||
d2.merge(seq::dep_tracker(16, 11));
|
||||
seq::dep_tracker d2;
|
||||
d2.insert(3);
|
||||
d2.insert(11);
|
||||
unsigned_vector bits2;
|
||||
d2.get_set_bits(bits2);
|
||||
for (unsigned b : d2) bits2.push_back(b);
|
||||
SASSERT(bits2.size() == 2);
|
||||
SASSERT(bits2[0] == 3);
|
||||
SASSERT(bits2[1] == 11);
|
||||
|
||||
// test across word boundary (bit 31 and 32)
|
||||
seq::dep_tracker d3(64, 31);
|
||||
d3.merge(seq::dep_tracker(64, 32));
|
||||
seq::dep_tracker d3;
|
||||
d3.insert(31);
|
||||
d3.insert(32);
|
||||
unsigned_vector bits3;
|
||||
d3.get_set_bits(bits3);
|
||||
for (unsigned b : d3) bits3.push_back(b);
|
||||
SASSERT(bits3.size() == 2);
|
||||
SASSERT(bits3[0] == 31);
|
||||
SASSERT(bits3[1] == 32);
|
||||
|
|
@ -1543,7 +1549,7 @@ static void test_simplify_prefix_cancel() {
|
|||
euf::snode* aby = sg.mk_concat(a, sg.mk_concat(b, y));
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_eq(seq::str_eq(abx, aby, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1573,7 +1579,7 @@ static void test_simplify_symbol_clash() {
|
|||
euf::snode* by = sg.mk_concat(b, y);
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_eq(seq::str_eq(ax, by, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1598,7 +1604,7 @@ static void test_simplify_empty_propagation() {
|
|||
|
||||
// ε = x·y → forces x=ε, y=ε → all trivial → satisfied
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_eq(seq::str_eq(e, xy, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1619,7 +1625,7 @@ static void test_simplify_empty_vs_char() {
|
|||
|
||||
// ε = A → rhs has non-variable token → conflict
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_eq(seq::str_eq(e, a, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1645,7 +1651,7 @@ static void test_simplify_multi_pass_clash() {
|
|||
euf::snode* ac = sg.mk_concat(a, c);
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_eq(seq::str_eq(ab, ac, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1667,7 +1673,7 @@ static void test_simplify_trivial_removal() {
|
|||
euf::snode* e = sg.mk_empty();
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_eq(seq::str_eq(e, e, dep)); // trivial
|
||||
node->add_str_eq(seq::str_eq(x, y, dep)); // non-trivial
|
||||
|
||||
|
|
@ -1689,7 +1695,7 @@ static void test_simplify_all_trivial_satisfied() {
|
|||
euf::snode* e = sg.mk_empty();
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_eq(seq::str_eq(x, x, dep)); // trivial: same pointer
|
||||
node->add_str_eq(seq::str_eq(e, e, dep)); // trivial: both empty
|
||||
|
||||
|
|
@ -1716,7 +1722,7 @@ static void test_simplify_regex_infeasible() {
|
|||
|
||||
// ε ∈ to_re("A") → non-nullable → conflict
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_mem(seq::str_mem(e, regex, e, 0, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1744,7 +1750,7 @@ static void test_simplify_nullable_removal() {
|
|||
|
||||
// ε ∈ star(to_re("A")) → nullable → satisfied, mem removed
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_mem(seq::str_mem(e, regex, e, 0, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1772,7 +1778,7 @@ static void test_simplify_brzozowski_sat() {
|
|||
|
||||
// "A" ∈ to_re("A") → derivative consumes 'A' → ε ∈ ε-regex → satisfied
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(4, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
node->add_str_mem(seq::str_mem(a, regex, e, 0, dep));
|
||||
|
||||
auto sr = node->simplify_and_init(ng);
|
||||
|
|
@ -1795,7 +1801,7 @@ static void test_simplify_multiple_eqs() {
|
|||
euf::snode* e = sg.mk_empty();
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep(8, 0);
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
|
||||
// eq1: ε = ε (trivial → removed)
|
||||
node->add_str_eq(seq::str_eq(e, e, dep));
|
||||
|
|
@ -2234,7 +2240,7 @@ static void test_length_constraints_deps() {
|
|||
for (auto const& c : constraints) {
|
||||
SASSERT(!c.m_dep.empty());
|
||||
unsigned_vector bits;
|
||||
c.m_dep.get_set_bits(bits);
|
||||
for (unsigned b : c.m_dep) bits.push_back(b);
|
||||
SASSERT(bits.size() == 1);
|
||||
SASSERT(bits[0] == 0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -286,3 +286,146 @@ bool operator<(const zstring& lhs, const zstring& rhs) {
|
|||
// so decide based on the relative lengths
|
||||
return lhs.length() < rhs.length();
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// char_set
|
||||
// -----------------------------------------------
|
||||
|
||||
unsigned char_set::char_count() const {
|
||||
unsigned count = 0;
|
||||
for (auto const& r : m_ranges)
|
||||
count += r.length();
|
||||
return count;
|
||||
}
|
||||
|
||||
bool char_set::contains(unsigned c) const {
|
||||
// binary search over sorted non-overlapping ranges
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void char_set::add(unsigned c) {
|
||||
if (m_ranges.empty()) {
|
||||
m_ranges.push_back(char_range(c));
|
||||
return;
|
||||
}
|
||||
// binary search for insertion point
|
||||
int lo = 0, hi = static_cast<int>(m_ranges.size()) - 1;
|
||||
while (lo <= hi) {
|
||||
int mid = lo + (hi - lo) / 2;
|
||||
if (c < m_ranges[mid].m_lo)
|
||||
hi = mid - 1;
|
||||
else if (c >= m_ranges[mid].m_hi)
|
||||
lo = mid + 1;
|
||||
else
|
||||
return; // already contained
|
||||
}
|
||||
// lo is the insertion point
|
||||
unsigned idx = static_cast<unsigned>(lo);
|
||||
bool merge_left = idx > 0 && m_ranges[idx - 1].m_hi == c;
|
||||
bool merge_right = idx < m_ranges.size() && m_ranges[idx].m_lo == c + 1;
|
||||
if (merge_left && merge_right) {
|
||||
m_ranges[idx - 1].m_hi = m_ranges[idx].m_hi;
|
||||
m_ranges.erase(m_ranges.begin() + idx);
|
||||
} else if (merge_left) {
|
||||
m_ranges[idx - 1].m_hi = c + 1;
|
||||
} else if (merge_right) {
|
||||
m_ranges[idx].m_lo = c;
|
||||
} else {
|
||||
// positional insert: shift elements right and place new element
|
||||
m_ranges.push_back(char_range());
|
||||
for (unsigned k = m_ranges.size() - 1; k > idx; --k)
|
||||
m_ranges[k] = m_ranges[k - 1];
|
||||
m_ranges[idx] = char_range(c);
|
||||
}
|
||||
}
|
||||
|
||||
void char_set::add(char_set const& other) {
|
||||
for (auto const& r : other.m_ranges) {
|
||||
for (unsigned c = r.m_lo; c < r.m_hi; ++c)
|
||||
add(c);
|
||||
}
|
||||
}
|
||||
|
||||
char_set char_set::intersect_with(char_set const& other) const {
|
||||
char_set result;
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
unsigned lo = std::max(m_ranges[i].m_lo, other.m_ranges[j].m_lo);
|
||||
unsigned hi = std::min(m_ranges[i].m_hi, other.m_ranges[j].m_hi);
|
||||
if (lo < hi)
|
||||
result.m_ranges.push_back(char_range(lo, hi));
|
||||
if (m_ranges[i].m_hi < other.m_ranges[j].m_hi)
|
||||
++i;
|
||||
else
|
||||
++j;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
char_set char_set::complement(unsigned max_char) const {
|
||||
char_set result;
|
||||
if (m_ranges.empty()) {
|
||||
result.m_ranges.push_back(char_range(0, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
unsigned from = 0;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (from < r.m_lo)
|
||||
result.m_ranges.push_back(char_range(from, r.m_lo));
|
||||
from = r.m_hi;
|
||||
}
|
||||
if (from <= max_char)
|
||||
result.m_ranges.push_back(char_range(from, max_char + 1));
|
||||
return result;
|
||||
}
|
||||
|
||||
bool char_set::is_disjoint(char_set const& other) const {
|
||||
unsigned i = 0, j = 0;
|
||||
while (i < m_ranges.size() && j < other.m_ranges.size()) {
|
||||
if (m_ranges[i].m_hi <= other.m_ranges[j].m_lo)
|
||||
++i;
|
||||
else if (other.m_ranges[j].m_hi <= m_ranges[i].m_lo)
|
||||
++j;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream& char_set::display(std::ostream& out) const {
|
||||
if (m_ranges.empty()) {
|
||||
out << "{}";
|
||||
return out;
|
||||
}
|
||||
out << "{ ";
|
||||
bool first = true;
|
||||
for (auto const& r : m_ranges) {
|
||||
if (!first) out << ", ";
|
||||
first = false;
|
||||
if (r.is_unit()) {
|
||||
unsigned c = r.m_lo;
|
||||
if (c >= 'a' && c <= 'z')
|
||||
out << (char)c;
|
||||
else if (c >= 'A' && c <= 'Z')
|
||||
out << (char)c;
|
||||
else if (c >= '0' && c <= '9')
|
||||
out << (char)c;
|
||||
else
|
||||
out << "#[" << c << "]";
|
||||
} else {
|
||||
out << "[" << r.m_lo << "-" << (r.m_hi - 1) << "]";
|
||||
}
|
||||
}
|
||||
out << " }";
|
||||
return out;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ Author:
|
|||
#include <string>
|
||||
#include "util/buffer.h"
|
||||
#include "util/rational.h"
|
||||
#include "util/vector.h"
|
||||
|
||||
enum class string_encoding {
|
||||
ascii, // exactly 8 bits
|
||||
|
|
@ -93,3 +94,71 @@ public:
|
|||
friend std::ostream& operator<<(std::ostream &os, const zstring &str);
|
||||
friend bool operator<(const zstring& lhs, const zstring& rhs);
|
||||
};
|
||||
|
||||
// half-open character interval [lo, hi)
|
||||
// mirrors ZIPT's CharacterRange
|
||||
struct char_range {
|
||||
unsigned m_lo;
|
||||
unsigned m_hi; // exclusive
|
||||
|
||||
char_range(): m_lo(0), m_hi(0) {}
|
||||
char_range(unsigned c): m_lo(c), m_hi(c + 1) {}
|
||||
char_range(unsigned lo, unsigned hi): m_lo(lo), m_hi(hi) { SASSERT(lo <= hi); }
|
||||
|
||||
bool is_empty() const { return m_lo == m_hi; }
|
||||
bool is_unit() const { return m_hi == m_lo + 1; }
|
||||
unsigned length() const { return m_hi - m_lo; }
|
||||
bool contains(unsigned c) const { return c >= m_lo && c < m_hi; }
|
||||
|
||||
bool operator==(char_range const& o) const { return m_lo == o.m_lo && m_hi == o.m_hi; }
|
||||
bool operator<(char_range const& o) const {
|
||||
return m_lo < o.m_lo || (m_lo == o.m_lo && m_hi < o.m_hi);
|
||||
}
|
||||
};
|
||||
|
||||
// sorted list of non-overlapping character intervals
|
||||
// mirrors ZIPT's CharacterSet
|
||||
class char_set {
|
||||
svector<char_range> m_ranges;
|
||||
public:
|
||||
char_set() = default;
|
||||
explicit char_set(char_range const& r) { if (!r.is_empty()) m_ranges.push_back(r); }
|
||||
|
||||
static char_set full(unsigned max_char) { return char_set(char_range(0, max_char + 1)); }
|
||||
|
||||
bool is_empty() const { return m_ranges.empty(); }
|
||||
bool is_full(unsigned max_char) const {
|
||||
return m_ranges.size() == 1 && m_ranges[0].m_lo == 0 && m_ranges[0].m_hi == max_char + 1;
|
||||
}
|
||||
bool is_unit() const { return m_ranges.size() == 1 && m_ranges[0].is_unit(); }
|
||||
unsigned first_char() const { SASSERT(!is_empty()); return m_ranges[0].m_lo; }
|
||||
|
||||
svector<char_range> const& ranges() const { return m_ranges; }
|
||||
|
||||
// total number of characters in the set
|
||||
unsigned char_count() const;
|
||||
|
||||
// membership test via binary search
|
||||
bool contains(unsigned c) const;
|
||||
|
||||
// add a single character
|
||||
void add(unsigned c);
|
||||
|
||||
// union with another char_set
|
||||
void add(char_set const& other);
|
||||
|
||||
// intersect with another char_set, returns the result
|
||||
char_set intersect_with(char_set const& other) const;
|
||||
|
||||
// complement relative to [0, max_char]
|
||||
char_set complement(unsigned max_char) const;
|
||||
|
||||
// check if two sets are disjoint
|
||||
bool is_disjoint(char_set const& other) const;
|
||||
|
||||
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
|
||||
|
||||
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
|
||||
|
||||
std::ostream& display(std::ostream& out) const;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue