3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-17 18:43:45 +00:00
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2026-03-13 18:19:25 -07:00
parent 8a48caf742
commit 27f5541b0b
11 changed files with 2176 additions and 80 deletions

View file

@ -15,11 +15,294 @@ Author:
--*/
#include "smt/nseq_regex.h"
#include <unordered_set>
namespace smt {
// -----------------------------------------------------------------------
// Regex emptiness checking (structural analysis)
// Stabilizer store
// -----------------------------------------------------------------------
void nseq_regex::reset_stabilizers() {
m_stabilizers.reset();
m_self_stabilizing.reset();
}
void nseq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
if (!regex || !stabilizer)
return;
unsigned id = regex->id();
auto& stabs = m_stabilizers.insert_if_not_there(id, ptr_vector<euf::snode>());
// De-duplicate by pointer equality (mirrors ZIPT Environment.AddStabilizer
// which checks reference equality before adding).
for (euf::snode* s : stabs)
if (s == stabilizer)
return;
stabs.push_back(stabilizer);
}
euf::snode* nseq_regex::get_stabilizer_union(euf::snode* regex) {
if (!regex)
return nullptr;
if (!m_stabilizers.contains(regex->id()))
return nullptr;
auto& stabs = m_stabilizers[regex->id()];
if (stabs.empty())
return nullptr;
// Single stabilizer: return it directly.
if (stabs.size() == 1)
return stabs[0];
// Multiple stabilizers: build re.union chain.
// union(s1, union(s2, ... union(sN-1, sN)...))
seq_util& seq = m_sg.get_seq_util();
euf::snode* result = stabs[stabs.size() - 1];
for (unsigned i = stabs.size() - 1; i-- > 0; ) {
expr* lhs = stabs[i]->get_expr();
expr* rhs = result->get_expr();
if (!lhs || !rhs)
return nullptr;
expr_ref un(seq.re.mk_union(lhs, rhs), m_sg.get_manager());
result = m_sg.mk(un);
}
return result;
}
bool nseq_regex::has_stabilizers(euf::snode* regex) const {
if (!regex)
return false;
if (!m_stabilizers.contains(regex->id()))
return false;
return !m_stabilizers[regex->id()].empty();
}
ptr_vector<euf::snode> const* nseq_regex::get_stabilizers(euf::snode* regex) const {
if (!regex)
return nullptr;
if (!m_stabilizers.contains(regex->id()))
return nullptr;
return &m_stabilizers[regex->id()];
}
void nseq_regex::set_self_stabilizing(euf::snode* regex) {
if (!regex)
return;
m_self_stabilizing.insert(regex->id());
}
bool nseq_regex::is_self_stabilizing(euf::snode* regex) const {
if (!regex)
return false;
return m_self_stabilizing.contains(regex->id());
}
// -----------------------------------------------------------------------
// Self-stabilizing auto-detection
// -----------------------------------------------------------------------
bool nseq_regex::compute_self_stabilizing(euf::snode* regex) const {
if (!regex)
return false;
// R* is always self-stabilizing: D(c, R*) = D(c,R) · R*,
// so R* appears as the tail of every derivative and acts as
// its own stabilizer.
if (regex->is_star())
return true;
// Σ* (full_seq, i.e., re.all / .*) is self-stabilizing:
// D(c, Σ*) = Σ* for every character c.
if (regex->is_full_seq())
return true;
// ∅ (fail / empty language) is trivially self-stabilizing:
// it has no live derivatives, so the flag is vacuously true.
if (regex->is_fail())
return true;
// Complement of full_seq is ∅ (complement of Σ*), which is
// also trivially self-stabilizing.
if (regex->is_complement() && regex->num_args() == 1 &&
regex->arg(0)->is_full_seq())
return true;
// Loop with lo=0 and no upper bound behaves like R*
// (r{0,} ≡ r*), so it is self-stabilizing.
if (regex->is_loop() && regex->is_nullable()) {
// A nullable loop with a star-like body: heuristic check.
// Only mark as self-stabilizing if the body is a Kleene closure.
// Loop(R, 0, ∞) ~ R* — but we rely on the sgraph to normalize
// these, so only catch exact star nodes above.
}
return false;
}
// -----------------------------------------------------------------------
// Self-stabilizing propagation through derivatives
// -----------------------------------------------------------------------
void nseq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
if (!parent || !deriv)
return;
// If the derivative is already known to be self-stabilizing (either
// inherently or from a prior propagation), nothing to do.
if (is_self_stabilizing(deriv))
return;
// If the derivative is itself inherently self-stabilizing
// (e.g., it is a star or full_seq), mark it now.
if (compute_self_stabilizing(deriv)) {
set_self_stabilizing(deriv);
return;
}
// Rule 1: Star parent.
// D(c, R*) = D(c, R) · R*. The derivative always contains the
// R* tail, so it is self-stabilizing regardless of D(c,R).
if (parent->is_star()) {
set_self_stabilizing(deriv);
return;
}
// Rule 2: Full_seq parent.
// D(c, Σ*) = Σ*, and Σ* is self-stabilizing.
// (The derivative should be Σ* itself; mark it for safety.)
if (parent->is_full_seq()) {
set_self_stabilizing(deriv);
return;
}
// Check if parent is self-stabilizing (either inherently or marked).
bool parent_ss = is_self_stabilizing(parent) || compute_self_stabilizing(parent);
// Rule 3: Concat parent R · S.
// D(c, R·S) = D(c,R)·S | (nullable(R) ? D(c,S) : ∅).
// If S is self-stabilizing, the D(c,R)·S branch inherits it.
// If the whole parent R·S is self-stabilizing, the derivative is too.
if (parent->is_concat() && parent->num_args() == 2) {
euf::snode* tail = parent->arg(1);
bool tail_ss = is_self_stabilizing(tail) || compute_self_stabilizing(tail);
if (tail_ss || parent_ss) {
set_self_stabilizing(deriv);
return;
}
}
// Rule 4: Union parent R | S.
// D(c, R|S) = D(c,R) | D(c,S).
// Self-stabilizing if both children are self-stabilizing.
if (parent->is_union() && parent->num_args() == 2) {
euf::snode* lhs = parent->arg(0);
euf::snode* rhs = parent->arg(1);
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
if (lhs_ss && rhs_ss) {
set_self_stabilizing(deriv);
return;
}
}
// Rule 5: Intersection parent R ∩ S.
// D(c, R∩S) = D(c,R) ∩ D(c,S).
// Self-stabilizing if both children are self-stabilizing.
if (parent->is_intersect() && parent->num_args() == 2) {
euf::snode* lhs = parent->arg(0);
euf::snode* rhs = parent->arg(1);
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
if (lhs_ss && rhs_ss) {
set_self_stabilizing(deriv);
return;
}
}
// Rule 6: Complement parent ~R.
// D(c, ~R) = ~D(c, R).
// Preserves self-stabilizing from R.
if (parent->is_complement() && parent->num_args() == 1) {
euf::snode* inner = parent->arg(0);
bool inner_ss = is_self_stabilizing(inner) || compute_self_stabilizing(inner);
if (inner_ss) {
set_self_stabilizing(deriv);
return;
}
}
// Rule 7: Generic self-stabilizing parent.
// If the parent was explicitly marked self-stabilizing (e.g., via
// a previous propagation), propagate to the derivative.
if (parent_ss) {
set_self_stabilizing(deriv);
return;
}
}
// -----------------------------------------------------------------------
// Derivative with propagation
// -----------------------------------------------------------------------
euf::snode* nseq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
if (!re || !elem)
return nullptr;
euf::snode* deriv = derivative(re, elem);
if (deriv)
propagate_self_stabilizing(re, deriv);
return deriv;
}
// -----------------------------------------------------------------------
// Uniform derivative (symbolic character consumption)
// -----------------------------------------------------------------------
euf::snode* nseq_regex::try_uniform_derivative(euf::snode* regex) {
if (!regex)
return nullptr;
// Quick exits: trivial regexes with known uniform derivatives.
// Σ* (full_seq) has derivative Σ* for every character.
if (regex->is_full_seq())
return regex;
// ∅ (fail) has derivative ∅ for every character — but this means
// every character is rejected. Return fail so the caller can
// detect a conflict.
if (regex->is_fail())
return regex;
// Compute minterms: the character-class partition of the alphabet
// induced by the regex.
euf::snode_vector minterms;
m_sg.compute_minterms(regex, minterms);
if (minterms.empty())
return nullptr;
// Compute the derivative for each non-empty minterm. If all produce
// the same result, the derivative is independent of the character
// value and we can consume a symbolic character deterministically.
euf::snode* uniform = nullptr;
for (euf::snode* mt : minterms) {
if (!mt || mt->is_fail())
continue; // empty character class — no character belongs to it
euf::snode* deriv = m_sg.brzozowski_deriv(regex, mt);
if (!deriv)
return nullptr; // derivative computation failed
if (!uniform) {
uniform = deriv;
} else if (uniform->id() != deriv->id()) {
return nullptr; // different derivatives — not uniform
}
}
return uniform; // may be nullptr if all minterms were fail/empty
}
// -----------------------------------------------------------------------
// Ground prefix consumption
// -----------------------------------------------------------------------
bool nseq_regex::is_empty_regex(euf::snode* re) const {
@ -68,6 +351,377 @@ namespace smt {
return false;
}
// -----------------------------------------------------------------------
// BFS regex emptiness check — helper: collect character boundaries
// -----------------------------------------------------------------------
void nseq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
if (!re || !re->get_expr())
return;
seq_util& seq = m_sg.get_seq_util();
expr* e = re->get_expr();
// Range predicate re.range(lo, hi): boundary at lo and hi+1
// Range arguments are string expressions (e.g., str.unit(ch))
expr* lo_expr = nullptr;
expr* hi_expr = nullptr;
if (seq.re.is_range(e, lo_expr, hi_expr)) {
zstring s_lo, s_hi;
if (lo_expr && seq.str.is_string(lo_expr, s_lo) && s_lo.length() == 1)
bounds.push_back(s_lo[0]);
if (hi_expr && seq.str.is_string(hi_expr, s_hi) && s_hi.length() == 1 && s_hi[0] < zstring::max_char())
bounds.push_back(s_hi[0] + 1);
return;
}
// to_re(s): boundary at first character and first+1
expr* body = nullptr;
if (seq.re.is_to_re(e, body)) {
zstring s;
if (seq.str.is_string(body, s) && s.length() > 0) {
unsigned first_ch = s[0];
bounds.push_back(first_ch);
if (first_ch < zstring::max_char())
bounds.push_back(first_ch + 1);
}
return;
}
// Leaf nodes with no character discrimination
if (re->is_fail() || re->is_full_char() || re->is_full_seq())
return;
// Recurse into children (handles union, concat, star, loop, etc.)
for (unsigned i = 0; i < re->num_args(); ++i)
collect_char_boundaries(re->arg(i), bounds);
}
// -----------------------------------------------------------------------
// BFS regex emptiness check — helper: alphabet representatives
// -----------------------------------------------------------------------
void nseq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
unsigned_vector bounds;
bounds.push_back(0); // always include character 0
collect_char_boundaries(re, bounds);
// Sort and deduplicate
std::sort(bounds.begin(), bounds.end());
unsigned prev = UINT_MAX;
for (unsigned b : bounds) {
if (b != prev) {
reps.push_back(m_sg.mk_char(b));
prev = b;
}
}
}
// -----------------------------------------------------------------------
// BFS regex emptiness check
// -----------------------------------------------------------------------
lbool nseq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
if (!re || !re->get_expr())
return l_undef;
if (re->is_fail())
return l_true;
if (re->is_nullable())
return l_false;
// Structural quick checks for kinds that are never empty
if (re->is_star() || re->is_full_char() || re->is_full_seq() || re->is_to_re())
return l_false;
// Structural emptiness catches simple cases
if (is_empty_regex(re))
return l_true;
// Only handle ground regexes; non-ground can't be fully explored
if (!re->is_ground())
return l_undef;
// BFS over the Brzozowski derivative automaton.
// Each state is a derivative regex snode identified by its id.
// We explore states by computing derivatives for representative
// characters from the alphabet partition.
uint_set visited;
euf::snode_vector worklist;
worklist.push_back(re);
visited.insert(re->id());
unsigned states_explored = 0;
bool had_failed_deriv = false;
while (!worklist.empty()) {
if (states_explored >= max_states)
return l_undef;
euf::snode* current = worklist.back();
worklist.pop_back();
++states_explored;
// Compute representative characters for current state's
// alphabet partition. Each representative is a concrete
// character snode whose equivalence class has identical
// derivative behavior.
euf::snode_vector reps;
get_alphabet_representatives(current, reps);
if (reps.empty()) {
// No representatives means no character predicates;
// use a default character to explore the single partition.
reps.push_back(m_sg.mk_char('a'));
}
for (euf::snode* ch : reps) {
euf::snode* deriv = m_sg.brzozowski_deriv(current, ch);
if (!deriv) {
// Derivative computation failed for this character.
// Track the failure but continue with other characters.
had_failed_deriv = true;
continue;
}
if (deriv->is_nullable())
return l_false; // found an accepting state
if (deriv->is_fail())
continue; // dead-end, no need to explore further
if (is_empty_regex(deriv))
continue; // structurally empty subtree
if (!visited.contains(deriv->id())) {
visited.insert(deriv->id());
worklist.push_back(deriv);
}
}
}
// Exhausted all reachable states without finding a nullable one.
// If we had any failed derivative computations, the result is
// inconclusive since we may have missed reachable states.
if (had_failed_deriv)
return l_undef;
return l_true;
}
// -----------------------------------------------------------------------
// Multi-regex intersection emptiness check
// BFS over the product of Brzozowski derivative automata.
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
// -----------------------------------------------------------------------
lbool nseq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
unsigned max_states) {
if (regexes.empty())
return l_false; // empty intersection = full language (vacuously non-empty)
// Quick checks: if any regex is fail/empty, intersection is empty
for (euf::snode* re : regexes) {
if (!re || !re->get_expr())
return l_undef;
if (re->is_fail() || is_empty_regex(re))
return l_true;
}
// Check if all are nullable (intersection accepts ε)
bool all_nullable = true;
for (euf::snode* re : regexes) {
if (!re->is_nullable()) { all_nullable = false; break; }
}
if (all_nullable)
return l_false;
// Single regex: delegate to is_empty_bfs
if (regexes.size() == 1)
return is_empty_bfs(regexes[0], max_states);
// Build product BFS. State = tuple of regex snode ids.
// Use a map from state hash to visited set.
using state_t = svector<unsigned>;
auto state_hash = [](state_t const& s) -> unsigned {
unsigned h = 0;
for (unsigned id : s)
h = h * 31 + id;
return h;
};
auto state_eq = [](state_t const& a, state_t const& b) -> bool {
if (a.size() != b.size()) return false;
for (unsigned i = 0; i < a.size(); ++i)
if (a[i] != b[i]) return false;
return true;
};
// Use simple set via sorted vector of hashes (good enough for bounded BFS)
std::unordered_set<unsigned> visited_hashes;
struct bfs_state {
ptr_vector<euf::snode> regexes;
};
std::vector<bfs_state> worklist;
bfs_state initial;
initial.regexes.append(regexes);
worklist.push_back(std::move(initial));
state_t init_ids;
for (euf::snode* re : regexes)
init_ids.push_back(re->id());
visited_hashes.insert(state_hash(init_ids));
unsigned states_explored = 0;
bool had_failed = false;
// Collect alphabet representatives from the intersection of all regexes
// (merge boundaries from all)
unsigned_vector all_bounds;
all_bounds.push_back(0);
for (euf::snode* re : regexes)
collect_char_boundaries(re, all_bounds);
std::sort(all_bounds.begin(), all_bounds.end());
euf::snode_vector reps;
unsigned prev = UINT_MAX;
for (unsigned b : all_bounds) {
if (b != prev) {
reps.push_back(m_sg.mk_char(b));
prev = b;
}
}
if (reps.empty())
reps.push_back(m_sg.mk_char('a'));
while (!worklist.empty()) {
if (states_explored >= max_states)
return l_undef;
bfs_state current = std::move(worklist.back());
worklist.pop_back();
++states_explored;
for (euf::snode* ch : reps) {
ptr_vector<euf::snode> derivs;
bool any_fail = false;
bool all_null = true;
bool deriv_failed = false;
for (euf::snode* re : current.regexes) {
euf::snode* d = m_sg.brzozowski_deriv(re, ch);
if (!d) { deriv_failed = true; break; }
if (d->is_fail()) { any_fail = true; break; }
if (!d->is_nullable()) all_null = false;
derivs.push_back(d);
}
if (deriv_failed) { had_failed = true; continue; }
if (any_fail) continue; // this character leads to empty intersection
if (all_null)
return l_false; // found an accepting state in the product
// Check if any component is structurally empty
bool any_empty = false;
for (euf::snode* d : derivs) {
if (is_empty_regex(d)) { any_empty = true; break; }
}
if (any_empty) continue;
// Compute state hash and check visited
state_t ids;
for (euf::snode* d : derivs)
ids.push_back(d->id());
unsigned h = state_hash(ids);
if (visited_hashes.count(h) == 0) {
visited_hashes.insert(h);
bfs_state next;
next.regexes.append(derivs);
worklist.push_back(std::move(next));
}
}
}
if (had_failed)
return l_undef;
return l_true; // exhausted all states, intersection is empty
}
// -----------------------------------------------------------------------
// Language subset check: L(A) ⊆ L(B)
// via intersection(A, complement(B)) = ∅
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
// -----------------------------------------------------------------------
lbool nseq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
if (!subset_re || !superset_re)
return l_undef;
// Quick checks
if (subset_re->is_fail() || is_empty_regex(subset_re))
return l_true; // ∅ ⊆ anything
if (superset_re->is_full_seq())
return l_true; // anything ⊆ Σ*
if (subset_re == superset_re)
return l_true; // L ⊆ L
// Build complement(superset)
seq_util& seq = m_sg.get_seq_util();
ast_manager& mgr = m_sg.get_manager();
expr* sup_expr = superset_re->get_expr();
if (!sup_expr)
return l_undef;
expr_ref comp(seq.re.mk_complement(sup_expr), mgr);
euf::snode* comp_sn = m_sg.mk(comp);
if (!comp_sn)
return l_undef;
// Build intersection and check emptiness
// subset ∩ complement(superset) should be empty for subset relation
expr* sub_expr = subset_re->get_expr();
if (!sub_expr)
return l_undef;
expr_ref inter(seq.re.mk_inter(sub_expr, comp.get()), mgr);
euf::snode* inter_sn = m_sg.mk(inter);
if (!inter_sn)
return l_undef;
return is_empty_bfs(inter_sn);
}
// -----------------------------------------------------------------------
// Collect primitive regex intersection for a variable
// -----------------------------------------------------------------------
euf::snode* nseq_regex::collect_primitive_regex_intersection(
euf::snode* var, seq::nielsen_node const& node) {
if (!var)
return nullptr;
seq_util& seq = m_sg.get_seq_util();
ast_manager& mgr = m_sg.get_manager();
euf::snode* result = nullptr;
for (auto const& mem : node.str_mems()) {
if (!mem.m_str || !mem.m_regex)
continue;
// Primitive constraint: str is a single variable
if (!mem.is_primitive())
continue;
euf::snode* first = mem.m_str->first();
if (!first || first != var)
continue;
if (!result) {
result = mem.m_regex;
} else {
expr* r1 = result->get_expr();
expr* r2 = mem.m_regex->get_expr();
if (r1 && r2) {
expr_ref inter(seq.re.mk_inter(r1, r2), mgr);
result = m_sg.mk(inter);
}
}
}
return result;
}
// -----------------------------------------------------------------------
// Cycle detection
// -----------------------------------------------------------------------
@ -88,11 +742,14 @@ namespace smt {
euf::snode* first = mem.m_str->first();
if (!first || !first->is_char())
break;
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, first);
euf::snode* parent_re = mem.m_regex;
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, first);
if (!deriv)
break;
if (deriv->is_fail())
return simplify_status::conflict;
// propagate self-stabilizing flag from parent to derivative
propagate_self_stabilizing(parent_re, deriv);
mem.m_str = m_sg.drop_first(mem.m_str);
mem.m_regex = deriv;
}
@ -361,13 +1018,6 @@ namespace smt {
if (!cycle_regex || !current_regex)
return nullptr;
// The stabilizer is the Kleene star of the "cycle body" regex.
// If the cycle regex and current regex are the same (pointer equal),
// the stabilizer is cycle_regex* (Kleene star).
// This mirrors ZIPT's StabilizerFromCycle which extracts the
// regex between the cycle entry and current point and wraps it in *.
// Build cycle_regex* via the sgraph's expression factory
expr* re_expr = cycle_regex->get_expr();
if (!re_expr)
return nullptr;
@ -378,31 +1028,253 @@ namespace smt {
}
// -----------------------------------------------------------------------
// Stabilizer-based subsumption
// Extract cycle history tokens
// -----------------------------------------------------------------------
bool nseq_regex::try_subsume(seq::str_mem const& mem) {
// Check if the derivation history exhibits a cycle, and if so,
// whether the current regex is subsumed by the stabilizer.
euf::snode* cycle = extract_cycle(mem);
if (!cycle)
euf::snode* nseq_regex::extract_cycle_history(seq::str_mem const& current,
seq::str_mem const& ancestor) {
// The history is built by simplify_and_init as a left-associative
// string concat chain: concat(concat(concat(nil, c1), c2), c3).
// Extract the tokens consumed since the ancestor.
if (!current.m_history)
return nullptr;
unsigned cur_len = current.m_history->length();
unsigned anc_len = ancestor.m_history ? ancestor.m_history->length() : 0;
if (cur_len <= anc_len)
return nullptr;
if (anc_len == 0)
return current.m_history;
return m_sg.drop_left(current.m_history, anc_len);
}
// -----------------------------------------------------------------------
// Get filtered stabilizer star
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243)
// -----------------------------------------------------------------------
euf::snode* nseq_regex::get_filtered_stabilizer_star(euf::snode* re,
euf::snode* excluded_char) {
if (!re)
return nullptr;
ptr_vector<euf::snode> const* stabs = get_stabilizers(re);
if (!stabs || stabs->empty())
return nullptr;
seq_util& seq = m_sg.get_seq_util();
ast_manager& m = m_sg.get_manager();
euf::snode* filtered_union = nullptr;
for (euf::snode* s : *stabs) {
if (!s)
continue;
// Keep only stabilizers whose language cannot start with excluded_char
euf::snode* d = m_sg.brzozowski_deriv(s, excluded_char);
if (d && d->is_fail()) {
if (!filtered_union) {
filtered_union = s;
} else {
expr* e1 = filtered_union->get_expr();
expr* e2 = s->get_expr();
if (e1 && e2) {
expr_ref u(seq.re.mk_union(e1, e2), m);
filtered_union = m_sg.mk(u);
}
}
}
}
if (!filtered_union)
return nullptr;
expr* fe = filtered_union->get_expr();
if (!fe)
return nullptr;
expr_ref star_expr(seq.re.mk_star(fe), m);
return m_sg.mk(star_expr);
}
// -----------------------------------------------------------------------
// Strengthened stabilizer construction with sub-cycle detection
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225)
// -----------------------------------------------------------------------
euf::snode* nseq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
euf::snode* cycle_history) {
if (!cycle_regex || !cycle_history)
return nullptr;
// Flatten the history concat chain into a vector of character tokens.
euf::snode_vector tokens;
cycle_history->collect_tokens(tokens);
if (tokens.empty())
return nullptr;
seq_util& seq = m_sg.get_seq_util();
ast_manager& m = m_sg.get_manager();
// Replay tokens on the cycle regex, detecting sub-cycles.
// A sub-cycle is detected when the derivative returns to cycle_regex.
svector<std::pair<unsigned, unsigned>> sub_cycles;
unsigned cycle_start = 0;
euf::snode* current_re = cycle_regex;
for (unsigned i = 0; i < tokens.size(); ++i) {
euf::snode* tok = tokens[i];
if (!tok)
return nullptr;
euf::snode* deriv = m_sg.brzozowski_deriv(current_re, tok);
if (!deriv)
return nullptr;
// Sub-cycle: derivative returned to the cycle entry regex
if (deriv == cycle_regex ||
(deriv->get_expr() && cycle_regex->get_expr() &&
deriv->get_expr() == cycle_regex->get_expr())) {
sub_cycles.push_back(std::make_pair(cycle_start, i + 1));
cycle_start = i + 1;
current_re = cycle_regex;
} else {
current_re = deriv;
}
}
// Remaining tokens that don't complete a sub-cycle
if (cycle_start < tokens.size())
sub_cycles.push_back(std::make_pair(cycle_start, tokens.size()));
if (sub_cycles.empty())
return nullptr;
// Build a stabilizer body for each sub-cycle.
// body = to_re(t0) · [filteredStar(R1, t1)] · to_re(t1) · ... · to_re(t_{n-1})
euf::snode* overall_union = nullptr;
for (auto const& sc : sub_cycles) {
unsigned start = sc.first;
unsigned end = sc.second;
if (start >= end)
continue;
euf::snode* re_state = cycle_regex;
euf::snode* body = nullptr;
for (unsigned i = start; i < end; ++i) {
euf::snode* tok = tokens[i];
if (!tok)
break;
// Insert filtered stabilizer star before each token after the first
if (i > start) {
euf::snode* filtered = get_filtered_stabilizer_star(re_state, tok);
if (filtered) {
expr* fe = filtered->get_expr();
if (fe) {
if (!body) {
body = filtered;
} else {
expr* be = body->get_expr();
if (be) {
expr_ref cat(seq.re.mk_concat(be, fe), m);
body = m_sg.mk(cat);
}
}
}
}
}
// Convert char token to regex: to_re(unit(tok))
expr* tok_expr = tok->get_expr();
if (!tok_expr)
break;
expr_ref unit_str(seq.str.mk_unit(tok_expr), m);
expr_ref tok_re(seq.re.mk_to_re(unit_str), m);
euf::snode* tok_re_sn = m_sg.mk(tok_re);
if (!body) {
body = tok_re_sn;
} else {
expr* be = body->get_expr();
expr* te = tok_re_sn->get_expr();
if (be && te) {
expr_ref cat(seq.re.mk_concat(be, te), m);
body = m_sg.mk(cat);
}
}
// Advance the regex state
euf::snode* deriv = m_sg.brzozowski_deriv(re_state, tok);
if (!deriv)
break;
re_state = deriv;
}
if (!body)
continue;
if (!overall_union) {
overall_union = body;
} else {
expr* oe = overall_union->get_expr();
expr* be = body->get_expr();
if (oe && be) {
expr_ref u(seq.re.mk_union(oe, be), m);
overall_union = m_sg.mk(u);
}
}
}
return overall_union;
}
// -----------------------------------------------------------------------
// Stabilizer-based subsumption (enhanced)
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386)
// -----------------------------------------------------------------------
bool nseq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) {
if (!mem.m_str || !mem.m_regex)
return false;
euf::snode* stab = stabilizer_from_cycle(cycle, mem.m_regex);
if (!stab)
// 1. Leading token must be a variable
euf::snode* first = mem.m_str->first();
if (!first || !first->is_var())
return false;
// A constraint x ∈ R is subsumed when R ⊆ stab.
// For the simple case where cycle == current regex,
// R ⊆ R* is always true (since R* accepts everything R does, and more).
// This handles the common idempotent cycle case.
if (cycle == mem.m_regex)
return true;
// 2. Must have stabilizers for the regex
if (!has_stabilizers(mem.m_regex))
return false;
// More sophisticated subsumption checks (regex containment)
// would require a regex inclusion decision procedure.
// For now, only handle the pointer-equality case.
return false;
// 3. Build stabStar = star(union(all stabilizers for this regex))
euf::snode* stab_union = get_stabilizer_union(mem.m_regex);
if (!stab_union)
return false;
seq_util& seq = m_sg.get_seq_util();
ast_manager& mgr = m_sg.get_manager();
expr* su_expr = stab_union->get_expr();
if (!su_expr)
return false;
expr_ref stab_star(seq.re.mk_star(su_expr), mgr);
euf::snode* stab_star_sn = m_sg.mk(stab_star);
if (!stab_star_sn)
return false;
// 4. Collect all primitive regex constraints on variable `first`
euf::snode* x_range = collect_primitive_regex_intersection(first, node);
if (!x_range)
return false;
// 5. Check L(x_range) ⊆ L(stab_star)
lbool result = is_language_subset(x_range, stab_star_sn);
return result == l_true;
}
}

View file

@ -31,17 +31,119 @@ Author:
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include "util/uint_set.h"
#include "util/lbool.h"
namespace smt {
class nseq_regex {
euf::sgraph& m_sg;
// -----------------------------------------------------------------
// Stabilizer store (non-backtrackable, persists across solve() calls)
// Mirrors ZIPT Environment.stabilizers / selfStabilizing
// (Environment.cs:36-37)
// -----------------------------------------------------------------
// Maps regex snode id → list of stabilizer snodes.
// Each regex may accumulate multiple stabilizers from different
// cycle detections. The list is deduplicated by pointer equality.
u_map<ptr_vector<euf::snode>> m_stabilizers;
// Set of regex snode ids that are self-stabilizing, i.e., the
// stabilizer for the regex is the regex itself (e.g., r*).
// Mirrors ZIPT Environment.selfStabilizing (Environment.cs:37)
uint_set m_self_stabilizing;
// -----------------------------------------------------------------
// BFS emptiness check helpers (private)
// -----------------------------------------------------------------
// Collect character boundary points from range predicates and
// to_re string literals in a regex. Boundaries partition the
// alphabet into equivalence classes where all characters in
// the same class produce identical derivatives.
void collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const;
// Build a set of representative character snodes, one per
// alphabet equivalence class, derived from the boundary points
// of the given regex.
void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps);
public:
nseq_regex(euf::sgraph& sg) : m_sg(sg) {}
euf::sgraph& sg() { return m_sg; }
// -----------------------------------------------------------------
// Stabilizer store API
// Mirrors ZIPT Environment stabilizer management
// (Environment.cs:114-146)
// -----------------------------------------------------------------
// Reset all stabilizer data. Called at the start of each solve()
// invocation if fresh stabilizer state is desired.
// Note: stabilizers persist across depth-bound iterations by default;
// only call this to clear accumulated state.
void reset_stabilizers();
// Add a stabilizer for a regex. De-duplicates by pointer equality.
// Mirrors ZIPT Environment.AddStabilizer (Environment.cs:114-123).
void add_stabilizer(euf::snode* regex, euf::snode* stabilizer);
// Get the union of all stabilizers registered for a regex.
// Returns a single re.union snode combining all stabilizers,
// or nullptr if no stabilizers exist for the regex.
// Mirrors ZIPT Environment.GetStabilizerUnion (Environment.cs:125-128).
euf::snode* get_stabilizer_union(euf::snode* regex);
// Check if any stabilizers have been registered for a regex.
bool has_stabilizers(euf::snode* regex) const;
// Get raw stabilizer list for a regex (read-only).
// Returns nullptr if no stabilizers exist.
ptr_vector<euf::snode> const* get_stabilizers(euf::snode* regex) const;
// Mark a regex as self-stabilizing (stabilizer == regex itself).
// Mirrors ZIPT Environment.SetSelfStabilizing (Environment.cs:143-146).
void set_self_stabilizing(euf::snode* regex);
// Check if a regex is marked as self-stabilizing.
// Mirrors ZIPT Environment.IsSelfStabilizing (Environment.cs:134-141).
bool is_self_stabilizing(euf::snode* regex) const;
// -----------------------------------------------------------------
// Self-stabilizing auto-detection and propagation through derivatives
// -----------------------------------------------------------------
// Determine if a regex is inherently self-stabilizing based on its
// structure. Returns true for:
// - R* (Kleene star): D(c, R*) = D(c,R)·R*, so R* is its own
// stabilizer regardless of the character.
// - Σ* (full_seq): D(c, Σ*) = Σ*, trivially self-stabilizing.
// - ∅ (fail/empty language): no live derivatives, trivially stable.
// - Complement of full_seq (~Σ* = ∅): also trivially stable.
// Does NOT mark the snode; call set_self_stabilizing to persist.
bool compute_self_stabilizing(euf::snode* regex) const;
// After computing a derivative of parent, propagate the self-
// stabilizing flag to the derivative result if warranted.
// Applies structural rules:
// - If parent is R* → derivative is always self-stabilizing
// (derivative has the form D(c,R)·R* which contains the R* tail).
// - If parent is R·S and S is self-stabilizing → derivative may
// inherit from the self-stabilizing tail.
// - If parent is R|S and both are self-stabilizing → derivative is.
// - If parent is R∩S and both are self-stabilizing → derivative is.
// - If parent is ~R and R is self-stabilizing → derivative is.
// Updates the internal self-stabilizing set for the derivative.
void propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv);
// Convenience: compute derivative and propagate self-stabilizing flags.
// Equivalent to calling derivative() followed by
// propagate_self_stabilizing().
euf::snode* derivative_with_propagation(euf::snode* re, euf::snode* elem);
// -----------------------------------------------------------------
// Basic regex predicates
// -----------------------------------------------------------------
@ -51,6 +153,36 @@ namespace smt {
// derived emptiness (e.g., union of empties, concat with empty).
bool is_empty_regex(euf::snode* re) const;
// BFS emptiness check over the Brzozowski derivative automaton.
// Explores reachable derivative states using representative
// characters from each alphabet equivalence class.
// l_true — regex is definitely empty (no accepting states reachable)
// l_false — regex is definitely non-empty (found a nullable state)
// l_undef — inconclusive (hit exploration bound or failed derivative)
// max_states caps the number of explored states to prevent blowup.
lbool is_empty_bfs(euf::snode* re, unsigned max_states = 10000);
// Check emptiness of the intersection of multiple regexes.
// Uses BFS over the product of Brzozowski derivative automata.
// l_true — intersection is definitely empty
// l_false — intersection is definitely non-empty
// l_undef — inconclusive (hit exploration bound)
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
lbool check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
unsigned max_states = 10000);
// Check if L(subset_re) ⊆ L(superset_re).
// Computed as: subset_re ∩ complement(superset_re) = ∅.
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
lbool is_language_subset(euf::snode* subset_re, euf::snode* superset_re);
// Collect all primitive regex constraints on variable `var` from
// the node's str_mem list and return their intersection as a
// single regex snode (using re.inter).
// Returns nullptr if no primitive constraints found.
euf::snode* collect_primitive_regex_intersection(
euf::snode* var, seq::nielsen_node const& node);
// check if regex is the full language (Σ* / re.all)
bool is_full_regex(euf::snode* re) const {
return re && re->is_full_seq();
@ -76,10 +208,23 @@ namespace smt {
return m_sg.brzozowski_deriv(re, elem);
}
// check if all minterms of the regex produce the same Brzozowski
// derivative ("uniform derivative"). When this holds, the derivative
// is independent of the character value: for any character c,
// deriv(regex, c) = result. This enables deterministic consumption
// of symbolic (variable) characters without branching.
// Returns the uniform derivative if found, nullptr otherwise.
// Mirrors ZIPT's SimplifyCharRegex uniform-derivative fast path.
euf::snode* try_uniform_derivative(euf::snode* regex);
// compute derivative of a str_mem constraint: advance past one character.
// the string side is shortened by drop_first and the regex is derived.
// Propagates self-stabilizing flags from the parent regex to the derivative.
seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem);
euf::snode* parent_re = mem.m_regex;
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, elem);
if (deriv)
propagate_self_stabilizing(parent_re, deriv);
euf::snode* new_str = m_sg.drop_first(mem.m_str);
return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep);
}
@ -175,11 +320,42 @@ namespace smt {
euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
euf::snode* current_regex);
// Strengthened stabilizer construction with sub-cycle detection.
// Replays the consumed character tokens from cycle_history on the
// cycle regex, detecting sub-cycles (where the derivative loops
// back to the original regex). For each sub-cycle, builds a
// stabilizer from the interleaved character tokens and filtered
// sub-stabilizers.
// Returns a union of all sub-cycle stabilizer bodies, or nullptr
// if no non-trivial stabilizer can be built.
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225).
euf::snode* strengthened_stabilizer(euf::snode* cycle_regex,
euf::snode* cycle_history);
// Get filtered stabilizer star: for regex state re, retrieve
// existing stabilizers, filter out those whose language can
// start with any character in excluded_char, and wrap the
// remaining in star(union(...)).
// Returns nullptr (or empty-equivalent) if no valid stabilizers.
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243).
euf::snode* get_filtered_stabilizer_star(euf::snode* re,
euf::snode* excluded_char);
// Extract the cycle portion of a str_mem's history by comparing
// the current history with an ancestor's history length.
// Returns the sub-sequence of tokens consumed since the ancestor,
// or nullptr if the history did not advance.
euf::snode* extract_cycle_history(seq::str_mem const& current,
seq::str_mem const& ancestor);
// try to subsume a str_mem constraint using stabilizer-based
// reasoning: if extract_cycle finds a cycle, check whether
// the current regex is already covered by the stabilizer.
// reasoning. Enhanced version: checks if the leading variable's
// language (intersection of all its primitive regex constraints)
// is a subset of star(union(stabilizers)) for the current regex.
// Falls back to cycle-based pointer equality check.
// returns true if the constraint can be dropped.
bool try_subsume(seq::str_mem const& mem);
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386).
bool try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node);
};
}

View file

@ -21,6 +21,7 @@ Author:
#include "smt/seq/seq_nielsen.h"
#include "smt/seq/seq_parikh.h"
#include "smt/nseq_regex.h"
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
#include "ast/rewriter/seq_rewriter.h"
@ -206,6 +207,19 @@ namespace seq {
m_var_lb.insert(kv.m_key, kv.m_value);
for (auto const& kv : parent.m_var_ub)
m_var_ub.insert(kv.m_key, kv.m_value);
// clone regex occurrence tracking
m_regex_occurrence = parent.m_regex_occurrence;
}
bool nielsen_node::track_regex_occurrence(unsigned regex_id, unsigned mem_id) {
auto key = std::make_pair(regex_id, mem_id);
auto it = m_regex_occurrence.find(key);
if (it != m_regex_occurrence.end()) {
// Already seen — cycle detected
return true;
}
m_regex_occurrence[key] = m_id;
return false;
}
void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) {
@ -499,11 +513,13 @@ namespace seq {
m_sg(sg),
m_solver(solver),
m_parikh(alloc(seq_parikh, sg)),
m_nseq_regex(alloc(smt::nseq_regex, sg)),
m_len_vars(sg.get_manager()) {
}
nielsen_graph::~nielsen_graph() {
dealloc(m_parikh);
dealloc(m_nseq_regex);
reset();
}
@ -1670,6 +1686,91 @@ namespace seq {
}
}
// consume symbolic characters (s_unit tokens) via uniform derivative
// check. When all minterms of the regex produce the same Brzozowski
// derivative, the derivative is independent of the character value
// and we can deterministically consume the token. Forward direction
// only (matches ZIPT history tracking convention).
//
// Extended: when uniform derivative fails but the token has a char_range
// constraint (from apply_regex_var_split), check if the char_range is a
// subset of a single minterm's character class. If so, the derivative
// is deterministic for that token.
// Mirrors ZIPT StrMem.SimplifyCharRegex lines 96-117.
for (str_mem& mem : m_str_mem) {
if (!mem.m_str || !mem.m_regex)
continue;
while (mem.m_str && !mem.m_str->is_empty()) {
euf::snode* tok = mem.m_str->first();
if (!tok || !tok->is_unit())
break;
// compute minterms and check for uniform derivative
euf::snode_vector minterms;
sg.compute_minterms(mem.m_regex, minterms);
if (minterms.empty())
break;
euf::snode* uniform = nullptr;
bool is_uniform = true;
for (euf::snode* mt : minterms) {
if (!mt || mt->is_fail())
continue;
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
if (!deriv) { is_uniform = false; break; }
if (!uniform) {
uniform = deriv;
} else if (uniform->id() != deriv->id()) {
is_uniform = false; break;
}
}
if (is_uniform && uniform) {
if (uniform->is_fail()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
mem.m_str = sg.drop_left(mem.m_str, 1);
mem.m_regex = uniform;
mem.m_history = sg.mk_concat(mem.m_history, tok);
continue;
}
// Uniform derivative failed — try char_range subset approach.
// If the symbolic char has a char_range constraint and that
// range is a subset of exactly one minterm's character class,
// we can deterministically take that minterm's derivative.
if (m_char_ranges.contains(tok->id()) && g.m_parikh) {
char_set const& cs = m_char_ranges[tok->id()];
if (!cs.is_empty()) {
euf::snode* matching_deriv = nullptr;
bool found = false;
for (euf::snode* mt : minterms) {
if (!mt || mt->is_fail()) continue;
if (!mt->get_expr()) continue;
char_set mt_cs = g.m_parikh->minterm_to_char_set(mt->get_expr());
if (cs.is_subset(mt_cs)) {
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
if (!deriv) { found = false; break; }
if (deriv->is_fail()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
matching_deriv = deriv;
found = true;
break;
}
}
if (found && matching_deriv) {
mem.m_str = sg.drop_left(mem.m_str, 1);
mem.m_regex = matching_deriv;
mem.m_history = sg.mk_concat(mem.m_history, tok);
continue;
}
}
}
break;
}
}
// check for regex memberships that are immediately infeasible
for (str_mem& mem : m_str_mem) {
if (!mem.m_str || !mem.m_regex)
@ -1692,6 +1793,23 @@ namespace seq {
if (wi < m_str_mem.size())
m_str_mem.shrink(wi);
// Regex widening: for each remaining str_mem, overapproximate
// the string by replacing variables with their regex intersection
// and check if the result intersected with the target regex is empty.
// Detects infeasible constraints that would otherwise require
// expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening.
if (g.m_nseq_regex) {
for (str_mem const& mem : m_str_mem) {
if (!mem.m_str || !mem.m_regex)
continue;
if (g.check_regex_widening(*this, mem.m_str, mem.m_regex)) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
}
}
// IntBounds initialization: derive per-variable Parikh length bounds from
// remaining regex memberships and add to m_int_constraints.
init_var_bounds_from_mems();
@ -2075,6 +2193,15 @@ namespace seq {
}
if (sr == simplify_result::satisfied || node->is_satisfied()) {
// Before declaring SAT, check leaf-node regex feasibility:
// for each variable with multiple regex constraints, verify
// that the intersection of all its regexes is non-empty.
// Mirrors ZIPT NielsenNode.CheckRegex.
if (!check_leaf_regex(*node)) {
node->set_general_conflict(true);
node->set_reason(backtrack_reason::regex);
return search_result::unsat;
}
m_sat_node = node;
m_sat_path = cur_path;
return search_result::sat;
@ -3132,9 +3259,10 @@ namespace seq {
// Modifier: apply_star_intr
// Star introduction: for a str_mem x·s ∈ R where a cycle is detected
// (backedge exists), introduce a stabilizer constraint x ∈ base*.
// Creates a child that adds x ∈ base* membership and constrains
// the remainder not to start with base.
// mirrors ZIPT's StarIntrModifier
// Creates a child that splits x into pr·po, adds pr ∈ base* and
// po·s ∈ R, with a blocking constraint on po.
// Enhanced to use strengthened_stabilizer and register via add_stabilizer.
// mirrors ZIPT's StarIntrModifier (StarIntrModifier.cs:22-85)
// -----------------------------------------------------------------------
bool nielsen_graph::apply_star_intr(nielsen_node* node) {
@ -3149,41 +3277,86 @@ namespace seq {
euf::snode* first = mem.m_str->first();
if (!first || !first->is_var()) continue;
// Introduce a fresh variable z constrained by z ∈ R*,
// replacing x → z·fresh_post. This breaks the cycle because
// z is a new variable that won't trigger star_intr again.
euf::snode* x = first;
euf::snode* z = mk_fresh_var();
euf::snode* fresh_post = mk_fresh_var();
euf::snode* str_tail = m_sg.drop_first(mem.m_str);
// Build z ∈ R* membership: the star of the current regex
seq_util& seq = m_sg.get_seq_util();
ast_manager& mgr = m_sg.get_manager();
expr* re_expr = mem.m_regex->get_expr();
if (!re_expr)
continue;
expr_ref star_re(seq.re.mk_star(re_expr), seq.get_manager());
// Determine the stabilizer base:
// 1. If self-stabilizing, use regex itself as stabilizer
// 2. Otherwise, try strengthened_stabilizer from cycle history
// 3. Fall back to simple star(R)
euf::snode* stab_base = nullptr;
if (m_nseq_regex && m_nseq_regex->is_self_stabilizing(mem.m_regex)) {
stab_base = mem.m_regex;
}
else if (m_nseq_regex && mem.m_history) {
stab_base = m_nseq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history);
}
if (!stab_base) {
// Fall back: simple star of the cycle regex
stab_base = mem.m_regex;
}
// Register the stabilizer
if (m_nseq_regex)
m_nseq_regex->add_stabilizer(mem.m_regex, stab_base);
// Check if stabilizer equals regex → self-stabilizing
if (m_nseq_regex && stab_base == mem.m_regex)
m_nseq_regex->set_self_stabilizing(mem.m_regex);
// Build stab_star = star(stab_base)
expr* stab_expr = stab_base->get_expr();
if (!stab_expr)
continue;
expr_ref star_re(seq.re.mk_star(stab_expr), mgr);
euf::snode* star_sn = m_sg.mk(star_re);
if (!star_sn)
continue;
// Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint
if (m_nseq_regex && m_nseq_regex->try_subsume(mem, *node))
continue;
// Create child: x → pr · po
euf::snode* pr = mk_fresh_var();
euf::snode* po = mk_fresh_var();
euf::snode* str_tail = m_sg.drop_first(mem.m_str);
nielsen_node* child = mk_child(node);
// Add membership: z ∈ R* (stabilizer constraint)
child->add_str_mem(str_mem(z, star_sn, mem.m_history, next_mem_id(), mem.m_dep));
// Add membership: pr ∈ stab_base* (stabilizer constraint)
child->add_str_mem(str_mem(pr, star_sn, mem.m_history, next_mem_id(), mem.m_dep));
// Add remaining membership: fresh_post · tail ∈ R
euf::snode* post_tail = str_tail->is_empty() ? fresh_post : m_sg.mk_concat(fresh_post, str_tail);
child->add_str_mem(str_mem(post_tail, mem.m_regex, mem.m_history, next_mem_id(), mem.m_dep));
// Add remaining membership: po · tail ∈ R (same regex, trimmed history)
euf::snode* post_tail = str_tail->is_empty() ? po : m_sg.mk_concat(po, str_tail);
child->add_str_mem(str_mem(post_tail, mem.m_regex, nullptr, next_mem_id(), mem.m_dep));
// Substitute x → z · fresh_post
// Blocking constraint: po must NOT start with stab_base
// po ∈ complement(non_nullable(stab_base) · Σ*)
// This prevents redundant unrolling.
if (!stab_base->is_nullable()) {
sort* str_sort = seq.str.mk_string_sort();
expr_ref full_seq(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
expr_ref base_then_all(seq.re.mk_concat(stab_expr, full_seq), mgr);
expr_ref block_re(seq.re.mk_complement(base_then_all), mgr);
euf::snode* block_sn = m_sg.mk(block_re);
if (block_sn)
child->add_str_mem(str_mem(po, block_sn, nullptr, next_mem_id(), mem.m_dep));
}
// Substitute x → pr · po
nielsen_edge* e = mk_edge(node, child, false);
euf::snode* replacement = m_sg.mk_concat(z, fresh_post);
euf::snode* replacement = m_sg.mk_concat(pr, po);
nielsen_subst s(x, replacement, mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
// Do not re-set backedge — z is fresh, so star_intr won't fire again
return true;
}
return false;
@ -4101,5 +4274,160 @@ namespace seq {
return mdl.get() != nullptr;
}
}
// -----------------------------------------------------------------------
// Regex widening: overapproximate string and check intersection emptiness
// Mirrors ZIPT NielsenNode.CheckRegexWidening (NielsenNode.cs:1350-1380)
// -----------------------------------------------------------------------
bool nielsen_graph::check_regex_widening(nielsen_node const& node,
euf::snode* str, euf::snode* regex) {
if (!str || !regex || !m_nseq_regex)
return false;
// Only apply to ground regexes with non-trivial strings
if (!regex->is_ground())
return false;
seq_util seq(m_sg.get_manager());
ast_manager& mgr = m_sg.get_manager();
// Build the overapproximation regex for the string.
// Variables → intersection of their primitive regex constraints (or Σ*)
// Symbolic chars → re.range from char_ranges (or full_char)
// Concrete chars → to_re(unit(c))
euf::snode_vector tokens;
str->collect_tokens(tokens);
if (tokens.empty())
return false;
euf::snode* approx = nullptr;
for (euf::snode* tok : tokens) {
euf::snode* tok_re = nullptr;
if (tok->is_char()) {
// Concrete character → to_re(unit(c))
expr* te = tok->get_expr();
if (!te) return false;
expr_ref unit_s(seq.str.mk_unit(te), mgr);
expr_ref tre(seq.re.mk_to_re(unit_s), mgr);
tok_re = m_sg.mk(tre);
}
else if (tok->is_var()) {
// Variable → intersection of primitive regex constraints, or Σ*
euf::snode* x_range = m_nseq_regex->collect_primitive_regex_intersection(tok, node);
if (x_range) {
tok_re = x_range;
} else {
// Unconstrained variable: approximate as Σ*
sort* str_sort = seq.str.mk_string_sort();
expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
tok_re = m_sg.mk(all_re);
}
}
else if (tok->is_unit()) {
// Symbolic char — try to get char_range
if (node.char_ranges().contains(tok->id())) {
char_set const& cs = node.char_ranges()[tok->id()];
if (!cs.is_empty()) {
// Build union of re.range for each interval
euf::snode* range_re = nullptr;
for (auto const& r : cs.ranges()) {
expr_ref lo(seq.mk_char(r.m_lo), mgr);
expr_ref hi(seq.mk_char(r.m_hi - 1), mgr);
expr_ref rng(seq.re.mk_range(
seq.str.mk_string(zstring(r.m_lo)),
seq.str.mk_string(zstring(r.m_hi - 1))), mgr);
euf::snode* rng_sn = m_sg.mk(rng);
if (!range_re) {
range_re = rng_sn;
} else {
expr_ref u(seq.re.mk_union(range_re->get_expr(), rng_sn->get_expr()), mgr);
range_re = m_sg.mk(u);
}
}
tok_re = range_re;
}
}
if (!tok_re) {
// Unconstrained symbolic char: approximate as full_char (single char, any value)
sort* str_sort = seq.str.mk_string_sort();
expr_ref fc(seq.re.mk_full_char(seq.re.mk_re(str_sort)), mgr);
tok_re = m_sg.mk(fc);
}
}
else {
// Unknown token type — approximate as Σ*
sort* str_sort = seq.str.mk_string_sort();
expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
tok_re = m_sg.mk(all_re);
}
if (!tok_re)
return false;
if (!approx) {
approx = tok_re;
} else {
expr* ae = approx->get_expr();
expr* te = tok_re->get_expr();
if (!ae || !te) return false;
expr_ref cat(seq.re.mk_concat(ae, te), mgr);
approx = m_sg.mk(cat);
}
}
if (!approx)
return false;
// Check intersection(approx, regex) for emptiness
// Build intersection regex
expr* ae = approx->get_expr();
expr* re = regex->get_expr();
if (!ae || !re)
return false;
expr_ref inter(seq.re.mk_inter(ae, re), mgr);
euf::snode* inter_sn = m_sg.mk(inter);
if (!inter_sn)
return false;
lbool result = m_nseq_regex->is_empty_bfs(inter_sn, 5000);
return result == l_true;
}
// -----------------------------------------------------------------------
// Leaf-node regex intersection emptiness check
// Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329)
// -----------------------------------------------------------------------
bool nielsen_graph::check_leaf_regex(nielsen_node const& node) {
if (!m_nseq_regex)
return true;
// Group str_mem constraints by variable (primitive constraints only)
u_map<ptr_vector<euf::snode>> var_regexes;
for (auto const& mem : node.str_mems()) {
if (!mem.m_str || !mem.m_regex)
continue;
if (!mem.is_primitive())
continue;
euf::snode* first = mem.m_str->first();
if (!first || !first->is_var())
continue;
auto& list = var_regexes.insert_if_not_there(first->id(), ptr_vector<euf::snode>());
list.push_back(mem.m_regex);
}
// For each variable with 2+ regex constraints, check intersection non-emptiness
for (auto& [var_id, regexes] : var_regexes) {
if (regexes.size() < 2)
continue;
lbool result = m_nseq_regex->check_intersection_emptiness(regexes, 5000);
if (result == l_true) {
// Intersection is empty — infeasible
return false;
}
}
return true;
}
}

View file

@ -244,6 +244,10 @@ Author:
#include <map>
#include "model/model.h"
namespace smt {
class nseq_regex; // forward declaration (defined in smt/nseq_regex.h)
}
namespace seq {
// forward declarations
@ -538,6 +542,12 @@ namespace seq {
// asserted when this node's solver scope is entered.
unsigned m_parent_ic_count = 0;
// RegexOccurrence: maps (regex snode id, str_mem id) → node id where
// this regex was last seen on the current DFS path.
// Used for precise cycle detection with history-length-based progress.
// Mirrors ZIPT LocalInfo.RegexOccurrence (LocalInfo.cs:34)
std::map<std::pair<unsigned, unsigned>, unsigned> m_regex_occurrence;
public:
nielsen_node(nielsen_graph* graph, unsigned id);
@ -628,6 +638,15 @@ namespace seq {
// clone constraints from a parent node
void clone_from(nielsen_node const& parent);
// Regex occurrence tracking: record current regex state for cycle detection.
// Returns true if a cycle is detected (same regex seen before on this path).
bool track_regex_occurrence(unsigned regex_id, unsigned mem_id);
// Get the regex occurrence map (for undo on backtrack).
std::map<std::pair<unsigned, unsigned>, unsigned> const& regex_occurrence() const {
return m_regex_occurrence;
}
// apply a substitution to all constraints
void apply_subst(euf::sgraph& sg, nielsen_subst const& s);
@ -739,6 +758,11 @@ namespace seq {
// Parikh image filter: generates modular length constraints from regex
// memberships. Allocated in the constructor; owned by this graph.
seq_parikh* m_parikh = nullptr;
// Regex membership module: stabilizers, emptiness checks, language
// inclusion, derivatives. Allocated in the constructor; owned by this graph.
smt::nseq_regex* m_nseq_regex = nullptr;
// -----------------------------------------------
// Modification counter for substitution length tracking.
// mirrors ZIPT's LocalInfo.CurrentModificationCnt
@ -871,9 +895,28 @@ namespace seq {
// Caller takes ownership of the returned model pointer.
bool solve_sat_path_ints(model_ref& mdl);
// accessor for the nseq_regex module
smt::nseq_regex* nseq_regex_module() const { return m_nseq_regex; }
private:
search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
// Regex widening: overapproximate `str` by replacing variables with
// the intersection of their primitive regex constraints (or Σ* if
// unconstrained), replacing symbolic chars with their char ranges,
// then checking if the approximation intersected with `regex` is empty.
// Returns true if widening detects infeasibility (UNSAT).
// Mirrors ZIPT NielsenNode.CheckRegexWidening (NielsenNode.cs:1350-1380)
bool check_regex_widening(nielsen_node const& node,
euf::snode* str, euf::snode* regex);
// Check regex feasibility at a leaf node: for each variable with
// multiple primitive regex constraints, check that the intersection
// of all its regexes is non-empty.
// Returns true if all constraints are feasible.
// Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329)
bool check_leaf_regex(nielsen_node const& node);
// Apply the Parikh image filter to a node: generate modular length
// constraints from regex memberships and append them to the node's
// int_constraints. Also performs a lightweight feasibility pre-check;

View file

@ -449,6 +449,7 @@ static void test_sgraph_factory() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
// mk_var
euf::snode* x = sg.mk_var(symbol("x"));
@ -463,7 +464,7 @@ static void test_sgraph_factory() {
SASSERT(a->length() == 1);
// mk_empty
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
SASSERT(e && e->is_empty());
SASSERT(e->is_nullable());
SASSERT(e->length() == 0);
@ -499,6 +500,7 @@ static void test_sgraph_indexing() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
@ -533,7 +535,7 @@ static void test_sgraph_indexing() {
SASSERT(a->at(1) == nullptr);
// empty: at(0) is nullptr
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
SASSERT(e->at(0) == nullptr);
euf::snode_vector empty_tokens;
e->collect_tokens(empty_tokens);
@ -547,6 +549,7 @@ static void test_sgraph_drop() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
@ -591,7 +594,7 @@ static void test_sgraph_drop() {
SASSERT(e->is_empty());
// drop from empty: no change
euf::snode* ee = sg.drop_first(sg.mk_empty());
euf::snode* ee = sg.drop_first(sg.mk_empty_seq(seq.str.mk_string_sort()));
SASSERT(ee->is_empty());
}
@ -602,6 +605,7 @@ static void test_sgraph_subst() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
@ -624,7 +628,7 @@ static void test_sgraph_subst() {
SASSERT(same == xax);
// substitution of variable with empty
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
euf::snode* collapsed = sg.subst(xax, x, e);
SASSERT(collapsed->length() == 1); // just 'a' remains
SASSERT(collapsed == a);

View file

@ -91,14 +91,15 @@ static void test_nseq_simplification() {
std::cout << "test_nseq_simplification\n";
ast_manager m;
reg_decl_plugins(m);
seq_util su(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
nseq_basic_dummy_solver solver;
seq::nielsen_graph ng(sg, solver);
// Add a trivial equality: empty = empty
euf::snode* empty1 = sg.mk_empty();
euf::snode* empty2 = sg.mk_empty();
euf::snode* empty1 = sg.mk_empty_seq(su.str.mk_string_sort());
euf::snode* empty2 = sg.mk_empty_seq(su.str.mk_string_sort());
ng.add_str_eq(empty1, empty2);
@ -113,6 +114,7 @@ static void test_nseq_node_satisfied() {
std::cout << "test_nseq_node_satisfied\n";
ast_manager m;
reg_decl_plugins(m);
seq_util su(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
nseq_basic_dummy_solver solver;
@ -123,7 +125,7 @@ static void test_nseq_node_satisfied() {
SASSERT(node->is_satisfied());
// add a trivial equality
euf::snode* empty = sg.mk_empty();
euf::snode* empty = sg.mk_empty_seq(su.str.mk_string_sort());
seq::dep_tracker dep;
seq::str_eq eq(empty, empty, dep);
node->add_str_eq(eq);

View file

@ -16,6 +16,9 @@ Abstract:
#include "ast/euf/euf_egraph.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/nseq_regex.h"
#include "smt/seq/seq_nielsen.h"
#include "util/lbool.h"
#include "util/zstring.h"
#include <iostream>
// Test 1: nseq_regex instantiation
@ -66,9 +69,643 @@ static void test_nseq_regex_is_full() {
std::cout << " ok: re.all not recognized as empty\n";
}
// Test 4: strengthened_stabilizer — null safety
static void test_strengthened_stabilizer_null() {
std::cout << "test_strengthened_stabilizer_null\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
SASSERT(nr.strengthened_stabilizer(nullptr, nullptr) == nullptr);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
euf::snode* full_re = sg.mk(full_e);
SASSERT(nr.strengthened_stabilizer(full_re, nullptr) == nullptr);
SASSERT(nr.strengthened_stabilizer(nullptr, full_re) == nullptr);
std::cout << " ok\n";
}
// Test 5: strengthened_stabilizer — single char cycle on a*
// Regex a*, history = 'a'. D('a', a*) = a* (sub-cycle back to start).
// Stabilizer body should be to_re("a").
static void test_strengthened_stabilizer_single_char() {
std::cout << "test_strengthened_stabilizer_single_char\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// Build a*
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
euf::snode* re_star_a = sg.mk(star_a);
// Build history = char 'a' (single token, no concat needed)
euf::snode* tok_a = sg.mk_char('a');
euf::snode* history = tok_a;
euf::snode* result = nr.strengthened_stabilizer(re_star_a, history);
// Should produce a non-null stabilizer body (to_re("a"))
SASSERT(result != nullptr);
std::cout << " ok: a* with history 'a' -> non-null stabilizer\n";
}
// Test 6: strengthened_stabilizer — two-char cycle with sub-cycle
// Regex (ab)*, history = 'a', 'b'. D('a', (ab)*) = b(ab)*, D('b', b(ab)*) = (ab)*
// This should detect a sub-cycle and build a stabilizer body.
static void test_strengthened_stabilizer_two_char() {
std::cout << "test_strengthened_stabilizer_two_char\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// Build (ab)*
expr_ref ab(su.re.mk_to_re(su.str.mk_string("ab")), m);
expr_ref star_ab(su.re.mk_star(ab), m);
euf::snode* re_star_ab = sg.mk(star_ab);
// Build history: concat(char_a, char_b) using string concat
euf::snode* tok_a = sg.mk_char('a');
euf::snode* tok_b = sg.mk_char('b');
euf::snode* history = sg.mk_concat(tok_a, tok_b);
euf::snode* result = nr.strengthened_stabilizer(re_star_ab, history);
// Should produce a non-null stabilizer body
SASSERT(result != nullptr);
std::cout << " ok: (ab)* with history 'ab' -> non-null stabilizer\n";
}
// Test 7: get_filtered_stabilizer_star — no stabilizers registered
static void test_filtered_stabilizer_star_empty() {
std::cout << "test_filtered_stabilizer_star_empty\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
euf::snode* full_re = sg.mk(full_e);
euf::snode* tok_a = sg.mk_char('a');
euf::snode* result = nr.get_filtered_stabilizer_star(full_re, tok_a);
SASSERT(result == nullptr);
std::cout << " ok: no stabilizers -> nullptr\n";
}
// Test 8: get_filtered_stabilizer_star — with registered stabilizer that passes filter
static void test_filtered_stabilizer_star_with_stab() {
std::cout << "test_filtered_stabilizer_star_with_stab\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// Build a* as the regex state
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
euf::snode* re_star_a = sg.mk(star_a);
// Register a stabilizer: to_re("b") — only accepts "b"
expr_ref stab_b(su.re.mk_to_re(su.str.mk_string("b")), m);
euf::snode* stab_b_sn = sg.mk(stab_b);
nr.add_stabilizer(re_star_a, stab_b_sn);
// Exclude char 'a': D('a', to_re("b")) should be fail
euf::snode* tok_a = sg.mk_char('a');
euf::snode* result = nr.get_filtered_stabilizer_star(re_star_a, tok_a);
// to_re("b") should pass the filter → result is star(to_re("b"))
SASSERT(result != nullptr);
SASSERT(result->is_star());
std::cout << " ok: filter keeps to_re('b') when excluding 'a'\n";
}
// Test 9: get_filtered_stabilizer_star — stabilizer filtered out
static void test_filtered_stabilizer_star_filtered() {
std::cout << "test_filtered_stabilizer_star_filtered\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// Build a* as the regex state
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
euf::snode* re_star_a = sg.mk(star_a);
// Register a stabilizer: to_re("a") — accepts "a"
expr_ref stab_a(su.re.mk_to_re(su.str.mk_string("a")), m);
euf::snode* stab_a_sn = sg.mk(stab_a);
nr.add_stabilizer(re_star_a, stab_a_sn);
// Exclude char 'a': D('a', to_re("a")) is NOT fail → filtered out
euf::snode* tok_a = sg.mk_char('a');
euf::snode* result = nr.get_filtered_stabilizer_star(re_star_a, tok_a);
SASSERT(result == nullptr);
std::cout << " ok: filter removes to_re('a') when excluding 'a'\n";
}
// Test 10: extract_cycle_history — basic extraction
static void test_extract_cycle_history_basic() {
std::cout << "test_extract_cycle_history_basic\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
euf::snode* full_re = sg.mk(full_e);
euf::snode* tok_a = sg.mk_char('a');
euf::snode* tok_b = sg.mk_char('b');
euf::snode* tok_c = sg.mk_char('c');
// Ancestor history: just 'a' (length 1)
euf::snode* anc_hist = tok_a;
// Current history: concat(concat(a, b), c) = a,b,c (length 3)
euf::snode* cur_hist = sg.mk_concat(sg.mk_concat(tok_a, tok_b), tok_c);
euf::snode* empty_str = sg.mk_empty_seq(str_sort);
seq::dep_tracker empty_dep;
seq::str_mem ancestor(empty_str, full_re, anc_hist, 0, empty_dep);
seq::str_mem current(empty_str, full_re, cur_hist, 0, empty_dep);
euf::snode* cycle = nr.extract_cycle_history(current, ancestor);
// Should return the last 2 tokens (b, c)
SASSERT(cycle != nullptr);
SASSERT(cycle->length() == 2);
std::cout << " ok: extracted cycle of length 2\n";
}
// Test 11: extract_cycle_history — null ancestor history
static void test_extract_cycle_history_null_ancestor() {
std::cout << "test_extract_cycle_history_null_ancestor\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
euf::snode* full_re = sg.mk(full_e);
euf::snode* tok_a = sg.mk_char('a');
euf::snode* tok_b = sg.mk_char('b');
euf::snode* cur_hist = sg.mk_concat(tok_a, tok_b);
euf::snode* empty_str = sg.mk_empty_seq(str_sort);
seq::dep_tracker empty_dep;
// Ancestor has no history (nullptr)
seq::str_mem ancestor(empty_str, full_re, nullptr, 0, empty_dep);
seq::str_mem current(empty_str, full_re, cur_hist, 0, empty_dep);
euf::snode* cycle = nr.extract_cycle_history(current, ancestor);
// With null ancestor history, entire current history is the cycle
SASSERT(cycle != nullptr);
SASSERT(cycle->length() == 2);
std::cout << " ok: null ancestor -> full history as cycle\n";
}
// Test 12: BFS emptiness — re.none (empty language) is empty
static void test_bfs_empty_none() {
std::cout << "test_bfs_empty_none\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref none_e(su.re.mk_empty(re_sort), m);
euf::snode* none_re = sg.mk(none_e);
lbool result = nr.is_empty_bfs(none_re);
SASSERT(result == l_true);
std::cout << " ok: re.none -> l_true (empty)\n";
}
// Test 13: BFS emptiness — full_seq (Sigma*) is NOT empty
static void test_bfs_nonempty_full() {
std::cout << "test_bfs_nonempty_full\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
euf::snode* full_re = sg.mk(full_e);
lbool result = nr.is_empty_bfs(full_re);
SASSERT(result == l_false);
std::cout << " ok: full_seq -> l_false (non-empty)\n";
}
// Test 14: BFS emptiness — to_re("abc") is NOT empty
static void test_bfs_nonempty_to_re() {
std::cout << "test_bfs_nonempty_to_re\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
expr_ref to_re_abc(su.re.mk_to_re(su.str.mk_string("abc")), m);
euf::snode* re_abc = sg.mk(to_re_abc);
lbool result = nr.is_empty_bfs(re_abc);
SASSERT(result == l_false);
std::cout << " ok: to_re(\"abc\") -> l_false (non-empty)\n";
}
// Test 15: BFS emptiness — a* is NOT empty (accepts epsilon)
static void test_bfs_nonempty_star() {
std::cout << "test_bfs_nonempty_star\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
euf::snode* re_star_a = sg.mk(star_a);
lbool result = nr.is_empty_bfs(re_star_a);
SASSERT(result == l_false);
std::cout << " ok: a* -> l_false (non-empty, accepts epsilon)\n";
}
// Test 16: BFS emptiness — union(none, none) is empty
static void test_bfs_empty_union_of_empties() {
std::cout << "test_bfs_empty_union_of_empties\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref none1(su.re.mk_empty(re_sort), m);
expr_ref none2(su.re.mk_empty(re_sort), m);
expr_ref union_e(su.re.mk_union(none1, none2), m);
euf::snode* re_union = sg.mk(union_e);
lbool result = nr.is_empty_bfs(re_union);
SASSERT(result == l_true);
std::cout << " ok: union(none, none) -> l_true (empty)\n";
}
// Test 17: BFS emptiness — re.range('a','z') is NOT empty
static void test_bfs_nonempty_range() {
std::cout << "test_bfs_nonempty_range\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
expr_ref lo(su.mk_char('a'), m);
expr_ref hi(su.mk_char('z'), m);
expr_ref range_e(su.re.mk_range(su.str.mk_unit(lo), su.str.mk_unit(hi)), m);
euf::snode* re_range = sg.mk(range_e);
lbool result = nr.is_empty_bfs(re_range);
SASSERT(result == l_false);
std::cout << " ok: range('a','z') -> l_false (non-empty)\n";
}
// Test 18: BFS emptiness — complement(full_seq) = empty
static void test_bfs_empty_complement_full() {
std::cout << "test_bfs_empty_complement_full\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
expr_ref comp_full(su.re.mk_complement(su.re.mk_full_seq(re_sort)), m);
euf::snode* re_comp = sg.mk(comp_full);
lbool result = nr.is_empty_bfs(re_comp);
SASSERT(result == l_true);
std::cout << " ok: ~full_seq -> l_true (empty)\n";
}
// Test 19: BFS emptiness — nullptr returns l_undef
static void test_bfs_null_safety() {
std::cout << "test_bfs_null_safety\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
lbool result = nr.is_empty_bfs(nullptr);
SASSERT(result == l_undef);
std::cout << " ok: nullptr -> l_undef\n";
}
// Test 20: BFS emptiness — max_states bound respected
static void test_bfs_bounded() {
std::cout << "test_bfs_bounded\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// (a|b)+ requires at least one char; with max_states=1 should bail
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
expr_ref ab_plus(su.re.mk_plus(ab_union), m);
euf::snode* re_plus = sg.mk(ab_plus);
lbool result = nr.is_empty_bfs(re_plus, 1);
SASSERT(result == l_undef);
std::cout << " ok: (a|b)+ with max_states=1 -> l_undef (bounded)\n";
}
// -----------------------------------------------------------------------
// New tests for regex membership completion (Phase 1-4)
// -----------------------------------------------------------------------
// Test: char_set::is_subset
static void test_char_set_is_subset() {
std::cout << "test_char_set_is_subset\n";
// {a} ⊆ {a,b,c} = [97,100)
char_set cs1(char_range('a', 'b')); // {a}
char_set cs2(char_range('a', 'd')); // {a,b,c}
SASSERT(cs1.is_subset(cs2));
SASSERT(!cs2.is_subset(cs1));
// empty ⊆ anything
char_set empty;
SASSERT(empty.is_subset(cs1));
SASSERT(empty.is_subset(cs2));
// self ⊆ self
SASSERT(cs1.is_subset(cs1));
SASSERT(cs2.is_subset(cs2));
// disjoint: {x} not ⊆ {a}
char_set cs3(char_range('x', 'y'));
SASSERT(!cs3.is_subset(cs1));
std::cout << " ok\n";
}
// Test: stabilizer store basic operations
static void test_stabilizer_store_basic() {
std::cout << "test_stabilizer_store_basic\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
euf::snode* a_sn = sg.mk(a_re);
euf::snode* b_sn = sg.mk(b_re);
SASSERT(!nr.has_stabilizers(a_sn));
nr.add_stabilizer(a_sn, b_sn);
SASSERT(nr.has_stabilizers(a_sn));
SASSERT(nr.get_stabilizer_union(a_sn) == b_sn);
// dedup: adding same stabilizer again
nr.add_stabilizer(a_sn, b_sn);
auto* stabs = nr.get_stabilizers(a_sn);
SASSERT(stabs && stabs->size() == 1);
// reset
nr.reset_stabilizers();
SASSERT(!nr.has_stabilizers(a_sn));
std::cout << " ok\n";
}
// Test: self-stabilizing flag
static void test_self_stabilizing() {
std::cout << "test_self_stabilizing\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
euf::snode* a_sn = sg.mk(a_re);
SASSERT(!nr.is_self_stabilizing(a_sn));
nr.set_self_stabilizing(a_sn);
SASSERT(nr.is_self_stabilizing(a_sn));
// star should be detected as self-stabilizing
expr_ref star_a(su.re.mk_star(a_re), m);
euf::snode* star_sn = sg.mk(star_a);
SASSERT(nr.compute_self_stabilizing(star_sn));
std::cout << " ok\n";
}
// Test: check_intersection_emptiness — SAT case
static void test_check_intersection_sat() {
std::cout << "test_check_intersection_sat\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// a* ∩ (a|b)* should be non-empty (both accept "a")
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
expr_ref star_a(su.re.mk_star(a_re), m);
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
expr_ref star_ab(su.re.mk_star(ab_union), m);
euf::snode* s1 = sg.mk(star_a);
euf::snode* s2 = sg.mk(star_ab);
ptr_vector<euf::snode> regexes;
regexes.push_back(s1);
regexes.push_back(s2);
lbool result = nr.check_intersection_emptiness(regexes);
SASSERT(result == l_false); // non-empty
std::cout << " ok: a* ∩ (a|b)* is non-empty\n";
}
// Test: check_intersection_emptiness — UNSAT case
static void test_check_intersection_unsat() {
std::cout << "test_check_intersection_unsat\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
// to_re("a") ∩ to_re("b") should be empty
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
euf::snode* s1 = sg.mk(a_re);
euf::snode* s2 = sg.mk(b_re);
ptr_vector<euf::snode> regexes;
regexes.push_back(s1);
regexes.push_back(s2);
lbool result = nr.check_intersection_emptiness(regexes);
SASSERT(result == l_true); // empty
std::cout << " ok: to_re(a) ∩ to_re(b) is empty\n";
}
// Test: is_language_subset — true case
static void test_is_language_subset_true() {
std::cout << "test_is_language_subset_true\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// a* ⊆ (a|b)* should be true
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
expr_ref star_a(su.re.mk_star(a_re), m);
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
expr_ref star_ab(su.re.mk_star(ab_union), m);
euf::snode* subset = sg.mk(star_a);
euf::snode* superset = sg.mk(star_ab);
lbool result = nr.is_language_subset(subset, superset);
SASSERT(result == l_true);
std::cout << " ok: a* ⊆ (a|b)*\n";
}
// Test: is_language_subset — false case
static void test_is_language_subset_false() {
std::cout << "test_is_language_subset_false\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
// (a|b)* ⊄ a* should be false (b ∈ (a|b)* but b ∉ a*)
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
expr_ref star_a(su.re.mk_star(a_re), m);
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
expr_ref star_ab(su.re.mk_star(ab_union), m);
euf::snode* subset = sg.mk(star_ab);
euf::snode* superset = sg.mk(star_a);
lbool result = nr.is_language_subset(subset, superset);
SASSERT(result == l_false);
std::cout << " ok: (a|b)* ⊄ a*\n";
}
// Test: is_language_subset — trivial cases
static void test_is_language_subset_trivial() {
std::cout << "test_is_language_subset_trivial\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
// ∅ ⊆ anything = true
expr_ref none(su.re.mk_empty(su.re.mk_re(str_sort)), m);
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
euf::snode* empty_sn = sg.mk(none);
euf::snode* a_sn = sg.mk(a_re);
SASSERT(nr.is_language_subset(empty_sn, a_sn) == l_true);
// anything ⊆ Σ* = true
expr_ref full(su.re.mk_full_seq(su.re.mk_re(str_sort)), m);
euf::snode* full_sn = sg.mk(full);
SASSERT(nr.is_language_subset(a_sn, full_sn) == l_true);
// L ⊆ L = true (same pointer)
SASSERT(nr.is_language_subset(a_sn, a_sn) == l_true);
std::cout << " ok\n";
}
void tst_nseq_regex() {
test_nseq_regex_instantiation();
test_nseq_regex_is_empty();
test_nseq_regex_is_full();
test_strengthened_stabilizer_null();
test_strengthened_stabilizer_single_char();
test_strengthened_stabilizer_two_char();
test_filtered_stabilizer_star_empty();
test_filtered_stabilizer_star_with_stab();
test_filtered_stabilizer_star_filtered();
test_extract_cycle_history_basic();
test_extract_cycle_history_null_ancestor();
test_bfs_empty_none();
test_bfs_nonempty_full();
test_bfs_nonempty_to_re();
test_bfs_nonempty_star();
test_bfs_empty_union_of_empties();
test_bfs_nonempty_range();
test_bfs_empty_complement_full();
test_bfs_null_safety();
test_bfs_bounded();
// New tests for regex membership completion
test_char_set_is_subset();
test_stabilizer_store_basic();
test_self_stabilizing();
test_check_intersection_sat();
test_check_intersection_unsat();
test_is_language_subset_true();
test_is_language_subset_false();
test_is_language_subset_trivial();
std::cout << "nseq_regex: all tests passed\n";
}

View file

@ -57,9 +57,10 @@ public:
// -----------------------------------------------------------------------
struct str_builder {
euf::sgraph& sg;
seq_util& su;
euf::snode* vars[26] = {}; // vars['A'-'A'] .. vars['Z'-'A']
explicit str_builder(euf::sgraph& sg) : sg(sg) {}
str_builder(euf::sgraph& sg, seq_util& su) : sg(sg), su(su) {}
euf::snode* var(char c) {
int idx = c - 'A';
@ -76,7 +77,7 @@ struct str_builder {
: sg.mk_char((unsigned)(unsigned char)*p);
result = result ? sg.mk_concat(result, tok) : tok;
}
return result ? result : sg.mk_empty();
return result ? result : sg.mk_empty_seq(su.str.mk_string_sort());
}
};
@ -190,7 +191,7 @@ struct nseq_fixture {
static ast_manager& init(ast_manager& m) { reg_decl_plugins(m); return m; }
nseq_fixture()
: eg(init(m)), sg(m, eg), ng(sg, dummy_solver), su(m), sb(sg), rb(m, su, sg)
: eg(init(m)), sg(m, eg), ng(sg, dummy_solver), su(m), sb(sg, su), rb(m, su, sg)
{}
euf::snode* S(const char* s) { return sb.parse(s); }

View file

@ -74,11 +74,12 @@ static void test_str_eq() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* a = sg.mk_char('A');
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
seq::dep_tracker dep; dep.insert(0);
@ -121,7 +122,7 @@ static void test_str_mem() {
sort_ref str_sort(seq.str.mk_string_sort(), m);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
// create a regex: re.all (.*)
expr_ref star_fc(seq.re.mk_full_seq(str_sort), m);
@ -149,11 +150,12 @@ static void test_nielsen_subst() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* a = sg.mk_char('A');
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
seq::dep_tracker dep;
@ -208,7 +210,7 @@ static void test_nielsen_node() {
// regex membership
expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
euf::snode* regex = sg.mk(re_all);
euf::snode* empty = sg.mk_empty();
euf::snode* empty = sg.mk_empty_seq(seq.str.mk_string_sort());
root->add_str_mem(seq::str_mem(x, regex, empty, 0, dep));
SASSERT(root->str_mems().size() == 1);
@ -311,7 +313,7 @@ static void test_nielsen_subst_apply() {
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
// create node with constraint: concat(x, A) = concat(B, y)
seq::nielsen_node* node = ng.mk_node();
@ -362,6 +364,7 @@ static void test_nielsen_expansion() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
dummy_simple_solver solver;
seq::nielsen_graph ng(sg, solver);
@ -379,7 +382,7 @@ static void test_nielsen_expansion() {
seq::dep_tracker dep;
// branch 1: x -> eps (eliminating, progress)
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
seq::nielsen_node* child1 = ng.mk_child(root);
seq::nielsen_subst s1(x, e, dep);
child1->apply_subst(sg, s1);
@ -1720,9 +1723,10 @@ static void test_simplify_empty_propagation() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* xy = sg.mk_concat(x, y);
@ -1743,9 +1747,10 @@ static void test_simplify_empty_vs_char() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
euf::snode* a = sg.mk_char('A');
// ε = A → rhs has non-variable token → conflict
@ -1791,11 +1796,12 @@ static void test_simplify_trivial_removal() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep; dep.insert(0);
@ -1814,10 +1820,11 @@ static void test_simplify_all_trivial_satisfied() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep; dep.insert(0);
@ -1838,7 +1845,7 @@ static void test_simplify_regex_infeasible() {
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
expr_ref ch_a(seq.str.mk_char('A'), m);
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
@ -1865,7 +1872,7 @@ static void test_simplify_nullable_removal() {
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
expr_ref ch_a(seq.str.mk_char('A'), m);
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
@ -1894,7 +1901,7 @@ static void test_simplify_brzozowski_sat() {
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* a = sg.mk_char('A');
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
expr_ref ch_a(seq.str.mk_char('A'), m);
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
@ -1923,7 +1930,7 @@ static void test_simplify_brzozowski_rtl_suffix() {
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* a = sg.mk_char('A');
euf::snode* xa = sg.mk_concat(x, a);
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
expr_ref ch_b(seq.str.mk_char('B'), m);
expr_ref unit_b(seq.str.mk_unit(ch_b), m);
@ -1955,13 +1962,14 @@ static void test_simplify_multiple_eqs() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* a = sg.mk_char('A');
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* y = sg.mk_var(symbol("y"));
euf::snode* z = sg.mk_var(symbol("z"));
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
seq::nielsen_node* node = ng.mk_node();
seq::dep_tracker dep; dep.insert(0);
@ -2083,7 +2091,7 @@ static void test_explain_conflict_mem_only() {
seq_util seq(m);
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
expr_ref ch_a(seq.str.mk_char('A'), m);
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
@ -2115,7 +2123,7 @@ static void test_explain_conflict_mixed_eq_mem() {
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
euf::snode* a = sg.mk_char('A');
euf::snode* b = sg.mk_char('B');
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
// eq[0]: A = B (conflict)
ng.add_str_eq(a, b);
@ -2418,9 +2426,10 @@ static void test_length_constraints_empty_side() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
seq_util seq(m);
euf::snode* x = sg.mk_var(symbol("x"));
euf::snode* e = sg.mk_empty();
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
// x = ε
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);

View file

@ -402,6 +402,27 @@ bool char_set::is_disjoint(char_set const& other) const {
return true;
}
bool char_set::is_subset(char_set const& other) const {
// Every range in *this must be fully covered by ranges in other.
// Both are sorted, non-overlapping.
if (m_ranges.empty())
return true;
unsigned j = 0;
for (unsigned i = 0; i < m_ranges.size(); ++i) {
unsigned lo = m_ranges[i].m_lo;
unsigned hi = m_ranges[i].m_hi;
// Advance j to find covering range in other
while (j < other.m_ranges.size() && other.m_ranges[j].m_hi <= lo)
++j;
if (j >= other.m_ranges.size())
return false;
// other.m_ranges[j] must fully contain [lo, hi)
if (other.m_ranges[j].m_lo > lo || other.m_ranges[j].m_hi < hi)
return false;
}
return true;
}
std::ostream& char_set::display(std::ostream& out) const {
if (m_ranges.empty()) {
out << "{}";

View file

@ -156,6 +156,9 @@ public:
// check if two sets are disjoint
bool is_disjoint(char_set const& other) const;
// check if this set is a subset of other (every char in this is also in other)
bool is_subset(char_set const& other) const;
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }