3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-21 04:15:51 +00:00
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2026-03-13 18:19:25 -07:00
parent 8a48caf742
commit 27f5541b0b
11 changed files with 2176 additions and 80 deletions

View file

@ -15,11 +15,294 @@ Author:
--*/
#include "smt/nseq_regex.h"
#include <unordered_set>
namespace smt {
// -----------------------------------------------------------------------
// Regex emptiness checking (structural analysis)
// Stabilizer store
// -----------------------------------------------------------------------
// Drop all recorded stabilizer information: both the set of regex ids
// flagged as self-stabilizing and the per-regex stabilizer lists.
void nseq_regex::reset_stabilizers() {
    m_self_stabilizing.reset();
    m_stabilizers.reset();
}
// Record `stabilizer` as a stabilizer of `regex` (keyed by the regex's id).
// Null arguments are ignored. A stabilizer that is already present for this
// regex (same pointer) is not added a second time, mirroring the
// reference-equality check in ZIPT's Environment.AddStabilizer.
void nseq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
    if (regex == nullptr || stabilizer == nullptr)
        return;

    auto& known = m_stabilizers.insert_if_not_there(regex->id(), ptr_vector<euf::snode>());

    bool already_present = false;
    for (unsigned i = 0; i < known.size() && !already_present; ++i)
        already_present = (known[i] == stabilizer);

    if (!already_present)
        known.push_back(stabilizer);
}
// Return a regex node denoting the union of all stabilizers recorded for
// `regex`, or nullptr when there is nothing to return.
//
//   - no regex / no entry / empty list      -> nullptr
//   - exactly one stabilizer                -> that stabilizer itself
//   - several stabilizers s1..sN            -> union(s1, union(s2, ... sN))
//
// nullptr is also returned when any intermediate node has no expression or
// when the sgraph fails to produce a node for an intermediate union.
euf::snode* nseq_regex::get_stabilizer_union(euf::snode* regex) {
    if (!regex || !m_stabilizers.contains(regex->id()))
        return nullptr;

    auto& stabs = m_stabilizers[regex->id()];
    if (stabs.empty())
        return nullptr;

    seq_util& seq = m_sg.get_seq_util();

    // Fold from the right so the chain nests as union(s1, union(s2, ...)).
    // With a single stabilizer the loop body never runs and it is returned
    // unchanged.
    euf::snode* result = stabs[stabs.size() - 1];
    for (unsigned i = stabs.size() - 1; i-- > 0; ) {
        // `result` may be the product of a previous m_sg.mk call; guard it
        // before dereferencing, like the other mk() call sites in this file.
        if (!result || !stabs[i])
            return nullptr;
        expr* lhs = stabs[i]->get_expr();
        expr* rhs = result->get_expr();
        if (!lhs || !rhs)
            return nullptr;
        expr_ref un(seq.re.mk_union(lhs, rhs), m_sg.get_manager());
        result = m_sg.mk(un);
    }
    return result;
}
// True iff at least one stabilizer has been recorded for `regex`.
// A null regex, or a regex without an entry, has none.
bool nseq_regex::has_stabilizers(euf::snode* regex) const {
    ptr_vector<euf::snode> const* recorded = get_stabilizers(regex);
    return recorded != nullptr && !recorded->empty();
}
// Return a pointer to the stabilizer list stored for `regex`, or nullptr
// when `regex` is null or no list has been stored under its id.
// The returned list may be empty.
ptr_vector<euf::snode> const* nseq_regex::get_stabilizers(euf::snode* regex) const {
    bool const has_entry = regex != nullptr && m_stabilizers.contains(regex->id());
    if (!has_entry)
        return nullptr;
    return &m_stabilizers[regex->id()];
}
// Flag `regex` (by id) as self-stabilizing. A null regex is ignored.
void nseq_regex::set_self_stabilizing(euf::snode* regex) {
    if (regex != nullptr)
        m_self_stabilizing.insert(regex->id());
}
// True iff `regex` has previously been flagged via set_self_stabilizing.
// A null regex is never self-stabilizing.
bool nseq_regex::is_self_stabilizing(euf::snode* regex) const {
    return regex != nullptr && m_self_stabilizing.contains(regex->id());
}
// -----------------------------------------------------------------------
// Self-stabilizing auto-detection
// -----------------------------------------------------------------------
// Structural test: is `regex` self-stabilizing by its shape alone?
// This does not consult the set of regexes flagged via
// set_self_stabilizing; it only inspects the node.
//
// Returns true for:
//   - R*            : D(c, R*) = D(c,R) · R*, so R* is the tail of every
//                     derivative and acts as its own stabilizer;
//   - Σ* (full_seq) : D(c, Σ*) = Σ* for every character c;
//   - ∅ (fail)      : no live derivatives, so the property holds vacuously;
//   - ~Σ*           : complement of full_seq, i.e. the empty language again.
// Returns false for everything else, including a null regex.
bool nseq_regex::compute_self_stabilizing(euf::snode* regex) const {
    if (!regex)
        return false;

    if (regex->is_star() || regex->is_full_seq() || regex->is_fail())
        return true;

    if (regex->is_complement() && regex->num_args() == 1 &&
        regex->arg(0)->is_full_seq())
        return true;

    // Loops are deliberately NOT recognized here, even nullable ones such as
    // r{0,} ≡ r*: the sgraph is relied upon to normalize those into star
    // nodes, which the check above already catches.
    return false;
}
// -----------------------------------------------------------------------
// Self-stabilizing propagation through derivatives
// -----------------------------------------------------------------------
// Given that `deriv` is a derivative of `parent`, decide whether `deriv`
// should be flagged self-stabilizing and flag it if so. Nothing happens when
// either argument is null or `deriv` is already flagged.
//
// `deriv` gets flagged when any of the following holds:
//   - `deriv` is structurally self-stabilizing (compute_self_stabilizing);
//   - `parent` is R*     : D(c, R*) = D(c,R) · R* always keeps the R* tail;
//   - `parent` is Σ*     : D(c, Σ*) = Σ*;
//   - `parent` itself is self-stabilizing (flagged or structurally);
//   - `parent` is R · S  (two args) and S is self-stabilizing;
//   - `parent` is R | S  (two args) and both R and S are self-stabilizing;
//   - `parent` is R ∩ S  (two args) and both R and S are self-stabilizing;
//   - `parent` is ~R     (one arg)  and R is self-stabilizing.
void nseq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
    if (parent == nullptr || deriv == nullptr)
        return;
    if (is_self_stabilizing(deriv))
        return;

    // "Self-stabilizing" in the combined sense: flagged earlier, or
    // recognizable from the node's structure.
    auto stabilizes = [this](euf::snode* n) -> bool {
        return is_self_stabilizing(n) || compute_self_stabilizing(n);
    };

    auto inherits_flag = [&]() -> bool {
        if (compute_self_stabilizing(deriv))
            return true;
        if (parent->is_star() || parent->is_full_seq())
            return true;
        if (stabilizes(parent))
            return true;

        bool const binary = parent->num_args() == 2;
        if (parent->is_concat() && binary && stabilizes(parent->arg(1)))
            return true;
        if (parent->is_union() && binary &&
            stabilizes(parent->arg(0)) && stabilizes(parent->arg(1)))
            return true;
        if (parent->is_intersect() && binary &&
            stabilizes(parent->arg(0)) && stabilizes(parent->arg(1)))
            return true;
        if (parent->is_complement() && parent->num_args() == 1 &&
            stabilizes(parent->arg(0)))
            return true;
        return false;
    };

    if (inherits_flag())
        set_self_stabilizing(deriv);
}
// -----------------------------------------------------------------------
// Derivative with propagation
// -----------------------------------------------------------------------
// Compute derivative(re, elem) and, when it succeeds, run
// propagate_self_stabilizing from `re` to the result.
// Returns nullptr if either argument is null or the derivative is null.
euf::snode* nseq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
    if (re == nullptr || elem == nullptr)
        return nullptr;

    euf::snode* result = derivative(re, elem);
    if (result == nullptr)
        return nullptr;

    propagate_self_stabilizing(re, result);
    return result;
}
// -----------------------------------------------------------------------
// Uniform derivative (symbolic character consumption)
// -----------------------------------------------------------------------
// Try to find a single derivative of `regex` that is valid for every
// character, so a symbolic character can be consumed deterministically.
//
// Returns:
//   - `regex` itself for Σ* (derivative is Σ*) and for ∅ (derivative is ∅,
//     which lets the caller detect a conflict);
//   - the common derivative when all non-fail minterms of `regex` produce a
//     derivative with the same id;
//   - nullptr when `regex` is null, there are no minterms, some derivative
//     could not be computed, two minterms disagree, or every minterm was
//     null/fail.
euf::snode* nseq_regex::try_uniform_derivative(euf::snode* regex) {
    if (regex == nullptr)
        return nullptr;
    if (regex->is_full_seq() || regex->is_fail())
        return regex;

    // Partition of the alphabet into character classes induced by `regex`.
    euf::snode_vector classes;
    m_sg.compute_minterms(regex, classes);
    if (classes.empty())
        return nullptr;

    euf::snode* common = nullptr;
    for (euf::snode* cls : classes) {
        // A null or fail minterm denotes an empty character class.
        if (cls == nullptr || cls->is_fail())
            continue;

        euf::snode* d = m_sg.brzozowski_deriv(regex, cls);
        if (d == nullptr)
            return nullptr;

        if (common == nullptr) {
            common = d;
            continue;
        }
        if (common->id() != d->id())
            return nullptr;
    }
    return common;
}
// -----------------------------------------------------------------------
// Ground prefix consumption
// -----------------------------------------------------------------------
bool nseq_regex::is_empty_regex(euf::snode* re) const {
@ -68,6 +351,377 @@ namespace smt {
return false;
}
// -----------------------------------------------------------------------
// BFS regex emptiness check — helper: collect character boundaries
// -----------------------------------------------------------------------
// Append to `bounds` the character codes at which the behaviour of `re` may
// change from one character to the next. Values are appended unsorted and
// may contain duplicates.
//
//   - re.range(lo, hi) with single-character string bounds: lo, and hi+1
//     (hi+1 only when hi is below zstring::max_char()).
//   - to_re(s) with a non-empty string literal s: s[0], and s[0]+1 (the
//     latter only when s[0] is below zstring::max_char()). Only the FIRST
//     character of s is considered.
//   - fail / full_char / full_seq: nothing.
//   - anything else: the boundaries of all arguments, recursively.
void nseq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
    if (re == nullptr || re->get_expr() == nullptr)
        return;

    seq_util& seq = m_sg.get_seq_util();
    expr* const e = re->get_expr();

    // Range arguments are string expressions (e.g. str.unit(ch)).
    expr* range_lo = nullptr;
    expr* range_hi = nullptr;
    if (seq.re.is_range(e, range_lo, range_hi)) {
        zstring lo_str, hi_str;
        bool const lo_known =
            range_lo && seq.str.is_string(range_lo, lo_str) && lo_str.length() == 1;
        if (lo_known)
            bounds.push_back(lo_str[0]);
        bool const hi_known =
            range_hi && seq.str.is_string(range_hi, hi_str) && hi_str.length() == 1 &&
            hi_str[0] < zstring::max_char();
        if (hi_known)
            bounds.push_back(hi_str[0] + 1);
        return;
    }

    expr* literal = nullptr;
    if (seq.re.is_to_re(e, literal)) {
        zstring text;
        if (seq.str.is_string(literal, text) && text.length() > 0) {
            unsigned const head = text[0];
            bounds.push_back(head);
            if (head < zstring::max_char())
                bounds.push_back(head + 1);
        }
        return;
    }

    // Leaves that do not discriminate between characters.
    bool const opaque_leaf = re->is_fail() || re->is_full_char() || re->is_full_seq();
    if (opaque_leaf)
        return;

    // Composite node (union, concat, star, loop, ...): visit every argument.
    unsigned const arity = re->num_args();
    for (unsigned k = 0; k != arity; ++k)
        collect_char_boundaries(re->arg(k), bounds);
}
// -----------------------------------------------------------------------
// BFS regex emptiness check — helper: alphabet representatives
// -----------------------------------------------------------------------
// Append to `reps` one character node per distinct boundary of `re`, in
// increasing order of character code. Character 0 is always among them, so
// at least one representative is appended.
void nseq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
    unsigned_vector cuts;
    cuts.push_back(0);
    collect_char_boundaries(re, cuts);
    std::sort(cuts.begin(), cuts.end());

    // After sorting, duplicates are adjacent; emit each value once.
    // UINT_MAX serves as the "nothing emitted yet" marker.
    unsigned last_emitted = UINT_MAX;
    for (unsigned k = 0; k < cuts.size(); ++k) {
        unsigned const code = cuts[k];
        if (code == last_emitted)
            continue;
        reps.push_back(m_sg.mk_char(code));
        last_emitted = code;
    }
}
// -----------------------------------------------------------------------
// BFS regex emptiness check
// -----------------------------------------------------------------------
// Decide whether the language of `re` is empty by exploring the automaton
// whose states are Brzozowski derivatives of `re` (identified by node id).
//
// Returns:
//   l_true  - no nullable state is reachable: the language is empty;
//   l_false - `re` or some reachable derivative is nullable (or `re` is a
//             star / full_char / full_seq / to_re node);
//   l_undef - `re` is null, has no expression, is not ground, more than
//             `max_states` states would have to be expanded, or some
//             derivative could not be computed and no witness was found.
//
// Note: pending states are taken from the back of the worklist, so states
// are expanded in last-in-first-out order.
lbool nseq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
    if (re == nullptr || re->get_expr() == nullptr)
        return l_undef;
    if (re->is_fail())
        return l_true;

    // Nullable regexes accept ε; the listed node kinds are never empty.
    bool const obviously_inhabited =
        re->is_nullable() ||
        re->is_star() || re->is_full_char() || re->is_full_seq() || re->is_to_re();
    if (obviously_inhabited)
        return l_false;

    if (is_empty_regex(re))
        return l_true;

    // Non-ground regexes cannot be explored exhaustively.
    if (!re->is_ground())
        return l_undef;

    uint_set seen;
    euf::snode_vector pending;
    pending.push_back(re);
    seen.insert(re->id());

    bool incomplete = false;   // set when some derivative could not be computed

    for (unsigned expanded = 0; !pending.empty(); ++expanded) {
        if (expanded >= max_states)
            return l_undef;

        euf::snode* state = pending.back();
        pending.pop_back();

        // One concrete character per class of the alphabet partition
        // induced by this state.
        euf::snode_vector letters;
        get_alphabet_representatives(state, letters);
        if (letters.empty())
            letters.push_back(m_sg.mk_char('a'));

        for (euf::snode* letter : letters) {
            euf::snode* next = m_sg.brzozowski_deriv(state, letter);
            if (next == nullptr) {
                // Remember the gap but keep trying the other characters.
                incomplete = true;
                continue;
            }
            if (next->is_nullable())
                return l_false;                 // accepting state reached
            if (next->is_fail() || is_empty_regex(next))
                continue;                       // dead end
            if (seen.contains(next->id()))
                continue;                       // already scheduled
            seen.insert(next->id());
            pending.push_back(next);
        }
    }

    // Every reachable state was expanded without meeting a nullable one.
    // That is only conclusive when no derivative was skipped.
    return incomplete ? l_undef : l_true;
}
// -----------------------------------------------------------------------
// Multi-regex intersection emptiness check
// BFS over the product of Brzozowski derivative automata.
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
// -----------------------------------------------------------------------
// Decide whether the intersection of the languages of `regexes` is empty by
// exploring the product of their Brzozowski derivative automata.
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469).
//
// Returns:
//   l_true  - the intersection is empty;
//   l_false - the intersection is non-empty (this includes an empty input
//             vector: the empty intersection is the full language);
//   l_undef - some regex is null or has no expression, more than
//             `max_states` product states would have to be expanded, or a
//             derivative could not be computed and no witness was found.
//
// A product state is the tuple of component regex ids. The visited set
// stores the full tuples, so two distinct states are never confused with
// each other. The alphabet representatives are recomputed for every product
// state from its own components: derivatives can expose characters (e.g.
// the later characters of a to_re literal) that collect_char_boundaries
// does not report for the initial regexes.
lbool nseq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
                                               unsigned max_states) {
    if (regexes.empty())
        return l_false; // empty intersection = full language (vacuously non-empty)

    // Quick checks: if any regex is fail/empty, the intersection is empty.
    for (euf::snode* re : regexes) {
        if (!re || !re->get_expr())
            return l_undef;
        if (re->is_fail() || is_empty_regex(re))
            return l_true;
    }

    // If every component is nullable, the intersection accepts ε.
    bool all_nullable = true;
    for (euf::snode* re : regexes) {
        if (!re->is_nullable()) { all_nullable = false; break; }
    }
    if (all_nullable)
        return l_false;

    // Single regex: delegate to is_empty_bfs.
    if (regexes.size() == 1)
        return is_empty_bfs(regexes[0], max_states);

    // Product-state identity: the tuple of component ids, compared exactly.
    using state_t = std::vector<unsigned>;
    struct state_hash {
        std::size_t operator()(state_t const& s) const {
            std::size_t h = 0;
            for (unsigned id : s)
                h = h * 31 + id;
            return h;
        }
    };
    auto ids_of = [](ptr_vector<euf::snode> const& components) {
        state_t ids;
        ids.reserve(components.size());
        for (euf::snode* c : components)
            ids.push_back(c->id());
        return ids;
    };
    std::unordered_set<state_t, state_hash> visited;

    struct bfs_state {
        ptr_vector<euf::snode> regexes;
    };
    std::vector<bfs_state> worklist;

    bfs_state initial;
    initial.regexes.append(regexes);
    worklist.push_back(std::move(initial));
    visited.insert(ids_of(regexes));

    unsigned states_explored = 0;
    bool had_failed = false;

    while (!worklist.empty()) {
        if (states_explored >= max_states)
            return l_undef;
        bfs_state current = std::move(worklist.back());
        worklist.pop_back();
        ++states_explored;

        // Representatives for THIS product state: merge the boundaries of
        // all of its components. Character 0 is always included, so `reps`
        // is never empty.
        unsigned_vector bounds;
        bounds.push_back(0);
        for (euf::snode* re : current.regexes)
            collect_char_boundaries(re, bounds);
        std::sort(bounds.begin(), bounds.end());
        euf::snode_vector reps;
        unsigned prev = UINT_MAX;
        for (unsigned b : bounds) {
            if (b != prev) {
                reps.push_back(m_sg.mk_char(b));
                prev = b;
            }
        }

        for (euf::snode* ch : reps) {
            ptr_vector<euf::snode> derivs;
            bool any_fail = false;
            bool all_null = true;
            bool deriv_failed = false;
            for (euf::snode* re : current.regexes) {
                euf::snode* d = m_sg.brzozowski_deriv(re, ch);
                if (!d) { deriv_failed = true; break; }
                if (d->is_fail()) { any_fail = true; break; }
                if (!d->is_nullable()) all_null = false;
                derivs.push_back(d);
            }
            if (deriv_failed) { had_failed = true; continue; }
            if (any_fail) continue; // this character leads to an empty intersection
            if (all_null)
                return l_false;     // accepting state in the product

            // Skip successors with a structurally empty component.
            bool any_empty = false;
            for (euf::snode* d : derivs) {
                if (is_empty_regex(d)) { any_empty = true; break; }
            }
            if (any_empty) continue;

            // insert() reports whether the exact id tuple is new.
            if (visited.insert(ids_of(derivs)).second) {
                bfs_state next;
                next.regexes.append(derivs);
                worklist.push_back(std::move(next));
            }
        }
    }

    // All reachable product states were expanded without finding an
    // accepting one; conclusive only if no derivative was skipped.
    if (had_failed)
        return l_undef;
    return l_true;
}
// -----------------------------------------------------------------------
// Language subset check: L(A) ⊆ L(B)
// via intersection(A, complement(B)) = ∅
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
// -----------------------------------------------------------------------
// Check L(subset_re) ⊆ L(superset_re) by testing whether
// subset_re ∩ complement(superset_re) is empty.
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385).
//
// Returns l_true immediately when the subset is fail / structurally empty,
// the superset is Σ*, or both arguments are the same node. Returns l_undef
// when an argument is null, an expression is missing, or a helper node could
// not be created. Otherwise returns the verdict of is_empty_bfs on the
// intersection (l_true meaning "is a subset").
lbool nseq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
    if (subset_re == nullptr || superset_re == nullptr)
        return l_undef;

    // ∅ ⊆ anything, anything ⊆ Σ*, and L ⊆ L.
    bool const trivially_contained =
        subset_re->is_fail() || is_empty_regex(subset_re) ||
        superset_re->is_full_seq() ||
        subset_re == superset_re;
    if (trivially_contained)
        return l_true;

    seq_util& seq = m_sg.get_seq_util();
    ast_manager& mgr = m_sg.get_manager();

    // complement(superset)
    expr* const superset_expr = superset_re->get_expr();
    if (superset_expr == nullptr)
        return l_undef;
    expr_ref not_superset(seq.re.mk_complement(superset_expr), mgr);
    if (m_sg.mk(not_superset) == nullptr)
        return l_undef;

    // subset ∩ complement(superset)
    expr* const subset_expr = subset_re->get_expr();
    if (subset_expr == nullptr)
        return l_undef;
    expr_ref difference(seq.re.mk_inter(subset_expr, not_superset.get()), mgr);
    euf::snode* const difference_node = m_sg.mk(difference);
    if (difference_node == nullptr)
        return l_undef;

    return is_empty_bfs(difference_node);
}
// -----------------------------------------------------------------------
// Collect primitive regex intersection for a variable
// -----------------------------------------------------------------------
// Intersect the regexes of all primitive membership constraints in `node`
// whose string's first token is `var`.
//
// Returns nullptr when `var` is null or no such constraint exists; the single
// regex when there is exactly one; otherwise a node for the re.inter chain
// built in iteration order. A constraint is left out of the intersection when
// either side's expression is missing at the time it would be combined.
euf::snode* nseq_regex::collect_primitive_regex_intersection(
    euf::snode* var, seq::nielsen_node const& node) {
    if (var == nullptr)
        return nullptr;

    seq_util& seq = m_sg.get_seq_util();
    ast_manager& mgr = m_sg.get_manager();

    euf::snode* acc = nullptr;
    for (auto const& mem : node.str_mems()) {
        if (!mem.m_str || !mem.m_regex)
            continue;
        // Only constraints whose string is a single variable are relevant.
        if (!mem.is_primitive())
            continue;
        euf::snode* head = mem.m_str->first();
        if (head == nullptr || head != var)
            continue;

        if (acc == nullptr) {
            acc = mem.m_regex;
            continue;
        }

        expr* lhs = acc->get_expr();
        expr* rhs = mem.m_regex->get_expr();
        if (lhs == nullptr || rhs == nullptr)
            continue;
        expr_ref both(seq.re.mk_inter(lhs, rhs), mgr);
        acc = m_sg.mk(both);
    }
    return acc;
}
// -----------------------------------------------------------------------
// Cycle detection
// -----------------------------------------------------------------------
@ -88,11 +742,14 @@ namespace smt {
euf::snode* first = mem.m_str->first();
if (!first || !first->is_char())
break;
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, first);
euf::snode* parent_re = mem.m_regex;
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, first);
if (!deriv)
break;
if (deriv->is_fail())
return simplify_status::conflict;
// propagate self-stabilizing flag from parent to derivative
propagate_self_stabilizing(parent_re, deriv);
mem.m_str = m_sg.drop_first(mem.m_str);
mem.m_regex = deriv;
}
@ -361,13 +1018,6 @@ namespace smt {
if (!cycle_regex || !current_regex)
return nullptr;
// The stabilizer is the Kleene star of the "cycle body" regex.
// If the cycle regex and current regex are the same (pointer equal),
// the stabilizer is cycle_regex* (Kleene star).
// This mirrors ZIPT's StabilizerFromCycle which extracts the
// regex between the cycle entry and current point and wraps it in *.
// Build cycle_regex* via the sgraph's expression factory
expr* re_expr = cycle_regex->get_expr();
if (!re_expr)
return nullptr;
@ -378,31 +1028,253 @@ namespace smt {
}
// -----------------------------------------------------------------------
// Stabilizer-based subsumption
// Extract cycle history tokens
// -----------------------------------------------------------------------
bool nseq_regex::try_subsume(seq::str_mem const& mem) {
// Check if the derivation history exhibits a cycle, and if so,
// whether the current regex is subsumed by the stabilizer.
euf::snode* cycle = extract_cycle(mem);
if (!cycle)
// Return the part of `current`'s history that was consumed after `ancestor`.
//
// The history is a left-associative string concat chain built by
// simplify_and_init: concat(concat(concat(nil, c1), c2), c3).
//
// Returns nullptr when `current` has no history or its history is not
// strictly longer than the ancestor's; the whole current history when the
// ancestor has none (or a zero-length one); otherwise the current history
// with the ancestor's length dropped from the left.
euf::snode* nseq_regex::extract_cycle_history(seq::str_mem const& current,
                                              seq::str_mem const& ancestor) {
    if (!current.m_history)
        return nullptr;

    unsigned const total = current.m_history->length();
    unsigned shared = 0;
    if (ancestor.m_history)
        shared = ancestor.m_history->length();

    if (total <= shared)
        return nullptr;
    if (shared != 0)
        return m_sg.drop_left(current.m_history, shared);
    return current.m_history;
}
// -----------------------------------------------------------------------
// Get filtered stabilizer star
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243)
// -----------------------------------------------------------------------
// Build star(union(S)) where S is the set of stabilizers recorded for `re`
// whose derivative with respect to `excluded_char` is fail, i.e. whose
// language cannot start with that character.
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243).
//
// Returns nullptr when `re` is null, has no stabilizers, no stabilizer
// passes the filter, or the resulting union has no expression.
euf::snode* nseq_regex::get_filtered_stabilizer_star(euf::snode* re,
                                                     euf::snode* excluded_char) {
    if (re == nullptr)
        return nullptr;

    ptr_vector<euf::snode> const* recorded = get_stabilizers(re);
    if (recorded == nullptr || recorded->empty())
        return nullptr;

    seq_util& seq = m_sg.get_seq_util();
    ast_manager& m = m_sg.get_manager();

    euf::snode* kept = nullptr;   // union of the stabilizers accepted so far
    for (euf::snode* cand : *recorded) {
        if (cand == nullptr)
            continue;

        // Accept only when the derivative exists and is the fail regex.
        euf::snode* d = m_sg.brzozowski_deriv(cand, excluded_char);
        if (d == nullptr || !d->is_fail())
            continue;

        if (kept == nullptr) {
            kept = cand;
            continue;
        }

        expr* lhs = kept->get_expr();
        expr* rhs = cand->get_expr();
        if (lhs == nullptr || rhs == nullptr)
            continue;
        expr_ref joined(seq.re.mk_union(lhs, rhs), m);
        kept = m_sg.mk(joined);
    }

    if (kept == nullptr)
        return nullptr;
    expr* kept_expr = kept->get_expr();
    if (kept_expr == nullptr)
        return nullptr;

    expr_ref starred(seq.re.mk_star(kept_expr), m);
    return m_sg.mk(starred);
}
// -----------------------------------------------------------------------
// Strengthened stabilizer construction with sub-cycle detection
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225)
// -----------------------------------------------------------------------
// Build a stabilizer regex for a cycle that starts (and ends) at
// `cycle_regex` and consumed the tokens stored in `cycle_history`.
//
// Phase 1 replays the tokens on `cycle_regex` and cuts the token sequence
// into sub-cycles: a cut is made every time the derivative comes back to
// `cycle_regex` (same node, or same underlying expression). Tokens left over
// after the last cut form one final segment.
// Phase 2 builds one body regex per segment and returns the union of all
// bodies.
//
// Returns nullptr when an argument is null, the history has no tokens, a
// token is null or a derivative cannot be computed during phase 1, or no
// body could be built.
euf::snode* nseq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
euf::snode* cycle_history) {
if (!cycle_regex || !cycle_history)
return nullptr;
// Flatten the history concat chain into a vector of character tokens.
euf::snode_vector tokens;
cycle_history->collect_tokens(tokens);
if (tokens.empty())
return nullptr;
seq_util& seq = m_sg.get_seq_util();
ast_manager& m = m_sg.get_manager();
// Replay tokens on the cycle regex, detecting sub-cycles.
// A sub-cycle is detected when the derivative returns to cycle_regex.
// Each entry of sub_cycles is a half-open token index range [first, second).
svector<std::pair<unsigned, unsigned>> sub_cycles;
unsigned cycle_start = 0;
euf::snode* current_re = cycle_regex;
for (unsigned i = 0; i < tokens.size(); ++i) {
euf::snode* tok = tokens[i];
if (!tok)
return nullptr;
euf::snode* deriv = m_sg.brzozowski_deriv(current_re, tok);
if (!deriv)
return nullptr;
// Sub-cycle: derivative returned to the cycle entry regex
if (deriv == cycle_regex ||
(deriv->get_expr() && cycle_regex->get_expr() &&
deriv->get_expr() == cycle_regex->get_expr())) {
sub_cycles.push_back(std::make_pair(cycle_start, i + 1));
cycle_start = i + 1;
current_re = cycle_regex;
} else {
current_re = deriv;
}
}
// Remaining tokens that don't complete a sub-cycle
if (cycle_start < tokens.size())
sub_cycles.push_back(std::make_pair(cycle_start, tokens.size()));
if (sub_cycles.empty())
return nullptr;
// Build a stabilizer body for each sub-cycle.
// body = to_re(t0) · [filteredStar(R1, t1)] · to_re(t1) · ... · to_re(t_{n-1})
// where Ri is the regex state reached after consuming t0..t_{i-1}, starting
// from cycle_regex at the beginning of every sub-cycle.
euf::snode* overall_union = nullptr;
for (auto const& sc : sub_cycles) {
unsigned start = sc.first;
unsigned end = sc.second;
if (start >= end)
continue;
euf::snode* re_state = cycle_regex;
euf::snode* body = nullptr;
for (unsigned i = start; i < end; ++i) {
euf::snode* tok = tokens[i];
// NOTE(review): this and the later `break`s leave the loop with whatever
// `body` has been built so far; that partial body is still added to the
// union below. Confirm this is intended.
if (!tok)
break;
// Insert filtered stabilizer star before each token after the first
if (i > start) {
euf::snode* filtered = get_filtered_stabilizer_star(re_state, tok);
if (filtered) {
expr* fe = filtered->get_expr();
if (fe) {
if (!body) {
body = filtered;
} else {
expr* be = body->get_expr();
if (be) {
expr_ref cat(seq.re.mk_concat(be, fe), m);
body = m_sg.mk(cat);
}
}
}
}
}
// Convert char token to regex: to_re(unit(tok))
expr* tok_expr = tok->get_expr();
if (!tok_expr)
break;
expr_ref unit_str(seq.str.mk_unit(tok_expr), m);
expr_ref tok_re(seq.re.mk_to_re(unit_str), m);
euf::snode* tok_re_sn = m_sg.mk(tok_re);
if (!body) {
body = tok_re_sn;
} else {
// NOTE(review): tok_re_sn is dereferenced here without a null check, and
// when `be` or `te` is missing the token is silently left out of the body.
expr* be = body->get_expr();
expr* te = tok_re_sn->get_expr();
if (be && te) {
expr_ref cat(seq.re.mk_concat(be, te), m);
body = m_sg.mk(cat);
}
}
// Advance the regex state
euf::snode* deriv = m_sg.brzozowski_deriv(re_state, tok);
if (!deriv)
break;
re_state = deriv;
}
if (!body)
continue;
// Accumulate the bodies of all sub-cycles into a single union; a body is
// skipped when either side has no expression.
if (!overall_union) {
overall_union = body;
} else {
expr* oe = overall_union->get_expr();
expr* be = body->get_expr();
if (oe && be) {
expr_ref u(seq.re.mk_union(oe, be), m);
overall_union = m_sg.mk(u);
}
}
}
return overall_union;
}
// -----------------------------------------------------------------------
// Stabilizer-based subsumption (enhanced)
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386)
// -----------------------------------------------------------------------
// Stabilizer-based subsumption of the membership constraint `mem` within
// `node`. Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386).
//
// Returns true only when all of the following hold:
//   1. the first token of mem.m_str is a variable x;
//   2. stabilizers have been recorded for mem.m_regex;
//   3. stabStar = star(union of those stabilizers) can be built;
//   4. the intersection of all primitive regex constraints on x in `node`
//      exists;
//   5. is_language_subset proves that intersection ⊆ stabStar (l_true).
// Any missing piece, or an inconclusive subset check, yields false.
bool nseq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) {
    if (!mem.m_str || !mem.m_regex)
        return false;

    // 1. Leading token must be a variable
    euf::snode* first = mem.m_str->first();
    if (!first || !first->is_var())
        return false;

    // 2. Must have stabilizers for the regex
    if (!has_stabilizers(mem.m_regex))
        return false;

    // 3. Build stabStar = star(union(all stabilizers for this regex))
    euf::snode* stab_union = get_stabilizer_union(mem.m_regex);
    if (!stab_union)
        return false;
    seq_util& seq = m_sg.get_seq_util();
    ast_manager& mgr = m_sg.get_manager();
    expr* su_expr = stab_union->get_expr();
    if (!su_expr)
        return false;
    expr_ref stab_star(seq.re.mk_star(su_expr), mgr);
    euf::snode* stab_star_sn = m_sg.mk(stab_star);
    if (!stab_star_sn)
        return false;

    // 4. Collect all primitive regex constraints on variable `first`
    euf::snode* x_range = collect_primitive_regex_intersection(first, node);
    if (!x_range)
        return false;

    // 5. Check L(x_range) ⊆ L(stab_star); l_undef counts as "not subsumed".
    lbool result = is_language_subset(x_range, stab_star_sn);
    return result == l_true;
}
}