mirror of
https://github.com/Z3Prover/z3
synced 2026-03-18 11:04:09 +00:00
1281 lines
47 KiB
C++
1281 lines
47 KiB
C++
/*++
|
|
Copyright (c) 2026 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
seq_regex.cpp
|
|
|
|
Abstract:
|
|
|
|
Lazy regex membership processing for the Nielsen-based string solver.
|
|
|
|
Author:
|
|
|
|
Clemens Eisenhofer 2026-03-01
|
|
Nikolaj Bjorner (nbjorner) 2026-03-01
|
|
|
|
--*/
|
|
#include "smt/seq/seq_regex.h"
|
|
#include <unordered_set>
|
|
|
|
namespace seq {
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Stabilizer store
|
|
// -----------------------------------------------------------------------
|
|
|
|
void seq_regex::reset_stabilizers() {
|
|
m_stabilizers.reset();
|
|
m_self_stabilizing.reset();
|
|
}
|
|
|
|
void seq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
|
|
if (!regex || !stabilizer)
|
|
return;
|
|
|
|
unsigned id = regex->id();
|
|
auto& stabs = m_stabilizers.insert_if_not_there(id, ptr_vector<euf::snode>());
|
|
|
|
// De-duplicate by pointer equality (mirrors ZIPT Environment.AddStabilizer
|
|
// which checks reference equality before adding).
|
|
for (euf::snode* s : stabs)
|
|
if (s == stabilizer)
|
|
return;
|
|
stabs.push_back(stabilizer);
|
|
}
|
|
|
|
euf::snode* seq_regex::get_stabilizer_union(euf::snode* regex) {
|
|
if (!regex)
|
|
return nullptr;
|
|
|
|
if (!m_stabilizers.contains(regex->id()))
|
|
return nullptr;
|
|
|
|
auto& stabs = m_stabilizers[regex->id()];
|
|
if (stabs.empty())
|
|
return nullptr;
|
|
|
|
// Single stabilizer: return it directly.
|
|
if (stabs.size() == 1)
|
|
return stabs[0];
|
|
|
|
// Multiple stabilizers: build re.union chain.
|
|
// union(s1, union(s2, ... union(sN-1, sN)...))
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
euf::snode* result = stabs[stabs.size() - 1];
|
|
for (unsigned i = stabs.size() - 1; i-- > 0; ) {
|
|
expr* lhs = stabs[i]->get_expr();
|
|
expr* rhs = result->get_expr();
|
|
if (!lhs || !rhs)
|
|
return nullptr;
|
|
expr_ref un(seq.re.mk_union(lhs, rhs), m_sg.get_manager());
|
|
result = m_sg.mk(un);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
bool seq_regex::has_stabilizers(euf::snode* regex) const {
|
|
if (!regex)
|
|
return false;
|
|
if (!m_stabilizers.contains(regex->id()))
|
|
return false;
|
|
return !m_stabilizers[regex->id()].empty();
|
|
}
|
|
|
|
ptr_vector<euf::snode> const* seq_regex::get_stabilizers(euf::snode* regex) const {
|
|
if (!regex)
|
|
return nullptr;
|
|
if (!m_stabilizers.contains(regex->id()))
|
|
return nullptr;
|
|
return &m_stabilizers[regex->id()];
|
|
}
|
|
|
|
void seq_regex::set_self_stabilizing(euf::snode* regex) {
|
|
if (!regex)
|
|
return;
|
|
m_self_stabilizing.insert(regex->id());
|
|
}
|
|
|
|
bool seq_regex::is_self_stabilizing(euf::snode* regex) const {
|
|
if (!regex)
|
|
return false;
|
|
return m_self_stabilizing.contains(regex->id());
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Self-stabilizing auto-detection
|
|
// -----------------------------------------------------------------------
|
|
|
|
bool seq_regex::compute_self_stabilizing(euf::snode* regex) const {
|
|
if (!regex)
|
|
return false;
|
|
|
|
// R* is always self-stabilizing: D(c, R*) = D(c,R) · R*,
|
|
// so R* appears as the tail of every derivative and acts as
|
|
// its own stabilizer.
|
|
if (regex->is_star())
|
|
return true;
|
|
|
|
// Σ* (full_seq, i.e., re.all / .*) is self-stabilizing:
|
|
// D(c, Σ*) = Σ* for every character c.
|
|
if (regex->is_full_seq())
|
|
return true;
|
|
|
|
// ∅ (fail / empty language) is trivially self-stabilizing:
|
|
// it has no live derivatives, so the flag is vacuously true.
|
|
if (regex->is_fail())
|
|
return true;
|
|
|
|
// Complement of full_seq is ∅ (complement of Σ*), which is
|
|
// also trivially self-stabilizing.
|
|
if (regex->is_complement() && regex->num_args() == 1 &&
|
|
regex->arg(0)->is_full_seq())
|
|
return true;
|
|
|
|
// Loop with lo=0 and no upper bound behaves like R*
|
|
// (r{0,} ≡ r*), so it is self-stabilizing.
|
|
if (regex->is_loop() && regex->is_nullable()) {
|
|
// A nullable loop with a star-like body: heuristic check.
|
|
// Only mark as self-stabilizing if the body is a Kleene closure.
|
|
// Loop(R, 0, ∞) ~ R* — but we rely on the sgraph to normalize
|
|
// these, so only catch exact star nodes above.
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Self-stabilizing propagation through derivatives
|
|
// -----------------------------------------------------------------------
|
|
|
|
void seq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
|
|
if (!parent || !deriv)
|
|
return;
|
|
|
|
// If the derivative is already known to be self-stabilizing (either
|
|
// inherently or from a prior propagation), nothing to do.
|
|
if (is_self_stabilizing(deriv))
|
|
return;
|
|
|
|
// If the derivative is itself inherently self-stabilizing
|
|
// (e.g., it is a star or full_seq), mark it now.
|
|
if (compute_self_stabilizing(deriv)) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
|
|
// Rule 1: Star parent.
|
|
// D(c, R*) = D(c, R) · R*. The derivative always contains the
|
|
// R* tail, so it is self-stabilizing regardless of D(c,R).
|
|
if (parent->is_star()) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
|
|
// Rule 2: Full_seq parent.
|
|
// D(c, Σ*) = Σ*, and Σ* is self-stabilizing.
|
|
// (The derivative should be Σ* itself; mark it for safety.)
|
|
if (parent->is_full_seq()) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
|
|
// Check if parent is self-stabilizing (either inherently or marked).
|
|
bool parent_ss = is_self_stabilizing(parent) || compute_self_stabilizing(parent);
|
|
|
|
// Rule 3: Concat parent R · S.
|
|
// D(c, R·S) = D(c,R)·S | (nullable(R) ? D(c,S) : ∅).
|
|
// If S is self-stabilizing, the D(c,R)·S branch inherits it.
|
|
// If the whole parent R·S is self-stabilizing, the derivative is too.
|
|
if (parent->is_concat() && parent->num_args() == 2) {
|
|
euf::snode* tail = parent->arg(1);
|
|
bool tail_ss = is_self_stabilizing(tail) || compute_self_stabilizing(tail);
|
|
if (tail_ss || parent_ss) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Rule 4: Union parent R | S.
|
|
// D(c, R|S) = D(c,R) | D(c,S).
|
|
// Self-stabilizing if both children are self-stabilizing.
|
|
if (parent->is_union() && parent->num_args() == 2) {
|
|
euf::snode* lhs = parent->arg(0);
|
|
euf::snode* rhs = parent->arg(1);
|
|
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
|
|
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
|
|
if (lhs_ss && rhs_ss) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Rule 5: Intersection parent R ∩ S.
|
|
// D(c, R∩S) = D(c,R) ∩ D(c,S).
|
|
// Self-stabilizing if both children are self-stabilizing.
|
|
if (parent->is_intersect() && parent->num_args() == 2) {
|
|
euf::snode* lhs = parent->arg(0);
|
|
euf::snode* rhs = parent->arg(1);
|
|
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
|
|
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
|
|
if (lhs_ss && rhs_ss) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Rule 6: Complement parent ~R.
|
|
// D(c, ~R) = ~D(c, R).
|
|
// Preserves self-stabilizing from R.
|
|
if (parent->is_complement() && parent->num_args() == 1) {
|
|
euf::snode* inner = parent->arg(0);
|
|
bool inner_ss = is_self_stabilizing(inner) || compute_self_stabilizing(inner);
|
|
if (inner_ss) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Rule 7: Generic self-stabilizing parent.
|
|
// If the parent was explicitly marked self-stabilizing (e.g., via
|
|
// a previous propagation), propagate to the derivative.
|
|
if (parent_ss) {
|
|
set_self_stabilizing(deriv);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Derivative with propagation
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
|
|
if (!re || !elem)
|
|
return nullptr;
|
|
euf::snode* deriv = derivative(re, elem);
|
|
if (deriv)
|
|
propagate_self_stabilizing(re, deriv);
|
|
return deriv;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Uniform derivative (symbolic character consumption)
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::try_uniform_derivative(euf::snode* regex) {
|
|
if (!regex)
|
|
return nullptr;
|
|
|
|
// Quick exits: trivial regexes with known uniform derivatives.
|
|
// Σ* (full_seq) has derivative Σ* for every character.
|
|
if (regex->is_full_seq())
|
|
return regex;
|
|
// ∅ (fail) has derivative ∅ for every character — but this means
|
|
// every character is rejected. Return fail so the caller can
|
|
// detect a conflict.
|
|
if (regex->is_fail())
|
|
return regex;
|
|
|
|
// Compute minterms: the character-class partition of the alphabet
|
|
// induced by the regex.
|
|
euf::snode_vector minterms;
|
|
m_sg.compute_minterms(regex, minterms);
|
|
if (minterms.empty())
|
|
return nullptr;
|
|
|
|
// Compute the derivative for each non-empty minterm. If all produce
|
|
// the same result, the derivative is independent of the character
|
|
// value and we can consume a symbolic character deterministically.
|
|
euf::snode* uniform = nullptr;
|
|
for (euf::snode* mt : minterms) {
|
|
if (!mt || mt->is_fail())
|
|
continue; // empty character class — no character belongs to it
|
|
euf::snode* deriv = m_sg.brzozowski_deriv(regex, mt);
|
|
if (!deriv)
|
|
return nullptr; // derivative computation failed
|
|
if (!uniform) {
|
|
uniform = deriv;
|
|
} else if (uniform->id() != deriv->id()) {
|
|
return nullptr; // different derivatives — not uniform
|
|
}
|
|
}
|
|
return uniform; // may be nullptr if all minterms were fail/empty
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Ground prefix consumption
|
|
// -----------------------------------------------------------------------
|
|
|
|
bool seq_regex::is_empty_regex(euf::snode* re) const {
|
|
if (!re)
|
|
return false;
|
|
// direct empty language constant
|
|
if (re->is_fail())
|
|
return true;
|
|
// kinds that are never empty
|
|
if (re->is_star() || re->is_to_re() ||
|
|
re->is_full_char() || re->is_full_seq())
|
|
return false;
|
|
// loop with lo == 0 accepts ε
|
|
if (re->is_loop() && re->is_nullable())
|
|
return false;
|
|
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
expr* e = re->get_expr();
|
|
if (!e)
|
|
return false;
|
|
|
|
expr* r1, * r2;
|
|
// union is empty iff both children are empty
|
|
if (seq.re.is_union(e, r1, r2)) {
|
|
SASSERT(re->num_args() == 2);
|
|
return is_empty_regex(re->arg(0)) && is_empty_regex(re->arg(1));
|
|
}
|
|
// regex concat is empty if either child is empty
|
|
if (seq.re.is_concat(e, r1, r2)) {
|
|
SASSERT(re->num_args() == 2);
|
|
return is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1));
|
|
}
|
|
// intersection is empty if either child is empty
|
|
if (seq.re.is_intersection(e, r1, r2)) {
|
|
SASSERT(re->num_args() == 2);
|
|
if (is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1)))
|
|
return true;
|
|
}
|
|
// complement of full_seq is empty
|
|
if (re->is_complement() && re->num_args() == 1 && re->arg(0)->is_full_seq())
|
|
return true;
|
|
// loop(empty, lo, _) with lo > 0 is empty
|
|
if (re->is_loop() && re->num_args() >= 1 && is_empty_regex(re->arg(0)))
|
|
return !re->is_nullable(); // empty if not nullable (i.e., lo > 0)
|
|
|
|
return false;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// BFS regex emptiness check — helper: collect character boundaries
|
|
// -----------------------------------------------------------------------
|
|
|
|
void seq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
|
|
if (!re || !re->get_expr())
|
|
return;
|
|
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
expr* e = re->get_expr();
|
|
|
|
// Range predicate re.range(lo, hi): boundary at lo and hi+1
|
|
// Range arguments are string expressions (e.g., str.unit(ch))
|
|
expr* lo_expr = nullptr;
|
|
expr* hi_expr = nullptr;
|
|
if (seq.re.is_range(e, lo_expr, hi_expr)) {
|
|
zstring s_lo, s_hi;
|
|
if (lo_expr && seq.str.is_string(lo_expr, s_lo) && s_lo.length() == 1)
|
|
bounds.push_back(s_lo[0]);
|
|
if (hi_expr && seq.str.is_string(hi_expr, s_hi) && s_hi.length() == 1 && s_hi[0] < zstring::max_char())
|
|
bounds.push_back(s_hi[0] + 1);
|
|
return;
|
|
}
|
|
|
|
// to_re(s): boundary at first character and first+1
|
|
expr* body = nullptr;
|
|
if (seq.re.is_to_re(e, body)) {
|
|
zstring s;
|
|
if (seq.str.is_string(body, s) && s.length() > 0) {
|
|
unsigned first_ch = s[0];
|
|
bounds.push_back(first_ch);
|
|
if (first_ch < zstring::max_char())
|
|
bounds.push_back(first_ch + 1);
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Leaf nodes with no character discrimination
|
|
if (re->is_fail() || re->is_full_char() || re->is_full_seq())
|
|
return;
|
|
|
|
// Recurse into children (handles union, concat, star, loop, etc.)
|
|
for (unsigned i = 0; i < re->num_args(); ++i)
|
|
collect_char_boundaries(re->arg(i), bounds);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// BFS regex emptiness check — helper: alphabet representatives
|
|
// -----------------------------------------------------------------------
|
|
|
|
void seq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
|
|
unsigned_vector bounds;
|
|
bounds.push_back(0); // always include character 0
|
|
collect_char_boundaries(re, bounds);
|
|
|
|
// Sort and deduplicate
|
|
std::sort(bounds.begin(), bounds.end());
|
|
unsigned prev = UINT_MAX;
|
|
for (unsigned b : bounds) {
|
|
if (b != prev) {
|
|
reps.push_back(m_sg.mk_char(b));
|
|
prev = b;
|
|
}
|
|
}
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// BFS regex emptiness check
|
|
// -----------------------------------------------------------------------
|
|
|
|
lbool seq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
|
|
if (!re || !re->get_expr())
|
|
return l_undef;
|
|
if (re->is_fail())
|
|
return l_true;
|
|
if (re->is_nullable())
|
|
return l_false;
|
|
// Structural quick checks for kinds that are never empty
|
|
if (re->is_star() || re->is_full_char() || re->is_full_seq() || re->is_to_re())
|
|
return l_false;
|
|
// Structural emptiness catches simple cases
|
|
if (is_empty_regex(re))
|
|
return l_true;
|
|
// Only handle ground regexes; non-ground can't be fully explored
|
|
if (!re->is_ground())
|
|
return l_undef;
|
|
|
|
// BFS over the Brzozowski derivative automaton.
|
|
// Each state is a derivative regex snode identified by its id.
|
|
// We explore states by computing derivatives for representative
|
|
// characters from the alphabet partition.
|
|
uint_set visited;
|
|
euf::snode_vector worklist;
|
|
worklist.push_back(re);
|
|
visited.insert(re->id());
|
|
|
|
unsigned states_explored = 0;
|
|
bool had_failed_deriv = false;
|
|
|
|
while (!worklist.empty()) {
|
|
if (states_explored >= max_states)
|
|
return l_undef;
|
|
|
|
euf::snode* current = worklist.back();
|
|
worklist.pop_back();
|
|
++states_explored;
|
|
|
|
// Compute representative characters for current state's
|
|
// alphabet partition. Each representative is a concrete
|
|
// character snode whose equivalence class has identical
|
|
// derivative behavior.
|
|
euf::snode_vector reps;
|
|
get_alphabet_representatives(current, reps);
|
|
|
|
if (reps.empty()) {
|
|
// No representatives means no character predicates;
|
|
// use a default character to explore the single partition.
|
|
reps.push_back(m_sg.mk_char('a'));
|
|
}
|
|
|
|
for (euf::snode* ch : reps) {
|
|
euf::snode* deriv = m_sg.brzozowski_deriv(current, ch);
|
|
if (!deriv) {
|
|
// Derivative computation failed for this character.
|
|
// Track the failure but continue with other characters.
|
|
had_failed_deriv = true;
|
|
continue;
|
|
}
|
|
if (deriv->is_nullable())
|
|
return l_false; // found an accepting state
|
|
if (deriv->is_fail())
|
|
continue; // dead-end, no need to explore further
|
|
if (is_empty_regex(deriv))
|
|
continue; // structurally empty subtree
|
|
if (!visited.contains(deriv->id())) {
|
|
visited.insert(deriv->id());
|
|
worklist.push_back(deriv);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Exhausted all reachable states without finding a nullable one.
|
|
// If we had any failed derivative computations, the result is
|
|
// inconclusive since we may have missed reachable states.
|
|
if (had_failed_deriv)
|
|
return l_undef;
|
|
|
|
return l_true;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Multi-regex intersection emptiness check
|
|
// BFS over the product of Brzozowski derivative automata.
|
|
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
|
|
// -----------------------------------------------------------------------
|
|
|
|
lbool seq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
|
|
unsigned max_states) {
|
|
if (regexes.empty())
|
|
return l_false; // empty intersection = full language (vacuously non-empty)
|
|
|
|
// Quick checks: if any regex is fail/empty, intersection is empty
|
|
for (euf::snode* re : regexes) {
|
|
if (!re || !re->get_expr())
|
|
return l_undef;
|
|
if (re->is_fail() || is_empty_regex(re))
|
|
return l_true;
|
|
}
|
|
|
|
// Check if all are nullable (intersection accepts ε)
|
|
bool all_nullable = true;
|
|
for (euf::snode* re : regexes) {
|
|
if (!re->is_nullable()) { all_nullable = false; break; }
|
|
}
|
|
if (all_nullable)
|
|
return l_false;
|
|
|
|
// Single regex: delegate to is_empty_bfs
|
|
if (regexes.size() == 1)
|
|
return is_empty_bfs(regexes[0], max_states);
|
|
|
|
// Build product BFS. State = tuple of regex snode ids.
|
|
// Use a map from state hash to visited set.
|
|
using state_t = svector<unsigned>;
|
|
|
|
auto state_hash = [](state_t const& s) -> unsigned {
|
|
unsigned h = 0;
|
|
for (unsigned id : s)
|
|
h = h * 31 + id;
|
|
return h;
|
|
};
|
|
|
|
auto state_eq = [](state_t const& a, state_t const& b) -> bool {
|
|
if (a.size() != b.size()) return false;
|
|
for (unsigned i = 0; i < a.size(); ++i)
|
|
if (a[i] != b[i]) return false;
|
|
return true;
|
|
};
|
|
|
|
// Use simple set via sorted vector of hashes (good enough for bounded BFS)
|
|
std::unordered_set<unsigned> visited_hashes;
|
|
|
|
struct bfs_state {
|
|
ptr_vector<euf::snode> regexes;
|
|
};
|
|
|
|
std::vector<bfs_state> worklist;
|
|
bfs_state initial;
|
|
initial.regexes.append(regexes);
|
|
worklist.push_back(std::move(initial));
|
|
|
|
state_t init_ids;
|
|
for (euf::snode* re : regexes)
|
|
init_ids.push_back(re->id());
|
|
visited_hashes.insert(state_hash(init_ids));
|
|
|
|
unsigned states_explored = 0;
|
|
bool had_failed = false;
|
|
|
|
// Collect alphabet representatives from the intersection of all regexes
|
|
// (merge boundaries from all)
|
|
unsigned_vector all_bounds;
|
|
all_bounds.push_back(0);
|
|
for (euf::snode* re : regexes)
|
|
collect_char_boundaries(re, all_bounds);
|
|
std::sort(all_bounds.begin(), all_bounds.end());
|
|
|
|
euf::snode_vector reps;
|
|
unsigned prev = UINT_MAX;
|
|
for (unsigned b : all_bounds) {
|
|
if (b != prev) {
|
|
reps.push_back(m_sg.mk_char(b));
|
|
prev = b;
|
|
}
|
|
}
|
|
if (reps.empty())
|
|
reps.push_back(m_sg.mk_char('a'));
|
|
|
|
while (!worklist.empty()) {
|
|
if (states_explored >= max_states)
|
|
return l_undef;
|
|
|
|
bfs_state current = std::move(worklist.back());
|
|
worklist.pop_back();
|
|
++states_explored;
|
|
|
|
for (euf::snode* ch : reps) {
|
|
ptr_vector<euf::snode> derivs;
|
|
bool any_fail = false;
|
|
bool all_null = true;
|
|
bool deriv_failed = false;
|
|
|
|
for (euf::snode* re : current.regexes) {
|
|
euf::snode* d = m_sg.brzozowski_deriv(re, ch);
|
|
if (!d) { deriv_failed = true; break; }
|
|
if (d->is_fail()) { any_fail = true; break; }
|
|
if (!d->is_nullable()) all_null = false;
|
|
derivs.push_back(d);
|
|
}
|
|
|
|
if (deriv_failed) { had_failed = true; continue; }
|
|
if (any_fail) continue; // this character leads to empty intersection
|
|
|
|
if (all_null)
|
|
return l_false; // found an accepting state in the product
|
|
|
|
// Check if any component is structurally empty
|
|
bool any_empty = false;
|
|
for (euf::snode* d : derivs) {
|
|
if (is_empty_regex(d)) { any_empty = true; break; }
|
|
}
|
|
if (any_empty) continue;
|
|
|
|
// Compute state hash and check visited
|
|
state_t ids;
|
|
for (euf::snode* d : derivs)
|
|
ids.push_back(d->id());
|
|
unsigned h = state_hash(ids);
|
|
if (visited_hashes.count(h) == 0) {
|
|
visited_hashes.insert(h);
|
|
bfs_state next;
|
|
next.regexes.append(derivs);
|
|
worklist.push_back(std::move(next));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (had_failed)
|
|
return l_undef;
|
|
return l_true; // exhausted all states, intersection is empty
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Language subset check: L(A) ⊆ L(B)
|
|
// via intersection(A, complement(B)) = ∅
|
|
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
|
|
// -----------------------------------------------------------------------
|
|
|
|
lbool seq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
|
|
if (!subset_re || !superset_re)
|
|
return l_undef;
|
|
|
|
// Quick checks
|
|
if (subset_re->is_fail() || is_empty_regex(subset_re))
|
|
return l_true; // ∅ ⊆ anything
|
|
if (superset_re->is_full_seq())
|
|
return l_true; // anything ⊆ Σ*
|
|
if (subset_re == superset_re)
|
|
return l_true; // L ⊆ L
|
|
|
|
// Build complement(superset)
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
ast_manager& mgr = m_sg.get_manager();
|
|
expr* sup_expr = superset_re->get_expr();
|
|
if (!sup_expr)
|
|
return l_undef;
|
|
expr_ref comp(seq.re.mk_complement(sup_expr), mgr);
|
|
euf::snode* comp_sn = m_sg.mk(comp);
|
|
if (!comp_sn)
|
|
return l_undef;
|
|
|
|
// Build intersection and check emptiness
|
|
// subset ∩ complement(superset) should be empty for subset relation
|
|
expr* sub_expr = subset_re->get_expr();
|
|
if (!sub_expr)
|
|
return l_undef;
|
|
expr_ref inter(seq.re.mk_inter(sub_expr, comp.get()), mgr);
|
|
euf::snode* inter_sn = m_sg.mk(inter);
|
|
if (!inter_sn)
|
|
return l_undef;
|
|
|
|
return is_empty_bfs(inter_sn);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Collect primitive regex intersection for a variable
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::collect_primitive_regex_intersection(
|
|
euf::snode* var, seq::nielsen_node const& node) {
|
|
if (!var)
|
|
return nullptr;
|
|
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
ast_manager& mgr = m_sg.get_manager();
|
|
euf::snode* result = nullptr;
|
|
|
|
for (auto const& mem : node.str_mems()) {
|
|
if (!mem.m_str || !mem.m_regex)
|
|
continue;
|
|
// Primitive constraint: str is a single variable
|
|
if (!mem.is_primitive())
|
|
continue;
|
|
euf::snode* first = mem.m_str->first();
|
|
if (!first || first != var)
|
|
continue;
|
|
|
|
if (!result) {
|
|
result = mem.m_regex;
|
|
} else {
|
|
expr* r1 = result->get_expr();
|
|
expr* r2 = mem.m_regex->get_expr();
|
|
if (r1 && r2) {
|
|
expr_ref inter(seq.re.mk_inter(r1, r2), mgr);
|
|
result = m_sg.mk(inter);
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Cycle detection
|
|
// -----------------------------------------------------------------------
|
|
|
|
bool seq_regex::detect_cycle(seq::str_mem const& mem) const {
|
|
return extract_cycle(mem) != nullptr;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Ground prefix consumption
|
|
// -----------------------------------------------------------------------
|
|
|
|
seq_regex::simplify_status seq_regex::simplify_ground_prefix(seq::str_mem& mem) {
|
|
if (!mem.m_str || !mem.m_regex)
|
|
return simplify_status::ok;
|
|
|
|
while (mem.m_str && !mem.m_str->is_empty()) {
|
|
euf::snode* first = mem.m_str->first();
|
|
if (!first || !first->is_char())
|
|
break;
|
|
euf::snode* parent_re = mem.m_regex;
|
|
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, first);
|
|
if (!deriv)
|
|
break;
|
|
if (deriv->is_fail())
|
|
return simplify_status::conflict;
|
|
// propagate self-stabilizing flag from parent to derivative
|
|
propagate_self_stabilizing(parent_re, deriv);
|
|
mem.m_str = m_sg.drop_first(mem.m_str);
|
|
mem.m_regex = deriv;
|
|
}
|
|
|
|
// check final state
|
|
if (mem.m_str && mem.m_str->is_empty()) {
|
|
if (mem.m_regex->is_nullable())
|
|
return simplify_status::satisfied;
|
|
return simplify_status::conflict;
|
|
}
|
|
|
|
return simplify_status::ok;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Ground suffix consumption (best-effort)
|
|
// -----------------------------------------------------------------------
|
|
|
|
seq_regex::simplify_status seq_regex::simplify_ground_suffix(seq::str_mem& mem) {
|
|
// Suffix consumption via reverse derivatives is complex.
|
|
// For now, only handle the case where the entire string is ground:
|
|
// consume all characters from the front (which covers trailing chars
|
|
// when the string is fully ground).
|
|
if (!mem.m_str || !mem.m_regex)
|
|
return simplify_status::ok;
|
|
if (!mem.m_str->is_ground())
|
|
return simplify_status::ok;
|
|
|
|
// If the string is ground, simplify_ground_prefix handles everything.
|
|
return simplify_ground_prefix(mem);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Trivial checks
|
|
// -----------------------------------------------------------------------
|
|
|
|
int seq_regex::check_trivial(seq::str_mem const& mem) const {
|
|
if (!mem.m_str || !mem.m_regex)
|
|
return 0;
|
|
// regex is ∅ => always conflict
|
|
if (is_empty_regex(mem.m_regex))
|
|
return -1;
|
|
// regex is Σ* => always satisfied
|
|
if (is_full_regex(mem.m_regex))
|
|
return 1;
|
|
// empty string checks
|
|
if (mem.m_str->is_empty()) {
|
|
if (mem.m_regex->is_nullable())
|
|
return 1;
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Minterm computation with filtering
|
|
// -----------------------------------------------------------------------
|
|
|
|
void seq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) {
|
|
if (!regex)
|
|
return;
|
|
|
|
// compute raw minterms from the regex predicates
|
|
euf::snode_vector raw;
|
|
m_sg.compute_minterms(regex, raw);
|
|
|
|
// filter: keep only minterms that are non-fail (non-empty character class).
|
|
// note: minterms are regex character-class expressions, not concrete
|
|
// characters, so we cannot compute Brzozowski derivatives with them.
|
|
// callers should compute derivatives using concrete or fresh chars.
|
|
for (euf::snode* mt : raw) {
|
|
if (!mt || mt->is_fail())
|
|
continue;
|
|
minterms.push_back(mt);
|
|
}
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Collect first characters
|
|
// -----------------------------------------------------------------------
|
|
|
|
void seq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
|
|
if (!re)
|
|
return;
|
|
|
|
// to_re(s): extract first character of the string body
|
|
if (re->is_to_re()) {
|
|
euf::snode* body = re->arg(0);
|
|
if (body && !body->is_empty()) {
|
|
euf::snode* first = body->first();
|
|
if (first && first->is_char()) {
|
|
bool dup = false;
|
|
for (euf::snode* c : chars)
|
|
if (c == first) { dup = true; break; }
|
|
if (!dup)
|
|
chars.push_back(first);
|
|
}
|
|
// Handle string literals (classified as s_other in sgraph)
|
|
else if (first && first->get_expr()) {
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
zstring s;
|
|
if (seq.str.is_string(first->get_expr(), s) && s.length() > 0) {
|
|
euf::snode* ch = m_sg.mk_char(s[0]);
|
|
bool dup = false;
|
|
for (euf::snode* c : chars)
|
|
if (c == ch) { dup = true; break; }
|
|
if (!dup)
|
|
chars.push_back(ch);
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
// leaf cases: produce representative characters for character classes
|
|
if (re->is_full_char()) {
|
|
// full character set (.): use 'a' as representative
|
|
euf::snode* ch = m_sg.mk_char('a');
|
|
bool dup = false;
|
|
for (euf::snode* c : chars)
|
|
if (c == ch) { dup = true; break; }
|
|
if (!dup)
|
|
chars.push_back(ch);
|
|
return;
|
|
}
|
|
|
|
// re.range(lo, hi): use lo as representative
|
|
if (re->get_expr()) {
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
expr* lo = nullptr, *hi = nullptr;
|
|
if (seq.re.is_range(re->get_expr(), lo, hi) && lo) {
|
|
zstring s;
|
|
unsigned ch_val = 'a';
|
|
if (seq.is_const_char(lo, ch_val)) {
|
|
euf::snode* ch = m_sg.mk_char(ch_val);
|
|
bool dup = false;
|
|
for (euf::snode* c : chars)
|
|
if (c == ch) { dup = true; break; }
|
|
if (!dup)
|
|
chars.push_back(ch);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (re->is_fail() || re->is_full_seq())
|
|
return;
|
|
|
|
// recurse into children (handles union, concat, star, loop, etc.)
|
|
for (unsigned i = 0; i < re->num_args(); ++i)
|
|
collect_first_chars(re->arg(i), chars);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Membership processing
|
|
// -----------------------------------------------------------------------
|
|
|
|
bool seq_regex::process_str_mem(seq::str_mem const& mem,
|
|
vector<seq::str_mem>& out_mems) {
|
|
if (!mem.m_str || !mem.m_regex)
|
|
return true;
|
|
// empty string: check nullable
|
|
if (mem.m_str->is_empty())
|
|
return mem.m_regex->is_nullable();
|
|
|
|
// consume ground prefix: derive regex by each leading concrete char
|
|
seq::str_mem working = mem;
|
|
simplify_status st = simplify_ground_prefix(working);
|
|
if (st == simplify_status::conflict)
|
|
return false;
|
|
if (st == simplify_status::satisfied)
|
|
return true;
|
|
|
|
// after ground prefix consumption, if the front is still a concrete
|
|
// character we can take one more step (shouldn't happen after
|
|
// simplify_ground_prefix, but guard defensively)
|
|
euf::snode* first = working.m_str->first();
|
|
if (first && first->is_char()) {
|
|
seq::str_mem derived = derive(working, first);
|
|
if (is_empty_regex(derived.m_regex))
|
|
return false;
|
|
out_mems.push_back(derived);
|
|
return true;
|
|
}
|
|
|
|
// string starts with a non-ground element (variable or unit):
|
|
// return the simplified constraint for the Nielsen graph to expand
|
|
// via character-split modifiers.
|
|
out_mems.push_back(working);
|
|
return true;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// History recording
|
|
// -----------------------------------------------------------------------
|
|
|
|
seq::str_mem seq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) {
|
|
// Build a history chain by prepending the new regex entry to the
|
|
// existing history. Uses regex-concat as a cons cell:
|
|
// new_history = re.concat(history_re, old_history)
|
|
// where arg(0) is the latest entry and arg(1) is the tail.
|
|
// If old_history is nullptr, the new entry becomes the terminal leaf.
|
|
euf::snode* new_history = history_re;
|
|
if (mem.m_history && history_re) {
|
|
expr* re_expr = history_re->get_expr();
|
|
expr* old_expr = mem.m_history->get_expr();
|
|
if (re_expr && old_expr) {
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
expr_ref chain(seq.re.mk_concat(re_expr, old_expr), m_sg.get_manager());
|
|
new_history = m_sg.mk(chain);
|
|
}
|
|
}
|
|
return seq::str_mem(mem.m_str, mem.m_regex, new_history, mem.m_id, mem.m_dep);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Cycle detection
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::extract_cycle(seq::str_mem const& mem) const {
|
|
// Walk the history chain looking for a repeated regex.
|
|
// A cycle exists when the current regex matches a regex in the history.
|
|
if (!mem.m_regex || !mem.m_history)
|
|
return nullptr;
|
|
|
|
euf::snode* current = mem.m_regex;
|
|
euf::snode* hist = mem.m_history;
|
|
|
|
// Walk the history chain up to a bounded depth.
|
|
// The history is structured as a chain of regex snapshots connected
|
|
// via the sgraph's regex-concat: each level's arg(0) is a snapshot
|
|
// and arg(1) is the tail. A leaf (non-concat) is a terminal entry.
|
|
unsigned bound = 1000;
|
|
while (hist && bound-- > 0) {
|
|
euf::snode* entry = hist;
|
|
euf::snode* tail = nullptr;
|
|
|
|
// If the history node is a regex concat, decompose it:
|
|
// arg(0) is the regex snapshot, arg(1) is the rest of the chain
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
if (hist->is_concat() && hist->get_expr() &&
|
|
seq.re.is_concat(hist->get_expr())) {
|
|
entry = hist->arg(0);
|
|
tail = hist->arg(1);
|
|
}
|
|
|
|
// Check pointer equality (fast, covers normalized regexes)
|
|
if (entry == current)
|
|
return entry;
|
|
// Check expression-level equality as fallback
|
|
if (entry->get_expr() && current->get_expr() &&
|
|
entry->get_expr() == current->get_expr())
|
|
return entry;
|
|
|
|
hist = tail;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Stabilizer from cycle
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::stabilizer_from_cycle(euf::snode* cycle_regex,
|
|
euf::snode* current_regex) {
|
|
if (!cycle_regex || !current_regex)
|
|
return nullptr;
|
|
|
|
expr* re_expr = cycle_regex->get_expr();
|
|
if (!re_expr)
|
|
return nullptr;
|
|
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
expr_ref star_expr(seq.re.mk_star(re_expr), m_sg.get_manager());
|
|
return m_sg.mk(star_expr);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Extract cycle history tokens
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::extract_cycle_history(seq::str_mem const& current,
|
|
seq::str_mem const& ancestor) {
|
|
// The history is built by simplify_and_init as a left-associative
|
|
// string concat chain: concat(concat(concat(nil, c1), c2), c3).
|
|
// Extract the tokens consumed since the ancestor.
|
|
if (!current.m_history)
|
|
return nullptr;
|
|
|
|
unsigned cur_len = current.m_history->length();
|
|
unsigned anc_len = ancestor.m_history ? ancestor.m_history->length() : 0;
|
|
|
|
if (cur_len <= anc_len)
|
|
return nullptr;
|
|
|
|
if (anc_len == 0)
|
|
return current.m_history;
|
|
|
|
return m_sg.drop_left(current.m_history, anc_len);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Get filtered stabilizer star
|
|
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243)
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::get_filtered_stabilizer_star(euf::snode* re,
|
|
euf::snode* excluded_char) {
|
|
if (!re)
|
|
return nullptr;
|
|
|
|
ptr_vector<euf::snode> const* stabs = get_stabilizers(re);
|
|
if (!stabs || stabs->empty())
|
|
return nullptr;
|
|
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
ast_manager& m = m_sg.get_manager();
|
|
euf::snode* filtered_union = nullptr;
|
|
|
|
for (euf::snode* s : *stabs) {
|
|
if (!s)
|
|
continue;
|
|
// Keep only stabilizers whose language cannot start with excluded_char
|
|
euf::snode* d = m_sg.brzozowski_deriv(s, excluded_char);
|
|
if (d && d->is_fail()) {
|
|
if (!filtered_union) {
|
|
filtered_union = s;
|
|
} else {
|
|
expr* e1 = filtered_union->get_expr();
|
|
expr* e2 = s->get_expr();
|
|
if (e1 && e2) {
|
|
expr_ref u(seq.re.mk_union(e1, e2), m);
|
|
filtered_union = m_sg.mk(u);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!filtered_union)
|
|
return nullptr;
|
|
|
|
expr* fe = filtered_union->get_expr();
|
|
if (!fe)
|
|
return nullptr;
|
|
expr_ref star_expr(seq.re.mk_star(fe), m);
|
|
return m_sg.mk(star_expr);
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Strengthened stabilizer construction with sub-cycle detection
|
|
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225)
|
|
// -----------------------------------------------------------------------
|
|
|
|
euf::snode* seq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
|
|
euf::snode* cycle_history) {
|
|
if (!cycle_regex || !cycle_history)
|
|
return nullptr;
|
|
|
|
// Flatten the history concat chain into a vector of character tokens.
|
|
euf::snode_vector tokens;
|
|
cycle_history->collect_tokens(tokens);
|
|
|
|
if (tokens.empty())
|
|
return nullptr;
|
|
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
ast_manager& m = m_sg.get_manager();
|
|
|
|
// Replay tokens on the cycle regex, detecting sub-cycles.
|
|
// A sub-cycle is detected when the derivative returns to cycle_regex.
|
|
svector<std::pair<unsigned, unsigned>> sub_cycles;
|
|
unsigned cycle_start = 0;
|
|
euf::snode* current_re = cycle_regex;
|
|
|
|
for (unsigned i = 0; i < tokens.size(); ++i) {
|
|
euf::snode* tok = tokens[i];
|
|
if (!tok)
|
|
return nullptr;
|
|
|
|
euf::snode* deriv = m_sg.brzozowski_deriv(current_re, tok);
|
|
if (!deriv)
|
|
return nullptr;
|
|
|
|
// Sub-cycle: derivative returned to the cycle entry regex
|
|
if (deriv == cycle_regex ||
|
|
(deriv->get_expr() && cycle_regex->get_expr() &&
|
|
deriv->get_expr() == cycle_regex->get_expr())) {
|
|
sub_cycles.push_back(std::make_pair(cycle_start, i + 1));
|
|
cycle_start = i + 1;
|
|
current_re = cycle_regex;
|
|
} else {
|
|
current_re = deriv;
|
|
}
|
|
}
|
|
|
|
// Remaining tokens that don't complete a sub-cycle
|
|
if (cycle_start < tokens.size())
|
|
sub_cycles.push_back(std::make_pair(cycle_start, tokens.size()));
|
|
|
|
if (sub_cycles.empty())
|
|
return nullptr;
|
|
|
|
// Build a stabilizer body for each sub-cycle.
|
|
// body = to_re(t0) · [filteredStar(R1, t1)] · to_re(t1) · ... · to_re(t_{n-1})
|
|
euf::snode* overall_union = nullptr;
|
|
|
|
for (auto const& sc : sub_cycles) {
|
|
unsigned start = sc.first;
|
|
unsigned end = sc.second;
|
|
if (start >= end)
|
|
continue;
|
|
|
|
euf::snode* re_state = cycle_regex;
|
|
euf::snode* body = nullptr;
|
|
|
|
for (unsigned i = start; i < end; ++i) {
|
|
euf::snode* tok = tokens[i];
|
|
if (!tok)
|
|
break;
|
|
|
|
// Insert filtered stabilizer star before each token after the first
|
|
if (i > start) {
|
|
euf::snode* filtered = get_filtered_stabilizer_star(re_state, tok);
|
|
if (filtered) {
|
|
expr* fe = filtered->get_expr();
|
|
if (fe) {
|
|
if (!body) {
|
|
body = filtered;
|
|
} else {
|
|
expr* be = body->get_expr();
|
|
if (be) {
|
|
expr_ref cat(seq.re.mk_concat(be, fe), m);
|
|
body = m_sg.mk(cat);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Convert char token to regex: to_re(unit(tok))
|
|
expr* tok_expr = tok->get_expr();
|
|
if (!tok_expr)
|
|
break;
|
|
|
|
expr_ref unit_str(seq.str.mk_unit(tok_expr), m);
|
|
expr_ref tok_re(seq.re.mk_to_re(unit_str), m);
|
|
euf::snode* tok_re_sn = m_sg.mk(tok_re);
|
|
|
|
if (!body) {
|
|
body = tok_re_sn;
|
|
} else {
|
|
expr* be = body->get_expr();
|
|
expr* te = tok_re_sn->get_expr();
|
|
if (be && te) {
|
|
expr_ref cat(seq.re.mk_concat(be, te), m);
|
|
body = m_sg.mk(cat);
|
|
}
|
|
}
|
|
|
|
// Advance the regex state
|
|
euf::snode* deriv = m_sg.brzozowski_deriv(re_state, tok);
|
|
if (!deriv)
|
|
break;
|
|
re_state = deriv;
|
|
}
|
|
|
|
if (!body)
|
|
continue;
|
|
|
|
if (!overall_union) {
|
|
overall_union = body;
|
|
} else {
|
|
expr* oe = overall_union->get_expr();
|
|
expr* be = body->get_expr();
|
|
if (oe && be) {
|
|
expr_ref u(seq.re.mk_union(oe, be), m);
|
|
overall_union = m_sg.mk(u);
|
|
}
|
|
}
|
|
}
|
|
|
|
return overall_union;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------
|
|
// Stabilizer-based subsumption (enhanced)
|
|
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386)
|
|
// -----------------------------------------------------------------------
|
|
|
|
bool seq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) {
|
|
if (!mem.m_str || !mem.m_regex)
|
|
return false;
|
|
|
|
// 1. Leading token must be a variable
|
|
euf::snode* first = mem.m_str->first();
|
|
if (!first || !first->is_var())
|
|
return false;
|
|
|
|
// 2. Must have stabilizers for the regex
|
|
if (!has_stabilizers(mem.m_regex))
|
|
return false;
|
|
|
|
// 3. Build stabStar = star(union(all stabilizers for this regex))
|
|
euf::snode* stab_union = get_stabilizer_union(mem.m_regex);
|
|
if (!stab_union)
|
|
return false;
|
|
|
|
seq_util& seq = m_sg.get_seq_util();
|
|
ast_manager& mgr = m_sg.get_manager();
|
|
expr* su_expr = stab_union->get_expr();
|
|
if (!su_expr)
|
|
return false;
|
|
expr_ref stab_star(seq.re.mk_star(su_expr), mgr);
|
|
euf::snode* stab_star_sn = m_sg.mk(stab_star);
|
|
if (!stab_star_sn)
|
|
return false;
|
|
|
|
// 4. Collect all primitive regex constraints on variable `first`
|
|
euf::snode* x_range = collect_primitive_regex_intersection(first, node);
|
|
if (!x_range)
|
|
return false;
|
|
|
|
// 5. Check L(x_range) ⊆ L(stab_star)
|
|
lbool result = is_language_subset(x_range, stab_star_sn);
|
|
return result == l_true;
|
|
}
|
|
|
|
}
|