mirror of
https://github.com/Z3Prover/z3
synced 2026-03-17 18:43:45 +00:00
updates
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
8a48caf742
commit
27f5541b0b
11 changed files with 2176 additions and 80 deletions
|
|
@ -15,11 +15,294 @@ Author:
|
|||
|
||||
--*/
|
||||
#include "smt/nseq_regex.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace smt {
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Regex emptiness checking (structural analysis)
|
||||
// Stabilizer store
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void nseq_regex::reset_stabilizers() {
|
||||
m_stabilizers.reset();
|
||||
m_self_stabilizing.reset();
|
||||
}
|
||||
|
||||
void nseq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
|
||||
if (!regex || !stabilizer)
|
||||
return;
|
||||
|
||||
unsigned id = regex->id();
|
||||
auto& stabs = m_stabilizers.insert_if_not_there(id, ptr_vector<euf::snode>());
|
||||
|
||||
// De-duplicate by pointer equality (mirrors ZIPT Environment.AddStabilizer
|
||||
// which checks reference equality before adding).
|
||||
for (euf::snode* s : stabs)
|
||||
if (s == stabilizer)
|
||||
return;
|
||||
stabs.push_back(stabilizer);
|
||||
}
|
||||
|
||||
euf::snode* nseq_regex::get_stabilizer_union(euf::snode* regex) {
|
||||
if (!regex)
|
||||
return nullptr;
|
||||
|
||||
if (!m_stabilizers.contains(regex->id()))
|
||||
return nullptr;
|
||||
|
||||
auto& stabs = m_stabilizers[regex->id()];
|
||||
if (stabs.empty())
|
||||
return nullptr;
|
||||
|
||||
// Single stabilizer: return it directly.
|
||||
if (stabs.size() == 1)
|
||||
return stabs[0];
|
||||
|
||||
// Multiple stabilizers: build re.union chain.
|
||||
// union(s1, union(s2, ... union(sN-1, sN)...))
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
euf::snode* result = stabs[stabs.size() - 1];
|
||||
for (unsigned i = stabs.size() - 1; i-- > 0; ) {
|
||||
expr* lhs = stabs[i]->get_expr();
|
||||
expr* rhs = result->get_expr();
|
||||
if (!lhs || !rhs)
|
||||
return nullptr;
|
||||
expr_ref un(seq.re.mk_union(lhs, rhs), m_sg.get_manager());
|
||||
result = m_sg.mk(un);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool nseq_regex::has_stabilizers(euf::snode* regex) const {
|
||||
if (!regex)
|
||||
return false;
|
||||
if (!m_stabilizers.contains(regex->id()))
|
||||
return false;
|
||||
return !m_stabilizers[regex->id()].empty();
|
||||
}
|
||||
|
||||
ptr_vector<euf::snode> const* nseq_regex::get_stabilizers(euf::snode* regex) const {
|
||||
if (!regex)
|
||||
return nullptr;
|
||||
if (!m_stabilizers.contains(regex->id()))
|
||||
return nullptr;
|
||||
return &m_stabilizers[regex->id()];
|
||||
}
|
||||
|
||||
void nseq_regex::set_self_stabilizing(euf::snode* regex) {
|
||||
if (!regex)
|
||||
return;
|
||||
m_self_stabilizing.insert(regex->id());
|
||||
}
|
||||
|
||||
bool nseq_regex::is_self_stabilizing(euf::snode* regex) const {
|
||||
if (!regex)
|
||||
return false;
|
||||
return m_self_stabilizing.contains(regex->id());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Self-stabilizing auto-detection
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nseq_regex::compute_self_stabilizing(euf::snode* regex) const {
|
||||
if (!regex)
|
||||
return false;
|
||||
|
||||
// R* is always self-stabilizing: D(c, R*) = D(c,R) · R*,
|
||||
// so R* appears as the tail of every derivative and acts as
|
||||
// its own stabilizer.
|
||||
if (regex->is_star())
|
||||
return true;
|
||||
|
||||
// Σ* (full_seq, i.e., re.all / .*) is self-stabilizing:
|
||||
// D(c, Σ*) = Σ* for every character c.
|
||||
if (regex->is_full_seq())
|
||||
return true;
|
||||
|
||||
// ∅ (fail / empty language) is trivially self-stabilizing:
|
||||
// it has no live derivatives, so the flag is vacuously true.
|
||||
if (regex->is_fail())
|
||||
return true;
|
||||
|
||||
// Complement of full_seq is ∅ (complement of Σ*), which is
|
||||
// also trivially self-stabilizing.
|
||||
if (regex->is_complement() && regex->num_args() == 1 &&
|
||||
regex->arg(0)->is_full_seq())
|
||||
return true;
|
||||
|
||||
// Loop with lo=0 and no upper bound behaves like R*
|
||||
// (r{0,} ≡ r*), so it is self-stabilizing.
|
||||
if (regex->is_loop() && regex->is_nullable()) {
|
||||
// A nullable loop with a star-like body: heuristic check.
|
||||
// Only mark as self-stabilizing if the body is a Kleene closure.
|
||||
// Loop(R, 0, ∞) ~ R* — but we rely on the sgraph to normalize
|
||||
// these, so only catch exact star nodes above.
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Self-stabilizing propagation through derivatives
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void nseq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
|
||||
if (!parent || !deriv)
|
||||
return;
|
||||
|
||||
// If the derivative is already known to be self-stabilizing (either
|
||||
// inherently or from a prior propagation), nothing to do.
|
||||
if (is_self_stabilizing(deriv))
|
||||
return;
|
||||
|
||||
// If the derivative is itself inherently self-stabilizing
|
||||
// (e.g., it is a star or full_seq), mark it now.
|
||||
if (compute_self_stabilizing(deriv)) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
|
||||
// Rule 1: Star parent.
|
||||
// D(c, R*) = D(c, R) · R*. The derivative always contains the
|
||||
// R* tail, so it is self-stabilizing regardless of D(c,R).
|
||||
if (parent->is_star()) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
|
||||
// Rule 2: Full_seq parent.
|
||||
// D(c, Σ*) = Σ*, and Σ* is self-stabilizing.
|
||||
// (The derivative should be Σ* itself; mark it for safety.)
|
||||
if (parent->is_full_seq()) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if parent is self-stabilizing (either inherently or marked).
|
||||
bool parent_ss = is_self_stabilizing(parent) || compute_self_stabilizing(parent);
|
||||
|
||||
// Rule 3: Concat parent R · S.
|
||||
// D(c, R·S) = D(c,R)·S | (nullable(R) ? D(c,S) : ∅).
|
||||
// If S is self-stabilizing, the D(c,R)·S branch inherits it.
|
||||
// If the whole parent R·S is self-stabilizing, the derivative is too.
|
||||
if (parent->is_concat() && parent->num_args() == 2) {
|
||||
euf::snode* tail = parent->arg(1);
|
||||
bool tail_ss = is_self_stabilizing(tail) || compute_self_stabilizing(tail);
|
||||
if (tail_ss || parent_ss) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 4: Union parent R | S.
|
||||
// D(c, R|S) = D(c,R) | D(c,S).
|
||||
// Self-stabilizing if both children are self-stabilizing.
|
||||
if (parent->is_union() && parent->num_args() == 2) {
|
||||
euf::snode* lhs = parent->arg(0);
|
||||
euf::snode* rhs = parent->arg(1);
|
||||
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
|
||||
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
|
||||
if (lhs_ss && rhs_ss) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 5: Intersection parent R ∩ S.
|
||||
// D(c, R∩S) = D(c,R) ∩ D(c,S).
|
||||
// Self-stabilizing if both children are self-stabilizing.
|
||||
if (parent->is_intersect() && parent->num_args() == 2) {
|
||||
euf::snode* lhs = parent->arg(0);
|
||||
euf::snode* rhs = parent->arg(1);
|
||||
bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs);
|
||||
bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs);
|
||||
if (lhs_ss && rhs_ss) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 6: Complement parent ~R.
|
||||
// D(c, ~R) = ~D(c, R).
|
||||
// Preserves self-stabilizing from R.
|
||||
if (parent->is_complement() && parent->num_args() == 1) {
|
||||
euf::snode* inner = parent->arg(0);
|
||||
bool inner_ss = is_self_stabilizing(inner) || compute_self_stabilizing(inner);
|
||||
if (inner_ss) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Rule 7: Generic self-stabilizing parent.
|
||||
// If the parent was explicitly marked self-stabilizing (e.g., via
|
||||
// a previous propagation), propagate to the derivative.
|
||||
if (parent_ss) {
|
||||
set_self_stabilizing(deriv);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Derivative with propagation
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* nseq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
|
||||
if (!re || !elem)
|
||||
return nullptr;
|
||||
euf::snode* deriv = derivative(re, elem);
|
||||
if (deriv)
|
||||
propagate_self_stabilizing(re, deriv);
|
||||
return deriv;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Uniform derivative (symbolic character consumption)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* nseq_regex::try_uniform_derivative(euf::snode* regex) {
|
||||
if (!regex)
|
||||
return nullptr;
|
||||
|
||||
// Quick exits: trivial regexes with known uniform derivatives.
|
||||
// Σ* (full_seq) has derivative Σ* for every character.
|
||||
if (regex->is_full_seq())
|
||||
return regex;
|
||||
// ∅ (fail) has derivative ∅ for every character — but this means
|
||||
// every character is rejected. Return fail so the caller can
|
||||
// detect a conflict.
|
||||
if (regex->is_fail())
|
||||
return regex;
|
||||
|
||||
// Compute minterms: the character-class partition of the alphabet
|
||||
// induced by the regex.
|
||||
euf::snode_vector minterms;
|
||||
m_sg.compute_minterms(regex, minterms);
|
||||
if (minterms.empty())
|
||||
return nullptr;
|
||||
|
||||
// Compute the derivative for each non-empty minterm. If all produce
|
||||
// the same result, the derivative is independent of the character
|
||||
// value and we can consume a symbolic character deterministically.
|
||||
euf::snode* uniform = nullptr;
|
||||
for (euf::snode* mt : minterms) {
|
||||
if (!mt || mt->is_fail())
|
||||
continue; // empty character class — no character belongs to it
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(regex, mt);
|
||||
if (!deriv)
|
||||
return nullptr; // derivative computation failed
|
||||
if (!uniform) {
|
||||
uniform = deriv;
|
||||
} else if (uniform->id() != deriv->id()) {
|
||||
return nullptr; // different derivatives — not uniform
|
||||
}
|
||||
}
|
||||
return uniform; // may be nullptr if all minterms were fail/empty
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Ground prefix consumption
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nseq_regex::is_empty_regex(euf::snode* re) const {
|
||||
|
|
@ -68,6 +351,377 @@ namespace smt {
|
|||
return false;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// BFS regex emptiness check — helper: collect character boundaries
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void nseq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
|
||||
if (!re || !re->get_expr())
|
||||
return;
|
||||
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
expr* e = re->get_expr();
|
||||
|
||||
// Range predicate re.range(lo, hi): boundary at lo and hi+1
|
||||
// Range arguments are string expressions (e.g., str.unit(ch))
|
||||
expr* lo_expr = nullptr;
|
||||
expr* hi_expr = nullptr;
|
||||
if (seq.re.is_range(e, lo_expr, hi_expr)) {
|
||||
zstring s_lo, s_hi;
|
||||
if (lo_expr && seq.str.is_string(lo_expr, s_lo) && s_lo.length() == 1)
|
||||
bounds.push_back(s_lo[0]);
|
||||
if (hi_expr && seq.str.is_string(hi_expr, s_hi) && s_hi.length() == 1 && s_hi[0] < zstring::max_char())
|
||||
bounds.push_back(s_hi[0] + 1);
|
||||
return;
|
||||
}
|
||||
|
||||
// to_re(s): boundary at first character and first+1
|
||||
expr* body = nullptr;
|
||||
if (seq.re.is_to_re(e, body)) {
|
||||
zstring s;
|
||||
if (seq.str.is_string(body, s) && s.length() > 0) {
|
||||
unsigned first_ch = s[0];
|
||||
bounds.push_back(first_ch);
|
||||
if (first_ch < zstring::max_char())
|
||||
bounds.push_back(first_ch + 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Leaf nodes with no character discrimination
|
||||
if (re->is_fail() || re->is_full_char() || re->is_full_seq())
|
||||
return;
|
||||
|
||||
// Recurse into children (handles union, concat, star, loop, etc.)
|
||||
for (unsigned i = 0; i < re->num_args(); ++i)
|
||||
collect_char_boundaries(re->arg(i), bounds);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// BFS regex emptiness check — helper: alphabet representatives
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void nseq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
|
||||
unsigned_vector bounds;
|
||||
bounds.push_back(0); // always include character 0
|
||||
collect_char_boundaries(re, bounds);
|
||||
|
||||
// Sort and deduplicate
|
||||
std::sort(bounds.begin(), bounds.end());
|
||||
unsigned prev = UINT_MAX;
|
||||
for (unsigned b : bounds) {
|
||||
if (b != prev) {
|
||||
reps.push_back(m_sg.mk_char(b));
|
||||
prev = b;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// BFS regex emptiness check
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
lbool nseq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
|
||||
if (!re || !re->get_expr())
|
||||
return l_undef;
|
||||
if (re->is_fail())
|
||||
return l_true;
|
||||
if (re->is_nullable())
|
||||
return l_false;
|
||||
// Structural quick checks for kinds that are never empty
|
||||
if (re->is_star() || re->is_full_char() || re->is_full_seq() || re->is_to_re())
|
||||
return l_false;
|
||||
// Structural emptiness catches simple cases
|
||||
if (is_empty_regex(re))
|
||||
return l_true;
|
||||
// Only handle ground regexes; non-ground can't be fully explored
|
||||
if (!re->is_ground())
|
||||
return l_undef;
|
||||
|
||||
// BFS over the Brzozowski derivative automaton.
|
||||
// Each state is a derivative regex snode identified by its id.
|
||||
// We explore states by computing derivatives for representative
|
||||
// characters from the alphabet partition.
|
||||
uint_set visited;
|
||||
euf::snode_vector worklist;
|
||||
worklist.push_back(re);
|
||||
visited.insert(re->id());
|
||||
|
||||
unsigned states_explored = 0;
|
||||
bool had_failed_deriv = false;
|
||||
|
||||
while (!worklist.empty()) {
|
||||
if (states_explored >= max_states)
|
||||
return l_undef;
|
||||
|
||||
euf::snode* current = worklist.back();
|
||||
worklist.pop_back();
|
||||
++states_explored;
|
||||
|
||||
// Compute representative characters for current state's
|
||||
// alphabet partition. Each representative is a concrete
|
||||
// character snode whose equivalence class has identical
|
||||
// derivative behavior.
|
||||
euf::snode_vector reps;
|
||||
get_alphabet_representatives(current, reps);
|
||||
|
||||
if (reps.empty()) {
|
||||
// No representatives means no character predicates;
|
||||
// use a default character to explore the single partition.
|
||||
reps.push_back(m_sg.mk_char('a'));
|
||||
}
|
||||
|
||||
for (euf::snode* ch : reps) {
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(current, ch);
|
||||
if (!deriv) {
|
||||
// Derivative computation failed for this character.
|
||||
// Track the failure but continue with other characters.
|
||||
had_failed_deriv = true;
|
||||
continue;
|
||||
}
|
||||
if (deriv->is_nullable())
|
||||
return l_false; // found an accepting state
|
||||
if (deriv->is_fail())
|
||||
continue; // dead-end, no need to explore further
|
||||
if (is_empty_regex(deriv))
|
||||
continue; // structurally empty subtree
|
||||
if (!visited.contains(deriv->id())) {
|
||||
visited.insert(deriv->id());
|
||||
worklist.push_back(deriv);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Exhausted all reachable states without finding a nullable one.
|
||||
// If we had any failed derivative computations, the result is
|
||||
// inconclusive since we may have missed reachable states.
|
||||
if (had_failed_deriv)
|
||||
return l_undef;
|
||||
|
||||
return l_true;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Multi-regex intersection emptiness check
|
||||
// BFS over the product of Brzozowski derivative automata.
|
||||
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
lbool nseq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
|
||||
unsigned max_states) {
|
||||
if (regexes.empty())
|
||||
return l_false; // empty intersection = full language (vacuously non-empty)
|
||||
|
||||
// Quick checks: if any regex is fail/empty, intersection is empty
|
||||
for (euf::snode* re : regexes) {
|
||||
if (!re || !re->get_expr())
|
||||
return l_undef;
|
||||
if (re->is_fail() || is_empty_regex(re))
|
||||
return l_true;
|
||||
}
|
||||
|
||||
// Check if all are nullable (intersection accepts ε)
|
||||
bool all_nullable = true;
|
||||
for (euf::snode* re : regexes) {
|
||||
if (!re->is_nullable()) { all_nullable = false; break; }
|
||||
}
|
||||
if (all_nullable)
|
||||
return l_false;
|
||||
|
||||
// Single regex: delegate to is_empty_bfs
|
||||
if (regexes.size() == 1)
|
||||
return is_empty_bfs(regexes[0], max_states);
|
||||
|
||||
// Build product BFS. State = tuple of regex snode ids.
|
||||
// Use a map from state hash to visited set.
|
||||
using state_t = svector<unsigned>;
|
||||
|
||||
auto state_hash = [](state_t const& s) -> unsigned {
|
||||
unsigned h = 0;
|
||||
for (unsigned id : s)
|
||||
h = h * 31 + id;
|
||||
return h;
|
||||
};
|
||||
|
||||
auto state_eq = [](state_t const& a, state_t const& b) -> bool {
|
||||
if (a.size() != b.size()) return false;
|
||||
for (unsigned i = 0; i < a.size(); ++i)
|
||||
if (a[i] != b[i]) return false;
|
||||
return true;
|
||||
};
|
||||
|
||||
// Use simple set via sorted vector of hashes (good enough for bounded BFS)
|
||||
std::unordered_set<unsigned> visited_hashes;
|
||||
|
||||
struct bfs_state {
|
||||
ptr_vector<euf::snode> regexes;
|
||||
};
|
||||
|
||||
std::vector<bfs_state> worklist;
|
||||
bfs_state initial;
|
||||
initial.regexes.append(regexes);
|
||||
worklist.push_back(std::move(initial));
|
||||
|
||||
state_t init_ids;
|
||||
for (euf::snode* re : regexes)
|
||||
init_ids.push_back(re->id());
|
||||
visited_hashes.insert(state_hash(init_ids));
|
||||
|
||||
unsigned states_explored = 0;
|
||||
bool had_failed = false;
|
||||
|
||||
// Collect alphabet representatives from the intersection of all regexes
|
||||
// (merge boundaries from all)
|
||||
unsigned_vector all_bounds;
|
||||
all_bounds.push_back(0);
|
||||
for (euf::snode* re : regexes)
|
||||
collect_char_boundaries(re, all_bounds);
|
||||
std::sort(all_bounds.begin(), all_bounds.end());
|
||||
|
||||
euf::snode_vector reps;
|
||||
unsigned prev = UINT_MAX;
|
||||
for (unsigned b : all_bounds) {
|
||||
if (b != prev) {
|
||||
reps.push_back(m_sg.mk_char(b));
|
||||
prev = b;
|
||||
}
|
||||
}
|
||||
if (reps.empty())
|
||||
reps.push_back(m_sg.mk_char('a'));
|
||||
|
||||
while (!worklist.empty()) {
|
||||
if (states_explored >= max_states)
|
||||
return l_undef;
|
||||
|
||||
bfs_state current = std::move(worklist.back());
|
||||
worklist.pop_back();
|
||||
++states_explored;
|
||||
|
||||
for (euf::snode* ch : reps) {
|
||||
ptr_vector<euf::snode> derivs;
|
||||
bool any_fail = false;
|
||||
bool all_null = true;
|
||||
bool deriv_failed = false;
|
||||
|
||||
for (euf::snode* re : current.regexes) {
|
||||
euf::snode* d = m_sg.brzozowski_deriv(re, ch);
|
||||
if (!d) { deriv_failed = true; break; }
|
||||
if (d->is_fail()) { any_fail = true; break; }
|
||||
if (!d->is_nullable()) all_null = false;
|
||||
derivs.push_back(d);
|
||||
}
|
||||
|
||||
if (deriv_failed) { had_failed = true; continue; }
|
||||
if (any_fail) continue; // this character leads to empty intersection
|
||||
|
||||
if (all_null)
|
||||
return l_false; // found an accepting state in the product
|
||||
|
||||
// Check if any component is structurally empty
|
||||
bool any_empty = false;
|
||||
for (euf::snode* d : derivs) {
|
||||
if (is_empty_regex(d)) { any_empty = true; break; }
|
||||
}
|
||||
if (any_empty) continue;
|
||||
|
||||
// Compute state hash and check visited
|
||||
state_t ids;
|
||||
for (euf::snode* d : derivs)
|
||||
ids.push_back(d->id());
|
||||
unsigned h = state_hash(ids);
|
||||
if (visited_hashes.count(h) == 0) {
|
||||
visited_hashes.insert(h);
|
||||
bfs_state next;
|
||||
next.regexes.append(derivs);
|
||||
worklist.push_back(std::move(next));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (had_failed)
|
||||
return l_undef;
|
||||
return l_true; // exhausted all states, intersection is empty
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Language subset check: L(A) ⊆ L(B)
|
||||
// via intersection(A, complement(B)) = ∅
|
||||
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
lbool nseq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
|
||||
if (!subset_re || !superset_re)
|
||||
return l_undef;
|
||||
|
||||
// Quick checks
|
||||
if (subset_re->is_fail() || is_empty_regex(subset_re))
|
||||
return l_true; // ∅ ⊆ anything
|
||||
if (superset_re->is_full_seq())
|
||||
return l_true; // anything ⊆ Σ*
|
||||
if (subset_re == superset_re)
|
||||
return l_true; // L ⊆ L
|
||||
|
||||
// Build complement(superset)
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
ast_manager& mgr = m_sg.get_manager();
|
||||
expr* sup_expr = superset_re->get_expr();
|
||||
if (!sup_expr)
|
||||
return l_undef;
|
||||
expr_ref comp(seq.re.mk_complement(sup_expr), mgr);
|
||||
euf::snode* comp_sn = m_sg.mk(comp);
|
||||
if (!comp_sn)
|
||||
return l_undef;
|
||||
|
||||
// Build intersection and check emptiness
|
||||
// subset ∩ complement(superset) should be empty for subset relation
|
||||
expr* sub_expr = subset_re->get_expr();
|
||||
if (!sub_expr)
|
||||
return l_undef;
|
||||
expr_ref inter(seq.re.mk_inter(sub_expr, comp.get()), mgr);
|
||||
euf::snode* inter_sn = m_sg.mk(inter);
|
||||
if (!inter_sn)
|
||||
return l_undef;
|
||||
|
||||
return is_empty_bfs(inter_sn);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Collect primitive regex intersection for a variable
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* nseq_regex::collect_primitive_regex_intersection(
|
||||
euf::snode* var, seq::nielsen_node const& node) {
|
||||
if (!var)
|
||||
return nullptr;
|
||||
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
ast_manager& mgr = m_sg.get_manager();
|
||||
euf::snode* result = nullptr;
|
||||
|
||||
for (auto const& mem : node.str_mems()) {
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
continue;
|
||||
// Primitive constraint: str is a single variable
|
||||
if (!mem.is_primitive())
|
||||
continue;
|
||||
euf::snode* first = mem.m_str->first();
|
||||
if (!first || first != var)
|
||||
continue;
|
||||
|
||||
if (!result) {
|
||||
result = mem.m_regex;
|
||||
} else {
|
||||
expr* r1 = result->get_expr();
|
||||
expr* r2 = mem.m_regex->get_expr();
|
||||
if (r1 && r2) {
|
||||
expr_ref inter(seq.re.mk_inter(r1, r2), mgr);
|
||||
result = m_sg.mk(inter);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Cycle detection
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -88,11 +742,14 @@ namespace smt {
|
|||
euf::snode* first = mem.m_str->first();
|
||||
if (!first || !first->is_char())
|
||||
break;
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, first);
|
||||
euf::snode* parent_re = mem.m_regex;
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, first);
|
||||
if (!deriv)
|
||||
break;
|
||||
if (deriv->is_fail())
|
||||
return simplify_status::conflict;
|
||||
// propagate self-stabilizing flag from parent to derivative
|
||||
propagate_self_stabilizing(parent_re, deriv);
|
||||
mem.m_str = m_sg.drop_first(mem.m_str);
|
||||
mem.m_regex = deriv;
|
||||
}
|
||||
|
|
@ -361,13 +1018,6 @@ namespace smt {
|
|||
if (!cycle_regex || !current_regex)
|
||||
return nullptr;
|
||||
|
||||
// The stabilizer is the Kleene star of the "cycle body" regex.
|
||||
// If the cycle regex and current regex are the same (pointer equal),
|
||||
// the stabilizer is cycle_regex* (Kleene star).
|
||||
// This mirrors ZIPT's StabilizerFromCycle which extracts the
|
||||
// regex between the cycle entry and current point and wraps it in *.
|
||||
|
||||
// Build cycle_regex* via the sgraph's expression factory
|
||||
expr* re_expr = cycle_regex->get_expr();
|
||||
if (!re_expr)
|
||||
return nullptr;
|
||||
|
|
@ -378,31 +1028,253 @@ namespace smt {
|
|||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Stabilizer-based subsumption
|
||||
// Extract cycle history tokens
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nseq_regex::try_subsume(seq::str_mem const& mem) {
|
||||
// Check if the derivation history exhibits a cycle, and if so,
|
||||
// whether the current regex is subsumed by the stabilizer.
|
||||
euf::snode* cycle = extract_cycle(mem);
|
||||
if (!cycle)
|
||||
euf::snode* nseq_regex::extract_cycle_history(seq::str_mem const& current,
|
||||
seq::str_mem const& ancestor) {
|
||||
// The history is built by simplify_and_init as a left-associative
|
||||
// string concat chain: concat(concat(concat(nil, c1), c2), c3).
|
||||
// Extract the tokens consumed since the ancestor.
|
||||
if (!current.m_history)
|
||||
return nullptr;
|
||||
|
||||
unsigned cur_len = current.m_history->length();
|
||||
unsigned anc_len = ancestor.m_history ? ancestor.m_history->length() : 0;
|
||||
|
||||
if (cur_len <= anc_len)
|
||||
return nullptr;
|
||||
|
||||
if (anc_len == 0)
|
||||
return current.m_history;
|
||||
|
||||
return m_sg.drop_left(current.m_history, anc_len);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Get filtered stabilizer star
|
||||
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* nseq_regex::get_filtered_stabilizer_star(euf::snode* re,
|
||||
euf::snode* excluded_char) {
|
||||
if (!re)
|
||||
return nullptr;
|
||||
|
||||
ptr_vector<euf::snode> const* stabs = get_stabilizers(re);
|
||||
if (!stabs || stabs->empty())
|
||||
return nullptr;
|
||||
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
ast_manager& m = m_sg.get_manager();
|
||||
euf::snode* filtered_union = nullptr;
|
||||
|
||||
for (euf::snode* s : *stabs) {
|
||||
if (!s)
|
||||
continue;
|
||||
// Keep only stabilizers whose language cannot start with excluded_char
|
||||
euf::snode* d = m_sg.brzozowski_deriv(s, excluded_char);
|
||||
if (d && d->is_fail()) {
|
||||
if (!filtered_union) {
|
||||
filtered_union = s;
|
||||
} else {
|
||||
expr* e1 = filtered_union->get_expr();
|
||||
expr* e2 = s->get_expr();
|
||||
if (e1 && e2) {
|
||||
expr_ref u(seq.re.mk_union(e1, e2), m);
|
||||
filtered_union = m_sg.mk(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!filtered_union)
|
||||
return nullptr;
|
||||
|
||||
expr* fe = filtered_union->get_expr();
|
||||
if (!fe)
|
||||
return nullptr;
|
||||
expr_ref star_expr(seq.re.mk_star(fe), m);
|
||||
return m_sg.mk(star_expr);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Strengthened stabilizer construction with sub-cycle detection
|
||||
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
euf::snode* nseq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
|
||||
euf::snode* cycle_history) {
|
||||
if (!cycle_regex || !cycle_history)
|
||||
return nullptr;
|
||||
|
||||
// Flatten the history concat chain into a vector of character tokens.
|
||||
euf::snode_vector tokens;
|
||||
cycle_history->collect_tokens(tokens);
|
||||
|
||||
if (tokens.empty())
|
||||
return nullptr;
|
||||
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
ast_manager& m = m_sg.get_manager();
|
||||
|
||||
// Replay tokens on the cycle regex, detecting sub-cycles.
|
||||
// A sub-cycle is detected when the derivative returns to cycle_regex.
|
||||
svector<std::pair<unsigned, unsigned>> sub_cycles;
|
||||
unsigned cycle_start = 0;
|
||||
euf::snode* current_re = cycle_regex;
|
||||
|
||||
for (unsigned i = 0; i < tokens.size(); ++i) {
|
||||
euf::snode* tok = tokens[i];
|
||||
if (!tok)
|
||||
return nullptr;
|
||||
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(current_re, tok);
|
||||
if (!deriv)
|
||||
return nullptr;
|
||||
|
||||
// Sub-cycle: derivative returned to the cycle entry regex
|
||||
if (deriv == cycle_regex ||
|
||||
(deriv->get_expr() && cycle_regex->get_expr() &&
|
||||
deriv->get_expr() == cycle_regex->get_expr())) {
|
||||
sub_cycles.push_back(std::make_pair(cycle_start, i + 1));
|
||||
cycle_start = i + 1;
|
||||
current_re = cycle_regex;
|
||||
} else {
|
||||
current_re = deriv;
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining tokens that don't complete a sub-cycle
|
||||
if (cycle_start < tokens.size())
|
||||
sub_cycles.push_back(std::make_pair(cycle_start, tokens.size()));
|
||||
|
||||
if (sub_cycles.empty())
|
||||
return nullptr;
|
||||
|
||||
// Build a stabilizer body for each sub-cycle.
|
||||
// body = to_re(t0) · [filteredStar(R1, t1)] · to_re(t1) · ... · to_re(t_{n-1})
|
||||
euf::snode* overall_union = nullptr;
|
||||
|
||||
for (auto const& sc : sub_cycles) {
|
||||
unsigned start = sc.first;
|
||||
unsigned end = sc.second;
|
||||
if (start >= end)
|
||||
continue;
|
||||
|
||||
euf::snode* re_state = cycle_regex;
|
||||
euf::snode* body = nullptr;
|
||||
|
||||
for (unsigned i = start; i < end; ++i) {
|
||||
euf::snode* tok = tokens[i];
|
||||
if (!tok)
|
||||
break;
|
||||
|
||||
// Insert filtered stabilizer star before each token after the first
|
||||
if (i > start) {
|
||||
euf::snode* filtered = get_filtered_stabilizer_star(re_state, tok);
|
||||
if (filtered) {
|
||||
expr* fe = filtered->get_expr();
|
||||
if (fe) {
|
||||
if (!body) {
|
||||
body = filtered;
|
||||
} else {
|
||||
expr* be = body->get_expr();
|
||||
if (be) {
|
||||
expr_ref cat(seq.re.mk_concat(be, fe), m);
|
||||
body = m_sg.mk(cat);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert char token to regex: to_re(unit(tok))
|
||||
expr* tok_expr = tok->get_expr();
|
||||
if (!tok_expr)
|
||||
break;
|
||||
|
||||
expr_ref unit_str(seq.str.mk_unit(tok_expr), m);
|
||||
expr_ref tok_re(seq.re.mk_to_re(unit_str), m);
|
||||
euf::snode* tok_re_sn = m_sg.mk(tok_re);
|
||||
|
||||
if (!body) {
|
||||
body = tok_re_sn;
|
||||
} else {
|
||||
expr* be = body->get_expr();
|
||||
expr* te = tok_re_sn->get_expr();
|
||||
if (be && te) {
|
||||
expr_ref cat(seq.re.mk_concat(be, te), m);
|
||||
body = m_sg.mk(cat);
|
||||
}
|
||||
}
|
||||
|
||||
// Advance the regex state
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(re_state, tok);
|
||||
if (!deriv)
|
||||
break;
|
||||
re_state = deriv;
|
||||
}
|
||||
|
||||
if (!body)
|
||||
continue;
|
||||
|
||||
if (!overall_union) {
|
||||
overall_union = body;
|
||||
} else {
|
||||
expr* oe = overall_union->get_expr();
|
||||
expr* be = body->get_expr();
|
||||
if (oe && be) {
|
||||
expr_ref u(seq.re.mk_union(oe, be), m);
|
||||
overall_union = m_sg.mk(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return overall_union;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Stabilizer-based subsumption (enhanced)
|
||||
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nseq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) {
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
return false;
|
||||
|
||||
euf::snode* stab = stabilizer_from_cycle(cycle, mem.m_regex);
|
||||
if (!stab)
|
||||
// 1. Leading token must be a variable
|
||||
euf::snode* first = mem.m_str->first();
|
||||
if (!first || !first->is_var())
|
||||
return false;
|
||||
|
||||
// A constraint x ∈ R is subsumed when R ⊆ stab.
|
||||
// For the simple case where cycle == current regex,
|
||||
// R ⊆ R* is always true (since R* accepts everything R does, and more).
|
||||
// This handles the common idempotent cycle case.
|
||||
if (cycle == mem.m_regex)
|
||||
return true;
|
||||
// 2. Must have stabilizers for the regex
|
||||
if (!has_stabilizers(mem.m_regex))
|
||||
return false;
|
||||
|
||||
// More sophisticated subsumption checks (regex containment)
|
||||
// would require a regex inclusion decision procedure.
|
||||
// For now, only handle the pointer-equality case.
|
||||
return false;
|
||||
// 3. Build stabStar = star(union(all stabilizers for this regex))
|
||||
euf::snode* stab_union = get_stabilizer_union(mem.m_regex);
|
||||
if (!stab_union)
|
||||
return false;
|
||||
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
ast_manager& mgr = m_sg.get_manager();
|
||||
expr* su_expr = stab_union->get_expr();
|
||||
if (!su_expr)
|
||||
return false;
|
||||
expr_ref stab_star(seq.re.mk_star(su_expr), mgr);
|
||||
euf::snode* stab_star_sn = m_sg.mk(stab_star);
|
||||
if (!stab_star_sn)
|
||||
return false;
|
||||
|
||||
// 4. Collect all primitive regex constraints on variable `first`
|
||||
euf::snode* x_range = collect_primitive_regex_intersection(first, node);
|
||||
if (!x_range)
|
||||
return false;
|
||||
|
||||
// 5. Check L(x_range) ⊆ L(stab_star)
|
||||
lbool result = is_language_subset(x_range, stab_star_sn);
|
||||
return result == l_true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,17 +31,119 @@ Author:
|
|||
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include "util/uint_set.h"
|
||||
#include "util/lbool.h"
|
||||
|
||||
namespace smt {
|
||||
|
||||
class nseq_regex {
|
||||
euf::sgraph& m_sg;
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Stabilizer store (non-backtrackable, persists across solve() calls)
|
||||
// Mirrors ZIPT Environment.stabilizers / selfStabilizing
|
||||
// (Environment.cs:36-37)
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// Maps regex snode id → list of stabilizer snodes.
|
||||
// Each regex may accumulate multiple stabilizers from different
|
||||
// cycle detections. The list is deduplicated by pointer equality.
|
||||
u_map<ptr_vector<euf::snode>> m_stabilizers;
|
||||
|
||||
// Set of regex snode ids that are self-stabilizing, i.e., the
|
||||
// stabilizer for the regex is the regex itself (e.g., r*).
|
||||
// Mirrors ZIPT Environment.selfStabilizing (Environment.cs:37)
|
||||
uint_set m_self_stabilizing;
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// BFS emptiness check helpers (private)
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// Collect character boundary points from range predicates and
|
||||
// to_re string literals in a regex. Boundaries partition the
|
||||
// alphabet into equivalence classes where all characters in
|
||||
// the same class produce identical derivatives.
|
||||
void collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const;
|
||||
|
||||
// Build a set of representative character snodes, one per
|
||||
// alphabet equivalence class, derived from the boundary points
|
||||
// of the given regex.
|
||||
void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps);
|
||||
|
||||
public:
|
||||
nseq_regex(euf::sgraph& sg) : m_sg(sg) {}
|
||||
|
||||
euf::sgraph& sg() { return m_sg; }
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Stabilizer store API
|
||||
// Mirrors ZIPT Environment stabilizer management
|
||||
// (Environment.cs:114-146)
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// Reset all stabilizer data. Called at the start of each solve()
|
||||
// invocation if fresh stabilizer state is desired.
|
||||
// Note: stabilizers persist across depth-bound iterations by default;
|
||||
// only call this to clear accumulated state.
|
||||
void reset_stabilizers();
|
||||
|
||||
// Add a stabilizer for a regex. De-duplicates by pointer equality.
|
||||
// Mirrors ZIPT Environment.AddStabilizer (Environment.cs:114-123).
|
||||
void add_stabilizer(euf::snode* regex, euf::snode* stabilizer);
|
||||
|
||||
// Get the union of all stabilizers registered for a regex.
|
||||
// Returns a single re.union snode combining all stabilizers,
|
||||
// or nullptr if no stabilizers exist for the regex.
|
||||
// Mirrors ZIPT Environment.GetStabilizerUnion (Environment.cs:125-128).
|
||||
euf::snode* get_stabilizer_union(euf::snode* regex);
|
||||
|
||||
// Check if any stabilizers have been registered for a regex.
|
||||
bool has_stabilizers(euf::snode* regex) const;
|
||||
|
||||
// Get raw stabilizer list for a regex (read-only).
|
||||
// Returns nullptr if no stabilizers exist.
|
||||
ptr_vector<euf::snode> const* get_stabilizers(euf::snode* regex) const;
|
||||
|
||||
// Mark a regex as self-stabilizing (stabilizer == regex itself).
|
||||
// Mirrors ZIPT Environment.SetSelfStabilizing (Environment.cs:143-146).
|
||||
void set_self_stabilizing(euf::snode* regex);
|
||||
|
||||
// Check if a regex is marked as self-stabilizing.
|
||||
// Mirrors ZIPT Environment.IsSelfStabilizing (Environment.cs:134-141).
|
||||
bool is_self_stabilizing(euf::snode* regex) const;
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Self-stabilizing auto-detection and propagation through derivatives
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
// Determine if a regex is inherently self-stabilizing based on its
|
||||
// structure. Returns true for:
|
||||
// - R* (Kleene star): D(c, R*) = D(c,R)·R*, so R* is its own
|
||||
// stabilizer regardless of the character.
|
||||
// - Σ* (full_seq): D(c, Σ*) = Σ*, trivially self-stabilizing.
|
||||
// - ∅ (fail/empty language): no live derivatives, trivially stable.
|
||||
// - Complement of full_seq (~Σ* = ∅): also trivially stable.
|
||||
// Does NOT mark the snode; call set_self_stabilizing to persist.
|
||||
bool compute_self_stabilizing(euf::snode* regex) const;
|
||||
|
||||
// After computing a derivative of parent, propagate the self-
|
||||
// stabilizing flag to the derivative result if warranted.
|
||||
// Applies structural rules:
|
||||
// - If parent is R* → derivative is always self-stabilizing
|
||||
// (derivative has the form D(c,R)·R* which contains the R* tail).
|
||||
// - If parent is R·S and S is self-stabilizing → derivative may
|
||||
// inherit from the self-stabilizing tail.
|
||||
// - If parent is R|S and both are self-stabilizing → derivative is.
|
||||
// - If parent is R∩S and both are self-stabilizing → derivative is.
|
||||
// - If parent is ~R and R is self-stabilizing → derivative is.
|
||||
// Updates the internal self-stabilizing set for the derivative.
|
||||
void propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv);
|
||||
|
||||
// Convenience: compute derivative and propagate self-stabilizing flags.
|
||||
// Equivalent to calling derivative() followed by
|
||||
// propagate_self_stabilizing().
|
||||
euf::snode* derivative_with_propagation(euf::snode* re, euf::snode* elem);
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Basic regex predicates
|
||||
// -----------------------------------------------------------------
|
||||
|
|
@ -51,6 +153,36 @@ namespace smt {
|
|||
// derived emptiness (e.g., union of empties, concat with empty).
|
||||
bool is_empty_regex(euf::snode* re) const;
|
||||
|
||||
// BFS emptiness check over the Brzozowski derivative automaton.
|
||||
// Explores reachable derivative states using representative
|
||||
// characters from each alphabet equivalence class.
|
||||
// l_true — regex is definitely empty (no accepting states reachable)
|
||||
// l_false — regex is definitely non-empty (found a nullable state)
|
||||
// l_undef — inconclusive (hit exploration bound or failed derivative)
|
||||
// max_states caps the number of explored states to prevent blowup.
|
||||
lbool is_empty_bfs(euf::snode* re, unsigned max_states = 10000);
|
||||
|
||||
// Check emptiness of the intersection of multiple regexes.
|
||||
// Uses BFS over the product of Brzozowski derivative automata.
|
||||
// l_true — intersection is definitely empty
|
||||
// l_false — intersection is definitely non-empty
|
||||
// l_undef — inconclusive (hit exploration bound)
|
||||
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
|
||||
lbool check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
|
||||
unsigned max_states = 10000);
|
||||
|
||||
// Check if L(subset_re) ⊆ L(superset_re).
|
||||
// Computed as: subset_re ∩ complement(superset_re) = ∅.
|
||||
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
|
||||
lbool is_language_subset(euf::snode* subset_re, euf::snode* superset_re);
|
||||
|
||||
// Collect all primitive regex constraints on variable `var` from
|
||||
// the node's str_mem list and return their intersection as a
|
||||
// single regex snode (using re.inter).
|
||||
// Returns nullptr if no primitive constraints found.
|
||||
euf::snode* collect_primitive_regex_intersection(
|
||||
euf::snode* var, seq::nielsen_node const& node);
|
||||
|
||||
// check if regex is the full language (Σ* / re.all)
|
||||
bool is_full_regex(euf::snode* re) const {
|
||||
return re && re->is_full_seq();
|
||||
|
|
@ -76,10 +208,23 @@ namespace smt {
|
|||
return m_sg.brzozowski_deriv(re, elem);
|
||||
}
|
||||
|
||||
// check if all minterms of the regex produce the same Brzozowski
|
||||
// derivative ("uniform derivative"). When this holds, the derivative
|
||||
// is independent of the character value: for any character c,
|
||||
// deriv(regex, c) = result. This enables deterministic consumption
|
||||
// of symbolic (variable) characters without branching.
|
||||
// Returns the uniform derivative if found, nullptr otherwise.
|
||||
// Mirrors ZIPT's SimplifyCharRegex uniform-derivative fast path.
|
||||
euf::snode* try_uniform_derivative(euf::snode* regex);
|
||||
|
||||
// compute derivative of a str_mem constraint: advance past one character.
|
||||
// the string side is shortened by drop_first and the regex is derived.
|
||||
// Propagates self-stabilizing flags from the parent regex to the derivative.
|
||||
seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) {
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem);
|
||||
euf::snode* parent_re = mem.m_regex;
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, elem);
|
||||
if (deriv)
|
||||
propagate_self_stabilizing(parent_re, deriv);
|
||||
euf::snode* new_str = m_sg.drop_first(mem.m_str);
|
||||
return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep);
|
||||
}
|
||||
|
|
@ -175,11 +320,42 @@ namespace smt {
|
|||
euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex,
|
||||
euf::snode* current_regex);
|
||||
|
||||
// Strengthened stabilizer construction with sub-cycle detection.
|
||||
// Replays the consumed character tokens from cycle_history on the
|
||||
// cycle regex, detecting sub-cycles (where the derivative loops
|
||||
// back to the original regex). For each sub-cycle, builds a
|
||||
// stabilizer from the interleaved character tokens and filtered
|
||||
// sub-stabilizers.
|
||||
// Returns a union of all sub-cycle stabilizer bodies, or nullptr
|
||||
// if no non-trivial stabilizer can be built.
|
||||
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225).
|
||||
euf::snode* strengthened_stabilizer(euf::snode* cycle_regex,
|
||||
euf::snode* cycle_history);
|
||||
|
||||
// Get filtered stabilizer star: for regex state re, retrieve
|
||||
// existing stabilizers, filter out those whose language can
|
||||
// start with any character in excluded_char, and wrap the
|
||||
// remaining in star(union(...)).
|
||||
// Returns nullptr (or empty-equivalent) if no valid stabilizers.
|
||||
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243).
|
||||
euf::snode* get_filtered_stabilizer_star(euf::snode* re,
|
||||
euf::snode* excluded_char);
|
||||
|
||||
// Extract the cycle portion of a str_mem's history by comparing
|
||||
// the current history with an ancestor's history length.
|
||||
// Returns the sub-sequence of tokens consumed since the ancestor,
|
||||
// or nullptr if the history did not advance.
|
||||
euf::snode* extract_cycle_history(seq::str_mem const& current,
|
||||
seq::str_mem const& ancestor);
|
||||
|
||||
// try to subsume a str_mem constraint using stabilizer-based
|
||||
// reasoning: if extract_cycle finds a cycle, check whether
|
||||
// the current regex is already covered by the stabilizer.
|
||||
// reasoning. Enhanced version: checks if the leading variable's
|
||||
// language (intersection of all its primitive regex constraints)
|
||||
// is a subset of star(union(stabilizers)) for the current regex.
|
||||
// Falls back to cycle-based pointer equality check.
|
||||
// returns true if the constraint can be dropped.
|
||||
bool try_subsume(seq::str_mem const& mem);
|
||||
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386).
|
||||
bool try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ Author:
|
|||
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include "smt/seq/seq_parikh.h"
|
||||
#include "smt/nseq_regex.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
|
|
@ -206,6 +207,19 @@ namespace seq {
|
|||
m_var_lb.insert(kv.m_key, kv.m_value);
|
||||
for (auto const& kv : parent.m_var_ub)
|
||||
m_var_ub.insert(kv.m_key, kv.m_value);
|
||||
// clone regex occurrence tracking
|
||||
m_regex_occurrence = parent.m_regex_occurrence;
|
||||
}
|
||||
|
||||
bool nielsen_node::track_regex_occurrence(unsigned regex_id, unsigned mem_id) {
|
||||
auto key = std::make_pair(regex_id, mem_id);
|
||||
auto it = m_regex_occurrence.find(key);
|
||||
if (it != m_regex_occurrence.end()) {
|
||||
// Already seen — cycle detected
|
||||
return true;
|
||||
}
|
||||
m_regex_occurrence[key] = m_id;
|
||||
return false;
|
||||
}
|
||||
|
||||
void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) {
|
||||
|
|
@ -499,11 +513,13 @@ namespace seq {
|
|||
m_sg(sg),
|
||||
m_solver(solver),
|
||||
m_parikh(alloc(seq_parikh, sg)),
|
||||
m_nseq_regex(alloc(smt::nseq_regex, sg)),
|
||||
m_len_vars(sg.get_manager()) {
|
||||
}
|
||||
|
||||
nielsen_graph::~nielsen_graph() {
|
||||
dealloc(m_parikh);
|
||||
dealloc(m_nseq_regex);
|
||||
reset();
|
||||
}
|
||||
|
||||
|
|
@ -1670,6 +1686,91 @@ namespace seq {
|
|||
}
|
||||
}
|
||||
|
||||
// consume symbolic characters (s_unit tokens) via uniform derivative
|
||||
// check. When all minterms of the regex produce the same Brzozowski
|
||||
// derivative, the derivative is independent of the character value
|
||||
// and we can deterministically consume the token. Forward direction
|
||||
// only (matches ZIPT history tracking convention).
|
||||
//
|
||||
// Extended: when uniform derivative fails but the token has a char_range
|
||||
// constraint (from apply_regex_var_split), check if the char_range is a
|
||||
// subset of a single minterm's character class. If so, the derivative
|
||||
// is deterministic for that token.
|
||||
// Mirrors ZIPT StrMem.SimplifyCharRegex lines 96-117.
|
||||
for (str_mem& mem : m_str_mem) {
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
continue;
|
||||
while (mem.m_str && !mem.m_str->is_empty()) {
|
||||
euf::snode* tok = mem.m_str->first();
|
||||
if (!tok || !tok->is_unit())
|
||||
break;
|
||||
// compute minterms and check for uniform derivative
|
||||
euf::snode_vector minterms;
|
||||
sg.compute_minterms(mem.m_regex, minterms);
|
||||
if (minterms.empty())
|
||||
break;
|
||||
euf::snode* uniform = nullptr;
|
||||
bool is_uniform = true;
|
||||
for (euf::snode* mt : minterms) {
|
||||
if (!mt || mt->is_fail())
|
||||
continue;
|
||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||||
if (!deriv) { is_uniform = false; break; }
|
||||
if (!uniform) {
|
||||
uniform = deriv;
|
||||
} else if (uniform->id() != deriv->id()) {
|
||||
is_uniform = false; break;
|
||||
}
|
||||
}
|
||||
if (is_uniform && uniform) {
|
||||
if (uniform->is_fail()) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::regex;
|
||||
return simplify_result::conflict;
|
||||
}
|
||||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||||
mem.m_regex = uniform;
|
||||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||||
continue;
|
||||
}
|
||||
// Uniform derivative failed — try char_range subset approach.
|
||||
// If the symbolic char has a char_range constraint and that
|
||||
// range is a subset of exactly one minterm's character class,
|
||||
// we can deterministically take that minterm's derivative.
|
||||
if (m_char_ranges.contains(tok->id()) && g.m_parikh) {
|
||||
char_set const& cs = m_char_ranges[tok->id()];
|
||||
if (!cs.is_empty()) {
|
||||
euf::snode* matching_deriv = nullptr;
|
||||
bool found = false;
|
||||
for (euf::snode* mt : minterms) {
|
||||
if (!mt || mt->is_fail()) continue;
|
||||
if (!mt->get_expr()) continue;
|
||||
char_set mt_cs = g.m_parikh->minterm_to_char_set(mt->get_expr());
|
||||
if (cs.is_subset(mt_cs)) {
|
||||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||||
if (!deriv) { found = false; break; }
|
||||
if (deriv->is_fail()) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::regex;
|
||||
return simplify_result::conflict;
|
||||
}
|
||||
matching_deriv = deriv;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found && matching_deriv) {
|
||||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||||
mem.m_regex = matching_deriv;
|
||||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// check for regex memberships that are immediately infeasible
|
||||
for (str_mem& mem : m_str_mem) {
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
|
|
@ -1692,6 +1793,23 @@ namespace seq {
|
|||
if (wi < m_str_mem.size())
|
||||
m_str_mem.shrink(wi);
|
||||
|
||||
// Regex widening: for each remaining str_mem, overapproximate
|
||||
// the string by replacing variables with their regex intersection
|
||||
// and check if the result intersected with the target regex is empty.
|
||||
// Detects infeasible constraints that would otherwise require
|
||||
// expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening.
|
||||
if (g.m_nseq_regex) {
|
||||
for (str_mem const& mem : m_str_mem) {
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
continue;
|
||||
if (g.check_regex_widening(*this, mem.m_str, mem.m_regex)) {
|
||||
m_is_general_conflict = true;
|
||||
m_reason = backtrack_reason::regex;
|
||||
return simplify_result::conflict;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// IntBounds initialization: derive per-variable Parikh length bounds from
|
||||
// remaining regex memberships and add to m_int_constraints.
|
||||
init_var_bounds_from_mems();
|
||||
|
|
@ -2075,6 +2193,15 @@ namespace seq {
|
|||
}
|
||||
|
||||
if (sr == simplify_result::satisfied || node->is_satisfied()) {
|
||||
// Before declaring SAT, check leaf-node regex feasibility:
|
||||
// for each variable with multiple regex constraints, verify
|
||||
// that the intersection of all its regexes is non-empty.
|
||||
// Mirrors ZIPT NielsenNode.CheckRegex.
|
||||
if (!check_leaf_regex(*node)) {
|
||||
node->set_general_conflict(true);
|
||||
node->set_reason(backtrack_reason::regex);
|
||||
return search_result::unsat;
|
||||
}
|
||||
m_sat_node = node;
|
||||
m_sat_path = cur_path;
|
||||
return search_result::sat;
|
||||
|
|
@ -3132,9 +3259,10 @@ namespace seq {
|
|||
// Modifier: apply_star_intr
|
||||
// Star introduction: for a str_mem x·s ∈ R where a cycle is detected
|
||||
// (backedge exists), introduce a stabilizer constraint x ∈ base*.
|
||||
// Creates a child that adds x ∈ base* membership and constrains
|
||||
// the remainder not to start with base.
|
||||
// mirrors ZIPT's StarIntrModifier
|
||||
// Creates a child that splits x into pr·po, adds pr ∈ base* and
|
||||
// po·s ∈ R, with a blocking constraint on po.
|
||||
// Enhanced to use strengthened_stabilizer and register via add_stabilizer.
|
||||
// mirrors ZIPT's StarIntrModifier (StarIntrModifier.cs:22-85)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nielsen_graph::apply_star_intr(nielsen_node* node) {
|
||||
|
|
@ -3149,41 +3277,86 @@ namespace seq {
|
|||
euf::snode* first = mem.m_str->first();
|
||||
if (!first || !first->is_var()) continue;
|
||||
|
||||
// Introduce a fresh variable z constrained by z ∈ R*,
|
||||
// replacing x → z·fresh_post. This breaks the cycle because
|
||||
// z is a new variable that won't trigger star_intr again.
|
||||
euf::snode* x = first;
|
||||
euf::snode* z = mk_fresh_var();
|
||||
euf::snode* fresh_post = mk_fresh_var();
|
||||
euf::snode* str_tail = m_sg.drop_first(mem.m_str);
|
||||
|
||||
// Build z ∈ R* membership: the star of the current regex
|
||||
seq_util& seq = m_sg.get_seq_util();
|
||||
ast_manager& mgr = m_sg.get_manager();
|
||||
expr* re_expr = mem.m_regex->get_expr();
|
||||
if (!re_expr)
|
||||
continue;
|
||||
expr_ref star_re(seq.re.mk_star(re_expr), seq.get_manager());
|
||||
|
||||
// Determine the stabilizer base:
|
||||
// 1. If self-stabilizing, use regex itself as stabilizer
|
||||
// 2. Otherwise, try strengthened_stabilizer from cycle history
|
||||
// 3. Fall back to simple star(R)
|
||||
euf::snode* stab_base = nullptr;
|
||||
|
||||
if (m_nseq_regex && m_nseq_regex->is_self_stabilizing(mem.m_regex)) {
|
||||
stab_base = mem.m_regex;
|
||||
}
|
||||
else if (m_nseq_regex && mem.m_history) {
|
||||
stab_base = m_nseq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history);
|
||||
}
|
||||
|
||||
if (!stab_base) {
|
||||
// Fall back: simple star of the cycle regex
|
||||
stab_base = mem.m_regex;
|
||||
}
|
||||
|
||||
// Register the stabilizer
|
||||
if (m_nseq_regex)
|
||||
m_nseq_regex->add_stabilizer(mem.m_regex, stab_base);
|
||||
|
||||
// Check if stabilizer equals regex → self-stabilizing
|
||||
if (m_nseq_regex && stab_base == mem.m_regex)
|
||||
m_nseq_regex->set_self_stabilizing(mem.m_regex);
|
||||
|
||||
// Build stab_star = star(stab_base)
|
||||
expr* stab_expr = stab_base->get_expr();
|
||||
if (!stab_expr)
|
||||
continue;
|
||||
expr_ref star_re(seq.re.mk_star(stab_expr), mgr);
|
||||
euf::snode* star_sn = m_sg.mk(star_re);
|
||||
if (!star_sn)
|
||||
continue;
|
||||
|
||||
// Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint
|
||||
if (m_nseq_regex && m_nseq_regex->try_subsume(mem, *node))
|
||||
continue;
|
||||
|
||||
// Create child: x → pr · po
|
||||
euf::snode* pr = mk_fresh_var();
|
||||
euf::snode* po = mk_fresh_var();
|
||||
euf::snode* str_tail = m_sg.drop_first(mem.m_str);
|
||||
|
||||
nielsen_node* child = mk_child(node);
|
||||
|
||||
// Add membership: z ∈ R* (stabilizer constraint)
|
||||
child->add_str_mem(str_mem(z, star_sn, mem.m_history, next_mem_id(), mem.m_dep));
|
||||
// Add membership: pr ∈ stab_base* (stabilizer constraint)
|
||||
child->add_str_mem(str_mem(pr, star_sn, mem.m_history, next_mem_id(), mem.m_dep));
|
||||
|
||||
// Add remaining membership: fresh_post · tail ∈ R
|
||||
euf::snode* post_tail = str_tail->is_empty() ? fresh_post : m_sg.mk_concat(fresh_post, str_tail);
|
||||
child->add_str_mem(str_mem(post_tail, mem.m_regex, mem.m_history, next_mem_id(), mem.m_dep));
|
||||
// Add remaining membership: po · tail ∈ R (same regex, trimmed history)
|
||||
euf::snode* post_tail = str_tail->is_empty() ? po : m_sg.mk_concat(po, str_tail);
|
||||
child->add_str_mem(str_mem(post_tail, mem.m_regex, nullptr, next_mem_id(), mem.m_dep));
|
||||
|
||||
// Substitute x → z · fresh_post
|
||||
// Blocking constraint: po must NOT start with stab_base
|
||||
// po ∈ complement(non_nullable(stab_base) · Σ*)
|
||||
// This prevents redundant unrolling.
|
||||
if (!stab_base->is_nullable()) {
|
||||
sort* str_sort = seq.str.mk_string_sort();
|
||||
expr_ref full_seq(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
|
||||
expr_ref base_then_all(seq.re.mk_concat(stab_expr, full_seq), mgr);
|
||||
expr_ref block_re(seq.re.mk_complement(base_then_all), mgr);
|
||||
euf::snode* block_sn = m_sg.mk(block_re);
|
||||
if (block_sn)
|
||||
child->add_str_mem(str_mem(po, block_sn, nullptr, next_mem_id(), mem.m_dep));
|
||||
}
|
||||
|
||||
// Substitute x → pr · po
|
||||
nielsen_edge* e = mk_edge(node, child, false);
|
||||
euf::snode* replacement = m_sg.mk_concat(z, fresh_post);
|
||||
euf::snode* replacement = m_sg.mk_concat(pr, po);
|
||||
nielsen_subst s(x, replacement, mem.m_dep);
|
||||
e->add_subst(s);
|
||||
child->apply_subst(m_sg, s);
|
||||
|
||||
// Do not re-set backedge — z is fresh, so star_intr won't fire again
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -4101,5 +4274,160 @@ namespace seq {
|
|||
return mdl.get() != nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
// -----------------------------------------------------------------------
|
||||
// Regex widening: overapproximate string and check intersection emptiness
|
||||
// Mirrors ZIPT NielsenNode.CheckRegexWidening (NielsenNode.cs:1350-1380)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nielsen_graph::check_regex_widening(nielsen_node const& node,
|
||||
euf::snode* str, euf::snode* regex) {
|
||||
if (!str || !regex || !m_nseq_regex)
|
||||
return false;
|
||||
// Only apply to ground regexes with non-trivial strings
|
||||
if (!regex->is_ground())
|
||||
return false;
|
||||
|
||||
seq_util seq(m_sg.get_manager());
|
||||
ast_manager& mgr = m_sg.get_manager();
|
||||
|
||||
// Build the overapproximation regex for the string.
|
||||
// Variables → intersection of their primitive regex constraints (or Σ*)
|
||||
// Symbolic chars → re.range from char_ranges (or full_char)
|
||||
// Concrete chars → to_re(unit(c))
|
||||
euf::snode_vector tokens;
|
||||
str->collect_tokens(tokens);
|
||||
if (tokens.empty())
|
||||
return false;
|
||||
|
||||
euf::snode* approx = nullptr;
|
||||
for (euf::snode* tok : tokens) {
|
||||
euf::snode* tok_re = nullptr;
|
||||
|
||||
if (tok->is_char()) {
|
||||
// Concrete character → to_re(unit(c))
|
||||
expr* te = tok->get_expr();
|
||||
if (!te) return false;
|
||||
expr_ref unit_s(seq.str.mk_unit(te), mgr);
|
||||
expr_ref tre(seq.re.mk_to_re(unit_s), mgr);
|
||||
tok_re = m_sg.mk(tre);
|
||||
}
|
||||
else if (tok->is_var()) {
|
||||
// Variable → intersection of primitive regex constraints, or Σ*
|
||||
euf::snode* x_range = m_nseq_regex->collect_primitive_regex_intersection(tok, node);
|
||||
if (x_range) {
|
||||
tok_re = x_range;
|
||||
} else {
|
||||
// Unconstrained variable: approximate as Σ*
|
||||
sort* str_sort = seq.str.mk_string_sort();
|
||||
expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
|
||||
tok_re = m_sg.mk(all_re);
|
||||
}
|
||||
}
|
||||
else if (tok->is_unit()) {
|
||||
// Symbolic char — try to get char_range
|
||||
if (node.char_ranges().contains(tok->id())) {
|
||||
char_set const& cs = node.char_ranges()[tok->id()];
|
||||
if (!cs.is_empty()) {
|
||||
// Build union of re.range for each interval
|
||||
euf::snode* range_re = nullptr;
|
||||
for (auto const& r : cs.ranges()) {
|
||||
expr_ref lo(seq.mk_char(r.m_lo), mgr);
|
||||
expr_ref hi(seq.mk_char(r.m_hi - 1), mgr);
|
||||
expr_ref rng(seq.re.mk_range(
|
||||
seq.str.mk_string(zstring(r.m_lo)),
|
||||
seq.str.mk_string(zstring(r.m_hi - 1))), mgr);
|
||||
euf::snode* rng_sn = m_sg.mk(rng);
|
||||
if (!range_re) {
|
||||
range_re = rng_sn;
|
||||
} else {
|
||||
expr_ref u(seq.re.mk_union(range_re->get_expr(), rng_sn->get_expr()), mgr);
|
||||
range_re = m_sg.mk(u);
|
||||
}
|
||||
}
|
||||
tok_re = range_re;
|
||||
}
|
||||
}
|
||||
if (!tok_re) {
|
||||
// Unconstrained symbolic char: approximate as full_char (single char, any value)
|
||||
sort* str_sort = seq.str.mk_string_sort();
|
||||
expr_ref fc(seq.re.mk_full_char(seq.re.mk_re(str_sort)), mgr);
|
||||
tok_re = m_sg.mk(fc);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Unknown token type — approximate as Σ*
|
||||
sort* str_sort = seq.str.mk_string_sort();
|
||||
expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
|
||||
tok_re = m_sg.mk(all_re);
|
||||
}
|
||||
|
||||
if (!tok_re)
|
||||
return false;
|
||||
|
||||
if (!approx) {
|
||||
approx = tok_re;
|
||||
} else {
|
||||
expr* ae = approx->get_expr();
|
||||
expr* te = tok_re->get_expr();
|
||||
if (!ae || !te) return false;
|
||||
expr_ref cat(seq.re.mk_concat(ae, te), mgr);
|
||||
approx = m_sg.mk(cat);
|
||||
}
|
||||
}
|
||||
|
||||
if (!approx)
|
||||
return false;
|
||||
|
||||
// Check intersection(approx, regex) for emptiness
|
||||
// Build intersection regex
|
||||
expr* ae = approx->get_expr();
|
||||
expr* re = regex->get_expr();
|
||||
if (!ae || !re)
|
||||
return false;
|
||||
expr_ref inter(seq.re.mk_inter(ae, re), mgr);
|
||||
euf::snode* inter_sn = m_sg.mk(inter);
|
||||
if (!inter_sn)
|
||||
return false;
|
||||
|
||||
lbool result = m_nseq_regex->is_empty_bfs(inter_sn, 5000);
|
||||
return result == l_true;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Leaf-node regex intersection emptiness check
|
||||
// Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nielsen_graph::check_leaf_regex(nielsen_node const& node) {
|
||||
if (!m_nseq_regex)
|
||||
return true;
|
||||
|
||||
// Group str_mem constraints by variable (primitive constraints only)
|
||||
u_map<ptr_vector<euf::snode>> var_regexes;
|
||||
|
||||
for (auto const& mem : node.str_mems()) {
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
continue;
|
||||
if (!mem.is_primitive())
|
||||
continue;
|
||||
euf::snode* first = mem.m_str->first();
|
||||
if (!first || !first->is_var())
|
||||
continue;
|
||||
auto& list = var_regexes.insert_if_not_there(first->id(), ptr_vector<euf::snode>());
|
||||
list.push_back(mem.m_regex);
|
||||
}
|
||||
|
||||
// For each variable with 2+ regex constraints, check intersection non-emptiness
|
||||
for (auto& [var_id, regexes] : var_regexes) {
|
||||
if (regexes.size() < 2)
|
||||
continue;
|
||||
lbool result = m_nseq_regex->check_intersection_emptiness(regexes, 5000);
|
||||
if (result == l_true) {
|
||||
// Intersection is empty — infeasible
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -244,6 +244,10 @@ Author:
|
|||
#include <map>
|
||||
#include "model/model.h"
|
||||
|
||||
namespace smt {
|
||||
class nseq_regex; // forward declaration (defined in smt/nseq_regex.h)
|
||||
}
|
||||
|
||||
namespace seq {
|
||||
|
||||
// forward declarations
|
||||
|
|
@ -538,6 +542,12 @@ namespace seq {
|
|||
// asserted when this node's solver scope is entered.
|
||||
unsigned m_parent_ic_count = 0;
|
||||
|
||||
// RegexOccurrence: maps (regex snode id, str_mem id) → node id where
|
||||
// this regex was last seen on the current DFS path.
|
||||
// Used for precise cycle detection with history-length-based progress.
|
||||
// Mirrors ZIPT LocalInfo.RegexOccurrence (LocalInfo.cs:34)
|
||||
std::map<std::pair<unsigned, unsigned>, unsigned> m_regex_occurrence;
|
||||
|
||||
public:
|
||||
nielsen_node(nielsen_graph* graph, unsigned id);
|
||||
|
||||
|
|
@ -628,6 +638,15 @@ namespace seq {
|
|||
// clone constraints from a parent node
|
||||
void clone_from(nielsen_node const& parent);
|
||||
|
||||
// Regex occurrence tracking: record current regex state for cycle detection.
|
||||
// Returns true if a cycle is detected (same regex seen before on this path).
|
||||
bool track_regex_occurrence(unsigned regex_id, unsigned mem_id);
|
||||
|
||||
// Get the regex occurrence map (for undo on backtrack).
|
||||
std::map<std::pair<unsigned, unsigned>, unsigned> const& regex_occurrence() const {
|
||||
return m_regex_occurrence;
|
||||
}
|
||||
|
||||
// apply a substitution to all constraints
|
||||
void apply_subst(euf::sgraph& sg, nielsen_subst const& s);
|
||||
|
||||
|
|
@ -739,6 +758,11 @@ namespace seq {
|
|||
// Parikh image filter: generates modular length constraints from regex
|
||||
// memberships. Allocated in the constructor; owned by this graph.
|
||||
seq_parikh* m_parikh = nullptr;
|
||||
|
||||
// Regex membership module: stabilizers, emptiness checks, language
|
||||
// inclusion, derivatives. Allocated in the constructor; owned by this graph.
|
||||
smt::nseq_regex* m_nseq_regex = nullptr;
|
||||
|
||||
// -----------------------------------------------
|
||||
// Modification counter for substitution length tracking.
|
||||
// mirrors ZIPT's LocalInfo.CurrentModificationCnt
|
||||
|
|
@ -871,9 +895,28 @@ namespace seq {
|
|||
// Caller takes ownership of the returned model pointer.
|
||||
bool solve_sat_path_ints(model_ref& mdl);
|
||||
|
||||
// accessor for the nseq_regex module
|
||||
smt::nseq_regex* nseq_regex_module() const { return m_nseq_regex; }
|
||||
|
||||
private:
|
||||
search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);
|
||||
|
||||
// Regex widening: overapproximate `str` by replacing variables with
|
||||
// the intersection of their primitive regex constraints (or Σ* if
|
||||
// unconstrained), replacing symbolic chars with their char ranges,
|
||||
// then checking if the approximation intersected with `regex` is empty.
|
||||
// Returns true if widening detects infeasibility (UNSAT).
|
||||
// Mirrors ZIPT NielsenNode.CheckRegexWidening (NielsenNode.cs:1350-1380)
|
||||
bool check_regex_widening(nielsen_node const& node,
|
||||
euf::snode* str, euf::snode* regex);
|
||||
|
||||
// Check regex feasibility at a leaf node: for each variable with
|
||||
// multiple primitive regex constraints, check that the intersection
|
||||
// of all its regexes is non-empty.
|
||||
// Returns true if all constraints are feasible.
|
||||
// Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329)
|
||||
bool check_leaf_regex(nielsen_node const& node);
|
||||
|
||||
// Apply the Parikh image filter to a node: generate modular length
|
||||
// constraints from regex memberships and append them to the node's
|
||||
// int_constraints. Also performs a lightweight feasibility pre-check;
|
||||
|
|
|
|||
|
|
@ -449,6 +449,7 @@ static void test_sgraph_factory() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
// mk_var
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
|
|
@ -463,7 +464,7 @@ static void test_sgraph_factory() {
|
|||
SASSERT(a->length() == 1);
|
||||
|
||||
// mk_empty
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
SASSERT(e && e->is_empty());
|
||||
SASSERT(e->is_nullable());
|
||||
SASSERT(e->length() == 0);
|
||||
|
|
@ -499,6 +500,7 @@ static void test_sgraph_indexing() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
|
|
@ -533,7 +535,7 @@ static void test_sgraph_indexing() {
|
|||
SASSERT(a->at(1) == nullptr);
|
||||
|
||||
// empty: at(0) is nullptr
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
SASSERT(e->at(0) == nullptr);
|
||||
euf::snode_vector empty_tokens;
|
||||
e->collect_tokens(empty_tokens);
|
||||
|
|
@ -547,6 +549,7 @@ static void test_sgraph_drop() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
|
|
@ -591,7 +594,7 @@ static void test_sgraph_drop() {
|
|||
SASSERT(e->is_empty());
|
||||
|
||||
// drop from empty: no change
|
||||
euf::snode* ee = sg.drop_first(sg.mk_empty());
|
||||
euf::snode* ee = sg.drop_first(sg.mk_empty_seq(seq.str.mk_string_sort()));
|
||||
SASSERT(ee->is_empty());
|
||||
}
|
||||
|
||||
|
|
@ -602,6 +605,7 @@ static void test_sgraph_subst() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
|
|
@ -624,7 +628,7 @@ static void test_sgraph_subst() {
|
|||
SASSERT(same == xax);
|
||||
|
||||
// substitution of variable with empty
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
euf::snode* collapsed = sg.subst(xax, x, e);
|
||||
SASSERT(collapsed->length() == 1); // just 'a' remains
|
||||
SASSERT(collapsed == a);
|
||||
|
|
|
|||
|
|
@ -91,14 +91,15 @@ static void test_nseq_simplification() {
|
|||
std::cout << "test_nseq_simplification\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util su(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
nseq_basic_dummy_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
|
||||
// Add a trivial equality: empty = empty
|
||||
euf::snode* empty1 = sg.mk_empty();
|
||||
euf::snode* empty2 = sg.mk_empty();
|
||||
euf::snode* empty1 = sg.mk_empty_seq(su.str.mk_string_sort());
|
||||
euf::snode* empty2 = sg.mk_empty_seq(su.str.mk_string_sort());
|
||||
|
||||
ng.add_str_eq(empty1, empty2);
|
||||
|
||||
|
|
@ -113,6 +114,7 @@ static void test_nseq_node_satisfied() {
|
|||
std::cout << "test_nseq_node_satisfied\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util su(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
nseq_basic_dummy_solver solver;
|
||||
|
|
@ -123,7 +125,7 @@ static void test_nseq_node_satisfied() {
|
|||
SASSERT(node->is_satisfied());
|
||||
|
||||
// add a trivial equality
|
||||
euf::snode* empty = sg.mk_empty();
|
||||
euf::snode* empty = sg.mk_empty_seq(su.str.mk_string_sort());
|
||||
seq::dep_tracker dep;
|
||||
seq::str_eq eq(empty, empty, dep);
|
||||
node->add_str_eq(eq);
|
||||
|
|
|
|||
|
|
@ -16,6 +16,9 @@ Abstract:
|
|||
#include "ast/euf/euf_egraph.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "smt/nseq_regex.h"
|
||||
#include "smt/seq/seq_nielsen.h"
|
||||
#include "util/lbool.h"
|
||||
#include "util/zstring.h"
|
||||
#include <iostream>
|
||||
|
||||
// Test 1: nseq_regex instantiation
|
||||
|
|
@ -66,9 +69,643 @@ static void test_nseq_regex_is_full() {
|
|||
std::cout << " ok: re.all not recognized as empty\n";
|
||||
}
|
||||
|
||||
// Test 4: strengthened_stabilizer — null safety
|
||||
static void test_strengthened_stabilizer_null() {
|
||||
std::cout << "test_strengthened_stabilizer_null\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
|
||||
SASSERT(nr.strengthened_stabilizer(nullptr, nullptr) == nullptr);
|
||||
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
|
||||
euf::snode* full_re = sg.mk(full_e);
|
||||
|
||||
SASSERT(nr.strengthened_stabilizer(full_re, nullptr) == nullptr);
|
||||
SASSERT(nr.strengthened_stabilizer(nullptr, full_re) == nullptr);
|
||||
std::cout << " ok\n";
|
||||
}
|
||||
|
||||
// Test 5: strengthened_stabilizer — single char cycle on a*
|
||||
// Regex a*, history = 'a'. D('a', a*) = a* (sub-cycle back to start).
|
||||
// Stabilizer body should be to_re("a").
|
||||
static void test_strengthened_stabilizer_single_char() {
|
||||
std::cout << "test_strengthened_stabilizer_single_char\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// Build a*
|
||||
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
|
||||
euf::snode* re_star_a = sg.mk(star_a);
|
||||
|
||||
// Build history = char 'a' (single token, no concat needed)
|
||||
euf::snode* tok_a = sg.mk_char('a');
|
||||
euf::snode* history = tok_a;
|
||||
|
||||
euf::snode* result = nr.strengthened_stabilizer(re_star_a, history);
|
||||
// Should produce a non-null stabilizer body (to_re("a"))
|
||||
SASSERT(result != nullptr);
|
||||
std::cout << " ok: a* with history 'a' -> non-null stabilizer\n";
|
||||
}
|
||||
|
||||
// Test 6: strengthened_stabilizer — two-char cycle with sub-cycle
|
||||
// Regex (ab)*, history = 'a', 'b'. D('a', (ab)*) = b(ab)*, D('b', b(ab)*) = (ab)*
|
||||
// This should detect a sub-cycle and build a stabilizer body.
|
||||
static void test_strengthened_stabilizer_two_char() {
|
||||
std::cout << "test_strengthened_stabilizer_two_char\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// Build (ab)*
|
||||
expr_ref ab(su.re.mk_to_re(su.str.mk_string("ab")), m);
|
||||
expr_ref star_ab(su.re.mk_star(ab), m);
|
||||
euf::snode* re_star_ab = sg.mk(star_ab);
|
||||
|
||||
// Build history: concat(char_a, char_b) using string concat
|
||||
euf::snode* tok_a = sg.mk_char('a');
|
||||
euf::snode* tok_b = sg.mk_char('b');
|
||||
euf::snode* history = sg.mk_concat(tok_a, tok_b);
|
||||
|
||||
euf::snode* result = nr.strengthened_stabilizer(re_star_ab, history);
|
||||
// Should produce a non-null stabilizer body
|
||||
SASSERT(result != nullptr);
|
||||
std::cout << " ok: (ab)* with history 'ab' -> non-null stabilizer\n";
|
||||
}
|
||||
|
||||
// Test 7: get_filtered_stabilizer_star — no stabilizers registered
|
||||
static void test_filtered_stabilizer_star_empty() {
|
||||
std::cout << "test_filtered_stabilizer_star_empty\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
|
||||
euf::snode* full_re = sg.mk(full_e);
|
||||
euf::snode* tok_a = sg.mk_char('a');
|
||||
|
||||
euf::snode* result = nr.get_filtered_stabilizer_star(full_re, tok_a);
|
||||
SASSERT(result == nullptr);
|
||||
std::cout << " ok: no stabilizers -> nullptr\n";
|
||||
}
|
||||
|
||||
// Test 8: get_filtered_stabilizer_star — with registered stabilizer that passes filter
|
||||
static void test_filtered_stabilizer_star_with_stab() {
|
||||
std::cout << "test_filtered_stabilizer_star_with_stab\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// Build a* as the regex state
|
||||
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
|
||||
euf::snode* re_star_a = sg.mk(star_a);
|
||||
|
||||
// Register a stabilizer: to_re("b") — only accepts "b"
|
||||
expr_ref stab_b(su.re.mk_to_re(su.str.mk_string("b")), m);
|
||||
euf::snode* stab_b_sn = sg.mk(stab_b);
|
||||
nr.add_stabilizer(re_star_a, stab_b_sn);
|
||||
|
||||
// Exclude char 'a': D('a', to_re("b")) should be fail
|
||||
euf::snode* tok_a = sg.mk_char('a');
|
||||
euf::snode* result = nr.get_filtered_stabilizer_star(re_star_a, tok_a);
|
||||
// to_re("b") should pass the filter → result is star(to_re("b"))
|
||||
SASSERT(result != nullptr);
|
||||
SASSERT(result->is_star());
|
||||
std::cout << " ok: filter keeps to_re('b') when excluding 'a'\n";
|
||||
}
|
||||
|
||||
// Test 9: get_filtered_stabilizer_star — stabilizer filtered out
|
||||
static void test_filtered_stabilizer_star_filtered() {
|
||||
std::cout << "test_filtered_stabilizer_star_filtered\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// Build a* as the regex state
|
||||
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
|
||||
euf::snode* re_star_a = sg.mk(star_a);
|
||||
|
||||
// Register a stabilizer: to_re("a") — accepts "a"
|
||||
expr_ref stab_a(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
euf::snode* stab_a_sn = sg.mk(stab_a);
|
||||
nr.add_stabilizer(re_star_a, stab_a_sn);
|
||||
|
||||
// Exclude char 'a': D('a', to_re("a")) is NOT fail → filtered out
|
||||
euf::snode* tok_a = sg.mk_char('a');
|
||||
euf::snode* result = nr.get_filtered_stabilizer_star(re_star_a, tok_a);
|
||||
SASSERT(result == nullptr);
|
||||
std::cout << " ok: filter removes to_re('a') when excluding 'a'\n";
|
||||
}
|
||||
|
||||
// Test 10: extract_cycle_history — basic extraction
|
||||
static void test_extract_cycle_history_basic() {
|
||||
std::cout << "test_extract_cycle_history_basic\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
|
||||
euf::snode* full_re = sg.mk(full_e);
|
||||
|
||||
euf::snode* tok_a = sg.mk_char('a');
|
||||
euf::snode* tok_b = sg.mk_char('b');
|
||||
euf::snode* tok_c = sg.mk_char('c');
|
||||
|
||||
// Ancestor history: just 'a' (length 1)
|
||||
euf::snode* anc_hist = tok_a;
|
||||
|
||||
// Current history: concat(concat(a, b), c) = a,b,c (length 3)
|
||||
euf::snode* cur_hist = sg.mk_concat(sg.mk_concat(tok_a, tok_b), tok_c);
|
||||
|
||||
euf::snode* empty_str = sg.mk_empty_seq(str_sort);
|
||||
seq::dep_tracker empty_dep;
|
||||
|
||||
seq::str_mem ancestor(empty_str, full_re, anc_hist, 0, empty_dep);
|
||||
seq::str_mem current(empty_str, full_re, cur_hist, 0, empty_dep);
|
||||
|
||||
euf::snode* cycle = nr.extract_cycle_history(current, ancestor);
|
||||
// Should return the last 2 tokens (b, c)
|
||||
SASSERT(cycle != nullptr);
|
||||
SASSERT(cycle->length() == 2);
|
||||
std::cout << " ok: extracted cycle of length 2\n";
|
||||
}
|
||||
|
||||
// Test 11: extract_cycle_history — null ancestor history
|
||||
static void test_extract_cycle_history_null_ancestor() {
|
||||
std::cout << "test_extract_cycle_history_null_ancestor\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
|
||||
euf::snode* full_re = sg.mk(full_e);
|
||||
|
||||
euf::snode* tok_a = sg.mk_char('a');
|
||||
euf::snode* tok_b = sg.mk_char('b');
|
||||
euf::snode* cur_hist = sg.mk_concat(tok_a, tok_b);
|
||||
euf::snode* empty_str = sg.mk_empty_seq(str_sort);
|
||||
seq::dep_tracker empty_dep;
|
||||
|
||||
// Ancestor has no history (nullptr)
|
||||
seq::str_mem ancestor(empty_str, full_re, nullptr, 0, empty_dep);
|
||||
seq::str_mem current(empty_str, full_re, cur_hist, 0, empty_dep);
|
||||
|
||||
euf::snode* cycle = nr.extract_cycle_history(current, ancestor);
|
||||
// With null ancestor history, entire current history is the cycle
|
||||
SASSERT(cycle != nullptr);
|
||||
SASSERT(cycle->length() == 2);
|
||||
std::cout << " ok: null ancestor -> full history as cycle\n";
|
||||
}
|
||||
|
||||
// Test 12: BFS emptiness — re.none (empty language) is empty
|
||||
static void test_bfs_empty_none() {
|
||||
std::cout << "test_bfs_empty_none\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
|
||||
expr_ref none_e(su.re.mk_empty(re_sort), m);
|
||||
euf::snode* none_re = sg.mk(none_e);
|
||||
lbool result = nr.is_empty_bfs(none_re);
|
||||
SASSERT(result == l_true);
|
||||
std::cout << " ok: re.none -> l_true (empty)\n";
|
||||
}
|
||||
|
||||
// Test 13: BFS emptiness — full_seq (Sigma*) is NOT empty
|
||||
static void test_bfs_nonempty_full() {
|
||||
std::cout << "test_bfs_nonempty_full\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
|
||||
expr_ref full_e(su.re.mk_full_seq(re_sort), m);
|
||||
euf::snode* full_re = sg.mk(full_e);
|
||||
lbool result = nr.is_empty_bfs(full_re);
|
||||
SASSERT(result == l_false);
|
||||
std::cout << " ok: full_seq -> l_false (non-empty)\n";
|
||||
}
|
||||
|
||||
// Test 14: BFS emptiness — to_re("abc") is NOT empty
|
||||
static void test_bfs_nonempty_to_re() {
|
||||
std::cout << "test_bfs_nonempty_to_re\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
expr_ref to_re_abc(su.re.mk_to_re(su.str.mk_string("abc")), m);
|
||||
euf::snode* re_abc = sg.mk(to_re_abc);
|
||||
lbool result = nr.is_empty_bfs(re_abc);
|
||||
SASSERT(result == l_false);
|
||||
std::cout << " ok: to_re(\"abc\") -> l_false (non-empty)\n";
|
||||
}
|
||||
|
||||
// Test 15: BFS emptiness — a* is NOT empty (accepts epsilon)
|
||||
static void test_bfs_nonempty_star() {
|
||||
std::cout << "test_bfs_nonempty_star\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
|
||||
euf::snode* re_star_a = sg.mk(star_a);
|
||||
lbool result = nr.is_empty_bfs(re_star_a);
|
||||
SASSERT(result == l_false);
|
||||
std::cout << " ok: a* -> l_false (non-empty, accepts epsilon)\n";
|
||||
}
|
||||
|
||||
// Test 16: BFS emptiness — union(none, none) is empty
|
||||
static void test_bfs_empty_union_of_empties() {
|
||||
std::cout << "test_bfs_empty_union_of_empties\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
|
||||
expr_ref none1(su.re.mk_empty(re_sort), m);
|
||||
expr_ref none2(su.re.mk_empty(re_sort), m);
|
||||
expr_ref union_e(su.re.mk_union(none1, none2), m);
|
||||
euf::snode* re_union = sg.mk(union_e);
|
||||
lbool result = nr.is_empty_bfs(re_union);
|
||||
SASSERT(result == l_true);
|
||||
std::cout << " ok: union(none, none) -> l_true (empty)\n";
|
||||
}
|
||||
|
||||
// Test 17: BFS emptiness — re.range('a','z') is NOT empty
|
||||
static void test_bfs_nonempty_range() {
|
||||
std::cout << "test_bfs_nonempty_range\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
|
||||
expr_ref lo(su.mk_char('a'), m);
|
||||
expr_ref hi(su.mk_char('z'), m);
|
||||
expr_ref range_e(su.re.mk_range(su.str.mk_unit(lo), su.str.mk_unit(hi)), m);
|
||||
euf::snode* re_range = sg.mk(range_e);
|
||||
lbool result = nr.is_empty_bfs(re_range);
|
||||
SASSERT(result == l_false);
|
||||
std::cout << " ok: range('a','z') -> l_false (non-empty)\n";
|
||||
}
|
||||
|
||||
// Test 18: BFS emptiness — complement(full_seq) = empty
|
||||
static void test_bfs_empty_complement_full() {
|
||||
std::cout << "test_bfs_empty_complement_full\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
sort* re_sort = su.re.mk_re(str_sort);
|
||||
|
||||
expr_ref comp_full(su.re.mk_complement(su.re.mk_full_seq(re_sort)), m);
|
||||
euf::snode* re_comp = sg.mk(comp_full);
|
||||
lbool result = nr.is_empty_bfs(re_comp);
|
||||
SASSERT(result == l_true);
|
||||
std::cout << " ok: ~full_seq -> l_true (empty)\n";
|
||||
}
|
||||
|
||||
// Test 19: BFS emptiness — nullptr returns l_undef
|
||||
static void test_bfs_null_safety() {
|
||||
std::cout << "test_bfs_null_safety\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
|
||||
lbool result = nr.is_empty_bfs(nullptr);
|
||||
SASSERT(result == l_undef);
|
||||
std::cout << " ok: nullptr -> l_undef\n";
|
||||
}
|
||||
|
||||
// Test 20: BFS emptiness — max_states bound respected
|
||||
static void test_bfs_bounded() {
|
||||
std::cout << "test_bfs_bounded\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// (a|b)+ requires at least one char; with max_states=1 should bail
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
|
||||
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
|
||||
expr_ref ab_plus(su.re.mk_plus(ab_union), m);
|
||||
euf::snode* re_plus = sg.mk(ab_plus);
|
||||
|
||||
lbool result = nr.is_empty_bfs(re_plus, 1);
|
||||
SASSERT(result == l_undef);
|
||||
std::cout << " ok: (a|b)+ with max_states=1 -> l_undef (bounded)\n";
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// New tests for regex membership completion (Phase 1-4)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
// Test: char_set::is_subset
|
||||
static void test_char_set_is_subset() {
|
||||
std::cout << "test_char_set_is_subset\n";
|
||||
|
||||
// {a} ⊆ {a,b,c} = [97,100)
|
||||
char_set cs1(char_range('a', 'b')); // {a}
|
||||
char_set cs2(char_range('a', 'd')); // {a,b,c}
|
||||
SASSERT(cs1.is_subset(cs2));
|
||||
SASSERT(!cs2.is_subset(cs1));
|
||||
|
||||
// empty ⊆ anything
|
||||
char_set empty;
|
||||
SASSERT(empty.is_subset(cs1));
|
||||
SASSERT(empty.is_subset(cs2));
|
||||
|
||||
// self ⊆ self
|
||||
SASSERT(cs1.is_subset(cs1));
|
||||
SASSERT(cs2.is_subset(cs2));
|
||||
|
||||
// disjoint: {x} not ⊆ {a}
|
||||
char_set cs3(char_range('x', 'y'));
|
||||
SASSERT(!cs3.is_subset(cs1));
|
||||
|
||||
std::cout << " ok\n";
|
||||
}
|
||||
|
||||
// Test: stabilizer store basic operations
|
||||
static void test_stabilizer_store_basic() {
|
||||
std::cout << "test_stabilizer_store_basic\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
|
||||
euf::snode* a_sn = sg.mk(a_re);
|
||||
euf::snode* b_sn = sg.mk(b_re);
|
||||
|
||||
SASSERT(!nr.has_stabilizers(a_sn));
|
||||
nr.add_stabilizer(a_sn, b_sn);
|
||||
SASSERT(nr.has_stabilizers(a_sn));
|
||||
SASSERT(nr.get_stabilizer_union(a_sn) == b_sn);
|
||||
|
||||
// dedup: adding same stabilizer again
|
||||
nr.add_stabilizer(a_sn, b_sn);
|
||||
auto* stabs = nr.get_stabilizers(a_sn);
|
||||
SASSERT(stabs && stabs->size() == 1);
|
||||
|
||||
// reset
|
||||
nr.reset_stabilizers();
|
||||
SASSERT(!nr.has_stabilizers(a_sn));
|
||||
|
||||
std::cout << " ok\n";
|
||||
}
|
||||
|
||||
// Test: self-stabilizing flag
|
||||
static void test_self_stabilizing() {
|
||||
std::cout << "test_self_stabilizing\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
euf::snode* a_sn = sg.mk(a_re);
|
||||
|
||||
SASSERT(!nr.is_self_stabilizing(a_sn));
|
||||
nr.set_self_stabilizing(a_sn);
|
||||
SASSERT(nr.is_self_stabilizing(a_sn));
|
||||
|
||||
// star should be detected as self-stabilizing
|
||||
expr_ref star_a(su.re.mk_star(a_re), m);
|
||||
euf::snode* star_sn = sg.mk(star_a);
|
||||
SASSERT(nr.compute_self_stabilizing(star_sn));
|
||||
|
||||
std::cout << " ok\n";
|
||||
}
|
||||
|
||||
// Test: check_intersection_emptiness — SAT case
|
||||
static void test_check_intersection_sat() {
|
||||
std::cout << "test_check_intersection_sat\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// a* ∩ (a|b)* should be non-empty (both accept "a")
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
expr_ref star_a(su.re.mk_star(a_re), m);
|
||||
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
|
||||
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
|
||||
expr_ref star_ab(su.re.mk_star(ab_union), m);
|
||||
|
||||
euf::snode* s1 = sg.mk(star_a);
|
||||
euf::snode* s2 = sg.mk(star_ab);
|
||||
ptr_vector<euf::snode> regexes;
|
||||
regexes.push_back(s1);
|
||||
regexes.push_back(s2);
|
||||
|
||||
lbool result = nr.check_intersection_emptiness(regexes);
|
||||
SASSERT(result == l_false); // non-empty
|
||||
std::cout << " ok: a* ∩ (a|b)* is non-empty\n";
|
||||
}
|
||||
|
||||
// Test: check_intersection_emptiness — UNSAT case
|
||||
static void test_check_intersection_unsat() {
|
||||
std::cout << "test_check_intersection_unsat\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
|
||||
// to_re("a") ∩ to_re("b") should be empty
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
|
||||
euf::snode* s1 = sg.mk(a_re);
|
||||
euf::snode* s2 = sg.mk(b_re);
|
||||
ptr_vector<euf::snode> regexes;
|
||||
regexes.push_back(s1);
|
||||
regexes.push_back(s2);
|
||||
|
||||
lbool result = nr.check_intersection_emptiness(regexes);
|
||||
SASSERT(result == l_true); // empty
|
||||
std::cout << " ok: to_re(a) ∩ to_re(b) is empty\n";
|
||||
}
|
||||
|
||||
// Test: is_language_subset — true case
|
||||
static void test_is_language_subset_true() {
|
||||
std::cout << "test_is_language_subset_true\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// a* ⊆ (a|b)* should be true
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
expr_ref star_a(su.re.mk_star(a_re), m);
|
||||
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
|
||||
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
|
||||
expr_ref star_ab(su.re.mk_star(ab_union), m);
|
||||
|
||||
euf::snode* subset = sg.mk(star_a);
|
||||
euf::snode* superset = sg.mk(star_ab);
|
||||
|
||||
lbool result = nr.is_language_subset(subset, superset);
|
||||
SASSERT(result == l_true);
|
||||
std::cout << " ok: a* ⊆ (a|b)*\n";
|
||||
}
|
||||
|
||||
// Test: is_language_subset — false case
|
||||
static void test_is_language_subset_false() {
|
||||
std::cout << "test_is_language_subset_false\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
|
||||
// (a|b)* ⊄ a* should be false (b ∈ (a|b)* but b ∉ a*)
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
expr_ref star_a(su.re.mk_star(a_re), m);
|
||||
expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m);
|
||||
expr_ref ab_union(su.re.mk_union(a_re, b_re), m);
|
||||
expr_ref star_ab(su.re.mk_star(ab_union), m);
|
||||
|
||||
euf::snode* subset = sg.mk(star_ab);
|
||||
euf::snode* superset = sg.mk(star_a);
|
||||
|
||||
lbool result = nr.is_language_subset(subset, superset);
|
||||
SASSERT(result == l_false);
|
||||
std::cout << " ok: (a|b)* ⊄ a*\n";
|
||||
}
|
||||
|
||||
// Test: is_language_subset — trivial cases
|
||||
static void test_is_language_subset_trivial() {
|
||||
std::cout << "test_is_language_subset_trivial\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
smt::nseq_regex nr(sg);
|
||||
seq_util su(m);
|
||||
sort* str_sort = su.str.mk_string_sort();
|
||||
|
||||
// ∅ ⊆ anything = true
|
||||
expr_ref none(su.re.mk_empty(su.re.mk_re(str_sort)), m);
|
||||
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
|
||||
euf::snode* empty_sn = sg.mk(none);
|
||||
euf::snode* a_sn = sg.mk(a_re);
|
||||
SASSERT(nr.is_language_subset(empty_sn, a_sn) == l_true);
|
||||
|
||||
// anything ⊆ Σ* = true
|
||||
expr_ref full(su.re.mk_full_seq(su.re.mk_re(str_sort)), m);
|
||||
euf::snode* full_sn = sg.mk(full);
|
||||
SASSERT(nr.is_language_subset(a_sn, full_sn) == l_true);
|
||||
|
||||
// L ⊆ L = true (same pointer)
|
||||
SASSERT(nr.is_language_subset(a_sn, a_sn) == l_true);
|
||||
|
||||
std::cout << " ok\n";
|
||||
}
|
||||
|
||||
void tst_nseq_regex() {
|
||||
test_nseq_regex_instantiation();
|
||||
test_nseq_regex_is_empty();
|
||||
test_nseq_regex_is_full();
|
||||
test_strengthened_stabilizer_null();
|
||||
test_strengthened_stabilizer_single_char();
|
||||
test_strengthened_stabilizer_two_char();
|
||||
test_filtered_stabilizer_star_empty();
|
||||
test_filtered_stabilizer_star_with_stab();
|
||||
test_filtered_stabilizer_star_filtered();
|
||||
test_extract_cycle_history_basic();
|
||||
test_extract_cycle_history_null_ancestor();
|
||||
test_bfs_empty_none();
|
||||
test_bfs_nonempty_full();
|
||||
test_bfs_nonempty_to_re();
|
||||
test_bfs_nonempty_star();
|
||||
test_bfs_empty_union_of_empties();
|
||||
test_bfs_nonempty_range();
|
||||
test_bfs_empty_complement_full();
|
||||
test_bfs_null_safety();
|
||||
test_bfs_bounded();
|
||||
// New tests for regex membership completion
|
||||
test_char_set_is_subset();
|
||||
test_stabilizer_store_basic();
|
||||
test_self_stabilizing();
|
||||
test_check_intersection_sat();
|
||||
test_check_intersection_unsat();
|
||||
test_is_language_subset_true();
|
||||
test_is_language_subset_false();
|
||||
test_is_language_subset_trivial();
|
||||
std::cout << "nseq_regex: all tests passed\n";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,9 +57,10 @@ public:
|
|||
// -----------------------------------------------------------------------
|
||||
struct str_builder {
|
||||
euf::sgraph& sg;
|
||||
seq_util& su;
|
||||
euf::snode* vars[26] = {}; // vars['A'-'A'] .. vars['Z'-'A']
|
||||
|
||||
explicit str_builder(euf::sgraph& sg) : sg(sg) {}
|
||||
str_builder(euf::sgraph& sg, seq_util& su) : sg(sg), su(su) {}
|
||||
|
||||
euf::snode* var(char c) {
|
||||
int idx = c - 'A';
|
||||
|
|
@ -76,7 +77,7 @@ struct str_builder {
|
|||
: sg.mk_char((unsigned)(unsigned char)*p);
|
||||
result = result ? sg.mk_concat(result, tok) : tok;
|
||||
}
|
||||
return result ? result : sg.mk_empty();
|
||||
return result ? result : sg.mk_empty_seq(su.str.mk_string_sort());
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -190,7 +191,7 @@ struct nseq_fixture {
|
|||
static ast_manager& init(ast_manager& m) { reg_decl_plugins(m); return m; }
|
||||
|
||||
nseq_fixture()
|
||||
: eg(init(m)), sg(m, eg), ng(sg, dummy_solver), su(m), sb(sg), rb(m, su, sg)
|
||||
: eg(init(m)), sg(m, eg), ng(sg, dummy_solver), su(m), sb(sg, su), rb(m, su, sg)
|
||||
{}
|
||||
|
||||
euf::snode* S(const char* s) { return sb.parse(s); }
|
||||
|
|
|
|||
|
|
@ -74,11 +74,12 @@ static void test_str_eq() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
|
||||
|
|
@ -121,7 +122,7 @@ static void test_str_mem() {
|
|||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
// create a regex: re.all (.*)
|
||||
expr_ref star_fc(seq.re.mk_full_seq(str_sort), m);
|
||||
|
|
@ -149,11 +150,12 @@ static void test_nielsen_subst() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
seq::dep_tracker dep;
|
||||
|
||||
|
|
@ -208,7 +210,7 @@ static void test_nielsen_node() {
|
|||
// regex membership
|
||||
expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
|
||||
euf::snode* regex = sg.mk(re_all);
|
||||
euf::snode* empty = sg.mk_empty();
|
||||
euf::snode* empty = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
root->add_str_mem(seq::str_mem(x, regex, empty, 0, dep));
|
||||
SASSERT(root->str_mems().size() == 1);
|
||||
|
||||
|
|
@ -311,7 +313,7 @@ static void test_nielsen_subst_apply() {
|
|||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
// create node with constraint: concat(x, A) = concat(B, y)
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
|
|
@ -362,6 +364,7 @@ static void test_nielsen_expansion() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver;
|
||||
seq::nielsen_graph ng(sg, solver);
|
||||
|
|
@ -379,7 +382,7 @@ static void test_nielsen_expansion() {
|
|||
seq::dep_tracker dep;
|
||||
|
||||
// branch 1: x -> eps (eliminating, progress)
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
seq::nielsen_node* child1 = ng.mk_child(root);
|
||||
seq::nielsen_subst s1(x, e, dep);
|
||||
child1->apply_subst(sg, s1);
|
||||
|
|
@ -1720,9 +1723,10 @@ static void test_simplify_empty_propagation() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* xy = sg.mk_concat(x, y);
|
||||
|
|
@ -1743,9 +1747,10 @@ static void test_simplify_empty_vs_char() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
|
||||
// ε = A → rhs has non-variable token → conflict
|
||||
|
|
@ -1791,11 +1796,12 @@ static void test_simplify_trivial_removal() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
|
|
@ -1814,10 +1820,11 @@ static void test_simplify_all_trivial_satisfied() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
|
|
@ -1838,7 +1845,7 @@ static void test_simplify_regex_infeasible() {
|
|||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
|
|
@ -1865,7 +1872,7 @@ static void test_simplify_nullable_removal() {
|
|||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
|
|
@ -1894,7 +1901,7 @@ static void test_simplify_brzozowski_sat() {
|
|||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
|
|
@ -1923,7 +1930,7 @@ static void test_simplify_brzozowski_rtl_suffix() {
|
|||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* xa = sg.mk_concat(x, a);
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
expr_ref ch_b(seq.str.mk_char('B'), m);
|
||||
expr_ref unit_b(seq.str.mk_unit(ch_b), m);
|
||||
|
|
@ -1955,13 +1962,14 @@ static void test_simplify_multiple_eqs() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* z = sg.mk_var(symbol("z"));
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
seq::nielsen_node* node = ng.mk_node();
|
||||
seq::dep_tracker dep; dep.insert(0);
|
||||
|
|
@ -2083,7 +2091,7 @@ static void test_explain_conflict_mem_only() {
|
|||
seq_util seq(m);
|
||||
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
expr_ref ch_a(seq.str.mk_char('A'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
|
|
@ -2115,7 +2123,7 @@ static void test_explain_conflict_mixed_eq_mem() {
|
|||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
// eq[0]: A = B (conflict)
|
||||
ng.add_str_eq(a, b);
|
||||
|
|
@ -2418,9 +2426,10 @@ static void test_length_constraints_empty_side() {
|
|||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort());
|
||||
|
||||
// x = ε
|
||||
dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver);
|
||||
|
|
|
|||
|
|
@ -402,6 +402,27 @@ bool char_set::is_disjoint(char_set const& other) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool char_set::is_subset(char_set const& other) const {
|
||||
// Every range in *this must be fully covered by ranges in other.
|
||||
// Both are sorted, non-overlapping.
|
||||
if (m_ranges.empty())
|
||||
return true;
|
||||
unsigned j = 0;
|
||||
for (unsigned i = 0; i < m_ranges.size(); ++i) {
|
||||
unsigned lo = m_ranges[i].m_lo;
|
||||
unsigned hi = m_ranges[i].m_hi;
|
||||
// Advance j to find covering range in other
|
||||
while (j < other.m_ranges.size() && other.m_ranges[j].m_hi <= lo)
|
||||
++j;
|
||||
if (j >= other.m_ranges.size())
|
||||
return false;
|
||||
// other.m_ranges[j] must fully contain [lo, hi)
|
||||
if (other.m_ranges[j].m_lo > lo || other.m_ranges[j].m_hi < hi)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream& char_set::display(std::ostream& out) const {
|
||||
if (m_ranges.empty()) {
|
||||
out << "{}";
|
||||
|
|
|
|||
|
|
@ -156,6 +156,9 @@ public:
|
|||
// check if two sets are disjoint
|
||||
bool is_disjoint(char_set const& other) const;
|
||||
|
||||
// check if this set is a subset of other (every char in this is also in other)
|
||||
bool is_subset(char_set const& other) const;
|
||||
|
||||
bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; }
|
||||
|
||||
char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue