From 27f5541b0bd518ae3477df3b31ea9b001537ebc7 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 13 Mar 2026 18:19:25 -0700 Subject: [PATCH] updates Signed-off-by: Nikolaj Bjorner --- src/smt/nseq_regex.cpp | 926 ++++++++++++++++++++++++++++++++++-- src/smt/nseq_regex.h | 184 ++++++- src/smt/seq/seq_nielsen.cpp | 370 +++++++++++++- src/smt/seq/seq_nielsen.h | 43 ++ src/test/euf_sgraph.cpp | 12 +- src/test/nseq_basic.cpp | 8 +- src/test/nseq_regex.cpp | 637 +++++++++++++++++++++++++ src/test/nseq_zipt.cpp | 7 +- src/test/seq_nielsen.cpp | 45 +- src/util/zstring.cpp | 21 + src/util/zstring.h | 3 + 11 files changed, 2176 insertions(+), 80 deletions(-) diff --git a/src/smt/nseq_regex.cpp b/src/smt/nseq_regex.cpp index df9847b6c..743b74ef6 100644 --- a/src/smt/nseq_regex.cpp +++ b/src/smt/nseq_regex.cpp @@ -15,11 +15,294 @@ Author: --*/ #include "smt/nseq_regex.h" +#include namespace smt { // ----------------------------------------------------------------------- - // Regex emptiness checking (structural analysis) + // Stabilizer store + // ----------------------------------------------------------------------- + + void nseq_regex::reset_stabilizers() { + m_stabilizers.reset(); + m_self_stabilizing.reset(); + } + + void nseq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) { + if (!regex || !stabilizer) + return; + + unsigned id = regex->id(); + auto& stabs = m_stabilizers.insert_if_not_there(id, ptr_vector()); + + // De-duplicate by pointer equality (mirrors ZIPT Environment.AddStabilizer + // which checks reference equality before adding). + for (euf::snode* s : stabs) + if (s == stabilizer) + return; + stabs.push_back(stabilizer); + } + + euf::snode* nseq_regex::get_stabilizer_union(euf::snode* regex) { + if (!regex) + return nullptr; + + if (!m_stabilizers.contains(regex->id())) + return nullptr; + + auto& stabs = m_stabilizers[regex->id()]; + if (stabs.empty()) + return nullptr; + + // Single stabilizer: return it directly. + if (stabs.size() == 1) + return stabs[0]; + + // Multiple stabilizers: build re.union chain. + // union(s1, union(s2, ... union(sN-1, sN)...)) + seq_util& seq = m_sg.get_seq_util(); + euf::snode* result = stabs[stabs.size() - 1]; + for (unsigned i = stabs.size() - 1; i-- > 0; ) { + expr* lhs = stabs[i]->get_expr(); + expr* rhs = result->get_expr(); + if (!lhs || !rhs) + return nullptr; + expr_ref un(seq.re.mk_union(lhs, rhs), m_sg.get_manager()); + result = m_sg.mk(un); + } + return result; + } + + bool nseq_regex::has_stabilizers(euf::snode* regex) const { + if (!regex) + return false; + if (!m_stabilizers.contains(regex->id())) + return false; + return !m_stabilizers[regex->id()].empty(); + } + + ptr_vector const* nseq_regex::get_stabilizers(euf::snode* regex) const { + if (!regex) + return nullptr; + if (!m_stabilizers.contains(regex->id())) + return nullptr; + return &m_stabilizers[regex->id()]; + } + + void nseq_regex::set_self_stabilizing(euf::snode* regex) { + if (!regex) + return; + m_self_stabilizing.insert(regex->id()); + } + + bool nseq_regex::is_self_stabilizing(euf::snode* regex) const { + if (!regex) + return false; + return m_self_stabilizing.contains(regex->id()); + } + + // ----------------------------------------------------------------------- + // Self-stabilizing auto-detection + // ----------------------------------------------------------------------- + + bool nseq_regex::compute_self_stabilizing(euf::snode* regex) const { + if (!regex) + return false; + + // R* is always self-stabilizing: D(c, R*) = D(c,R) · R*, + // so R* appears as the tail of every derivative and acts as + // its own stabilizer. + if (regex->is_star()) + return true; + + // Σ* (full_seq, i.e., re.all / .*) is self-stabilizing: + // D(c, Σ*) = Σ* for every character c. + if (regex->is_full_seq()) + return true; + + // ∅ (fail / empty language) is trivially self-stabilizing: + // it has no live derivatives, so the flag is vacuously true. + if (regex->is_fail()) + return true; + + // Complement of full_seq is ∅ (complement of Σ*), which is + // also trivially self-stabilizing. + if (regex->is_complement() && regex->num_args() == 1 && + regex->arg(0)->is_full_seq()) + return true; + + // Loop with lo=0 and no upper bound behaves like R* + // (r{0,} ≡ r*), so it is self-stabilizing. + if (regex->is_loop() && regex->is_nullable()) { + // A nullable loop with a star-like body: heuristic check. + // Only mark as self-stabilizing if the body is a Kleene closure. + // Loop(R, 0, ∞) ~ R* — but we rely on the sgraph to normalize + // these, so only catch exact star nodes above. + } + + return false; + } + + // ----------------------------------------------------------------------- + // Self-stabilizing propagation through derivatives + // ----------------------------------------------------------------------- + + void nseq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) { + if (!parent || !deriv) + return; + + // If the derivative is already known to be self-stabilizing (either + // inherently or from a prior propagation), nothing to do. + if (is_self_stabilizing(deriv)) + return; + + // If the derivative is itself inherently self-stabilizing + // (e.g., it is a star or full_seq), mark it now. + if (compute_self_stabilizing(deriv)) { + set_self_stabilizing(deriv); + return; + } + + // Rule 1: Star parent. + // D(c, R*) = D(c, R) · R*. The derivative always contains the + // R* tail, so it is self-stabilizing regardless of D(c,R). + if (parent->is_star()) { + set_self_stabilizing(deriv); + return; + } + + // Rule 2: Full_seq parent. + // D(c, Σ*) = Σ*, and Σ* is self-stabilizing. + // (The derivative should be Σ* itself; mark it for safety.) + if (parent->is_full_seq()) { + set_self_stabilizing(deriv); + return; + } + + // Check if parent is self-stabilizing (either inherently or marked). + bool parent_ss = is_self_stabilizing(parent) || compute_self_stabilizing(parent); + + // Rule 3: Concat parent R · S. + // D(c, R·S) = D(c,R)·S | (nullable(R) ? D(c,S) : ∅). + // If S is self-stabilizing, the D(c,R)·S branch inherits it. + // If the whole parent R·S is self-stabilizing, the derivative is too. + if (parent->is_concat() && parent->num_args() == 2) { + euf::snode* tail = parent->arg(1); + bool tail_ss = is_self_stabilizing(tail) || compute_self_stabilizing(tail); + if (tail_ss || parent_ss) { + set_self_stabilizing(deriv); + return; + } + } + + // Rule 4: Union parent R | S. + // D(c, R|S) = D(c,R) | D(c,S). + // Self-stabilizing if both children are self-stabilizing. + if (parent->is_union() && parent->num_args() == 2) { + euf::snode* lhs = parent->arg(0); + euf::snode* rhs = parent->arg(1); + bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs); + bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs); + if (lhs_ss && rhs_ss) { + set_self_stabilizing(deriv); + return; + } + } + + // Rule 5: Intersection parent R ∩ S. + // D(c, R∩S) = D(c,R) ∩ D(c,S). + // Self-stabilizing if both children are self-stabilizing. + if (parent->is_intersect() && parent->num_args() == 2) { + euf::snode* lhs = parent->arg(0); + euf::snode* rhs = parent->arg(1); + bool lhs_ss = is_self_stabilizing(lhs) || compute_self_stabilizing(lhs); + bool rhs_ss = is_self_stabilizing(rhs) || compute_self_stabilizing(rhs); + if (lhs_ss && rhs_ss) { + set_self_stabilizing(deriv); + return; + } + } + + // Rule 6: Complement parent ~R. + // D(c, ~R) = ~D(c, R). + // Preserves self-stabilizing from R. + if (parent->is_complement() && parent->num_args() == 1) { + euf::snode* inner = parent->arg(0); + bool inner_ss = is_self_stabilizing(inner) || compute_self_stabilizing(inner); + if (inner_ss) { + set_self_stabilizing(deriv); + return; + } + } + + // Rule 7: Generic self-stabilizing parent. + // If the parent was explicitly marked self-stabilizing (e.g., via + // a previous propagation), propagate to the derivative. + if (parent_ss) { + set_self_stabilizing(deriv); + return; + } + } + + // ----------------------------------------------------------------------- + // Derivative with propagation + // ----------------------------------------------------------------------- + + euf::snode* nseq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) { + if (!re || !elem) + return nullptr; + euf::snode* deriv = derivative(re, elem); + if (deriv) + propagate_self_stabilizing(re, deriv); + return deriv; + } + + // ----------------------------------------------------------------------- + // Uniform derivative (symbolic character consumption) + // ----------------------------------------------------------------------- + + euf::snode* nseq_regex::try_uniform_derivative(euf::snode* regex) { + if (!regex) + return nullptr; + + // Quick exits: trivial regexes with known uniform derivatives. + // Σ* (full_seq) has derivative Σ* for every character. + if (regex->is_full_seq()) + return regex; + // ∅ (fail) has derivative ∅ for every character — but this means + // every character is rejected. Return fail so the caller can + // detect a conflict. + if (regex->is_fail()) + return regex; + + // Compute minterms: the character-class partition of the alphabet + // induced by the regex. + euf::snode_vector minterms; + m_sg.compute_minterms(regex, minterms); + if (minterms.empty()) + return nullptr; + + // Compute the derivative for each non-empty minterm. If all produce + // the same result, the derivative is independent of the character + // value and we can consume a symbolic character deterministically. + euf::snode* uniform = nullptr; + for (euf::snode* mt : minterms) { + if (!mt || mt->is_fail()) + continue; // empty character class — no character belongs to it + euf::snode* deriv = m_sg.brzozowski_deriv(regex, mt); + if (!deriv) + return nullptr; // derivative computation failed + if (!uniform) { + uniform = deriv; + } else if (uniform->id() != deriv->id()) { + return nullptr; // different derivatives — not uniform + } + } + return uniform; // may be nullptr if all minterms were fail/empty + } + + // ----------------------------------------------------------------------- + // Ground prefix consumption // ----------------------------------------------------------------------- bool nseq_regex::is_empty_regex(euf::snode* re) const { @@ -68,6 +351,377 @@ namespace smt { return false; } + // ----------------------------------------------------------------------- + // BFS regex emptiness check — helper: collect character boundaries + // ----------------------------------------------------------------------- + + void nseq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const { + if (!re || !re->get_expr()) + return; + + seq_util& seq = m_sg.get_seq_util(); + expr* e = re->get_expr(); + + // Range predicate re.range(lo, hi): boundary at lo and hi+1 + // Range arguments are string expressions (e.g., str.unit(ch)) + expr* lo_expr = nullptr; + expr* hi_expr = nullptr; + if (seq.re.is_range(e, lo_expr, hi_expr)) { + zstring s_lo, s_hi; + if (lo_expr && seq.str.is_string(lo_expr, s_lo) && s_lo.length() == 1) + bounds.push_back(s_lo[0]); + if (hi_expr && seq.str.is_string(hi_expr, s_hi) && s_hi.length() == 1 && s_hi[0] < zstring::max_char()) + bounds.push_back(s_hi[0] + 1); + return; + } + + // to_re(s): boundary at first character and first+1 + expr* body = nullptr; + if (seq.re.is_to_re(e, body)) { + zstring s; + if (seq.str.is_string(body, s) && s.length() > 0) { + unsigned first_ch = s[0]; + bounds.push_back(first_ch); + if (first_ch < zstring::max_char()) + bounds.push_back(first_ch + 1); + } + return; + } + + // Leaf nodes with no character discrimination + if (re->is_fail() || re->is_full_char() || re->is_full_seq()) + return; + + // Recurse into children (handles union, concat, star, loop, etc.) + for (unsigned i = 0; i < re->num_args(); ++i) + collect_char_boundaries(re->arg(i), bounds); + } + + // ----------------------------------------------------------------------- + // BFS regex emptiness check — helper: alphabet representatives + // ----------------------------------------------------------------------- + + void nseq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) { + unsigned_vector bounds; + bounds.push_back(0); // always include character 0 + collect_char_boundaries(re, bounds); + + // Sort and deduplicate + std::sort(bounds.begin(), bounds.end()); + unsigned prev = UINT_MAX; + for (unsigned b : bounds) { + if (b != prev) { + reps.push_back(m_sg.mk_char(b)); + prev = b; + } + } + } + + // ----------------------------------------------------------------------- + // BFS regex emptiness check + // ----------------------------------------------------------------------- + + lbool nseq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) { + if (!re || !re->get_expr()) + return l_undef; + if (re->is_fail()) + return l_true; + if (re->is_nullable()) + return l_false; + // Structural quick checks for kinds that are never empty + if (re->is_star() || re->is_full_char() || re->is_full_seq() || re->is_to_re()) + return l_false; + // Structural emptiness catches simple cases + if (is_empty_regex(re)) + return l_true; + // Only handle ground regexes; non-ground can't be fully explored + if (!re->is_ground()) + return l_undef; + + // BFS over the Brzozowski derivative automaton. + // Each state is a derivative regex snode identified by its id. + // We explore states by computing derivatives for representative + // characters from the alphabet partition. + uint_set visited; + euf::snode_vector worklist; + worklist.push_back(re); + visited.insert(re->id()); + + unsigned states_explored = 0; + bool had_failed_deriv = false; + + while (!worklist.empty()) { + if (states_explored >= max_states) + return l_undef; + + euf::snode* current = worklist.back(); + worklist.pop_back(); + ++states_explored; + + // Compute representative characters for current state's + // alphabet partition. Each representative is a concrete + // character snode whose equivalence class has identical + // derivative behavior. + euf::snode_vector reps; + get_alphabet_representatives(current, reps); + + if (reps.empty()) { + // No representatives means no character predicates; + // use a default character to explore the single partition. + reps.push_back(m_sg.mk_char('a')); + } + + for (euf::snode* ch : reps) { + euf::snode* deriv = m_sg.brzozowski_deriv(current, ch); + if (!deriv) { + // Derivative computation failed for this character. + // Track the failure but continue with other characters. + had_failed_deriv = true; + continue; + } + if (deriv->is_nullable()) + return l_false; // found an accepting state + if (deriv->is_fail()) + continue; // dead-end, no need to explore further + if (is_empty_regex(deriv)) + continue; // structurally empty subtree + if (!visited.contains(deriv->id())) { + visited.insert(deriv->id()); + worklist.push_back(deriv); + } + } + } + + // Exhausted all reachable states without finding a nullable one. + // If we had any failed derivative computations, the result is + // inconclusive since we may have missed reachable states. + if (had_failed_deriv) + return l_undef; + + return l_true; + } + + // ----------------------------------------------------------------------- + // Multi-regex intersection emptiness check + // BFS over the product of Brzozowski derivative automata. + // Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469) + // ----------------------------------------------------------------------- + + lbool nseq_regex::check_intersection_emptiness(ptr_vector const& regexes, + unsigned max_states) { + if (regexes.empty()) + return l_false; // empty intersection = full language (vacuously non-empty) + + // Quick checks: if any regex is fail/empty, intersection is empty + for (euf::snode* re : regexes) { + if (!re || !re->get_expr()) + return l_undef; + if (re->is_fail() || is_empty_regex(re)) + return l_true; + } + + // Check if all are nullable (intersection accepts ε) + bool all_nullable = true; + for (euf::snode* re : regexes) { + if (!re->is_nullable()) { all_nullable = false; break; } + } + if (all_nullable) + return l_false; + + // Single regex: delegate to is_empty_bfs + if (regexes.size() == 1) + return is_empty_bfs(regexes[0], max_states); + + // Build product BFS. State = tuple of regex snode ids. + // Use a map from state hash to visited set. + using state_t = svector; + + auto state_hash = [](state_t const& s) -> unsigned { + unsigned h = 0; + for (unsigned id : s) + h = h * 31 + id; + return h; + }; + + auto state_eq = [](state_t const& a, state_t const& b) -> bool { + if (a.size() != b.size()) return false; + for (unsigned i = 0; i < a.size(); ++i) + if (a[i] != b[i]) return false; + return true; + }; + + // Use simple set via sorted vector of hashes (good enough for bounded BFS) + std::unordered_set visited_hashes; + + struct bfs_state { + ptr_vector regexes; + }; + + std::vector worklist; + bfs_state initial; + initial.regexes.append(regexes); + worklist.push_back(std::move(initial)); + + state_t init_ids; + for (euf::snode* re : regexes) + init_ids.push_back(re->id()); + visited_hashes.insert(state_hash(init_ids)); + + unsigned states_explored = 0; + bool had_failed = false; + + // Collect alphabet representatives from the intersection of all regexes + // (merge boundaries from all) + unsigned_vector all_bounds; + all_bounds.push_back(0); + for (euf::snode* re : regexes) + collect_char_boundaries(re, all_bounds); + std::sort(all_bounds.begin(), all_bounds.end()); + + euf::snode_vector reps; + unsigned prev = UINT_MAX; + for (unsigned b : all_bounds) { + if (b != prev) { + reps.push_back(m_sg.mk_char(b)); + prev = b; + } + } + if (reps.empty()) + reps.push_back(m_sg.mk_char('a')); + + while (!worklist.empty()) { + if (states_explored >= max_states) + return l_undef; + + bfs_state current = std::move(worklist.back()); + worklist.pop_back(); + ++states_explored; + + for (euf::snode* ch : reps) { + ptr_vector derivs; + bool any_fail = false; + bool all_null = true; + bool deriv_failed = false; + + for (euf::snode* re : current.regexes) { + euf::snode* d = m_sg.brzozowski_deriv(re, ch); + if (!d) { deriv_failed = true; break; } + if (d->is_fail()) { any_fail = true; break; } + if (!d->is_nullable()) all_null = false; + derivs.push_back(d); + } + + if (deriv_failed) { had_failed = true; continue; } + if (any_fail) continue; // this character leads to empty intersection + + if (all_null) + return l_false; // found an accepting state in the product + + // Check if any component is structurally empty + bool any_empty = false; + for (euf::snode* d : derivs) { + if (is_empty_regex(d)) { any_empty = true; break; } + } + if (any_empty) continue; + + // Compute state hash and check visited + state_t ids; + for (euf::snode* d : derivs) + ids.push_back(d->id()); + unsigned h = state_hash(ids); + if (visited_hashes.count(h) == 0) { + visited_hashes.insert(h); + bfs_state next; + next.regexes.append(derivs); + worklist.push_back(std::move(next)); + } + } + } + + if (had_failed) + return l_undef; + return l_true; // exhausted all states, intersection is empty + } + + // ----------------------------------------------------------------------- + // Language subset check: L(A) ⊆ L(B) + // via intersection(A, complement(B)) = ∅ + // Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385) + // ----------------------------------------------------------------------- + + lbool nseq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) { + if (!subset_re || !superset_re) + return l_undef; + + // Quick checks + if (subset_re->is_fail() || is_empty_regex(subset_re)) + return l_true; // ∅ ⊆ anything + if (superset_re->is_full_seq()) + return l_true; // anything ⊆ Σ* + if (subset_re == superset_re) + return l_true; // L ⊆ L + + // Build complement(superset) + seq_util& seq = m_sg.get_seq_util(); + ast_manager& mgr = m_sg.get_manager(); + expr* sup_expr = superset_re->get_expr(); + if (!sup_expr) + return l_undef; + expr_ref comp(seq.re.mk_complement(sup_expr), mgr); + euf::snode* comp_sn = m_sg.mk(comp); + if (!comp_sn) + return l_undef; + + // Build intersection and check emptiness + // subset ∩ complement(superset) should be empty for subset relation + expr* sub_expr = subset_re->get_expr(); + if (!sub_expr) + return l_undef; + expr_ref inter(seq.re.mk_inter(sub_expr, comp.get()), mgr); + euf::snode* inter_sn = m_sg.mk(inter); + if (!inter_sn) + return l_undef; + + return is_empty_bfs(inter_sn); + } + + // ----------------------------------------------------------------------- + // Collect primitive regex intersection for a variable + // ----------------------------------------------------------------------- + + euf::snode* nseq_regex::collect_primitive_regex_intersection( + euf::snode* var, seq::nielsen_node const& node) { + if (!var) + return nullptr; + + seq_util& seq = m_sg.get_seq_util(); + ast_manager& mgr = m_sg.get_manager(); + euf::snode* result = nullptr; + + for (auto const& mem : node.str_mems()) { + if (!mem.m_str || !mem.m_regex) + continue; + // Primitive constraint: str is a single variable + if (!mem.is_primitive()) + continue; + euf::snode* first = mem.m_str->first(); + if (!first || first != var) + continue; + + if (!result) { + result = mem.m_regex; + } else { + expr* r1 = result->get_expr(); + expr* r2 = mem.m_regex->get_expr(); + if (r1 && r2) { + expr_ref inter(seq.re.mk_inter(r1, r2), mgr); + result = m_sg.mk(inter); + } + } + } + return result; + } + // ----------------------------------------------------------------------- // Cycle detection // ----------------------------------------------------------------------- @@ -88,11 +742,14 @@ namespace smt { euf::snode* first = mem.m_str->first(); if (!first || !first->is_char()) break; - euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, first); + euf::snode* parent_re = mem.m_regex; + euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, first); if (!deriv) break; if (deriv->is_fail()) return simplify_status::conflict; + // propagate self-stabilizing flag from parent to derivative + propagate_self_stabilizing(parent_re, deriv); mem.m_str = m_sg.drop_first(mem.m_str); mem.m_regex = deriv; } @@ -361,13 +1018,6 @@ namespace smt { if (!cycle_regex || !current_regex) return nullptr; - // The stabilizer is the Kleene star of the "cycle body" regex. - // If the cycle regex and current regex are the same (pointer equal), - // the stabilizer is cycle_regex* (Kleene star). - // This mirrors ZIPT's StabilizerFromCycle which extracts the - // regex between the cycle entry and current point and wraps it in *. - - // Build cycle_regex* via the sgraph's expression factory expr* re_expr = cycle_regex->get_expr(); if (!re_expr) return nullptr; @@ -378,31 +1028,253 @@ namespace smt { } // ----------------------------------------------------------------------- - // Stabilizer-based subsumption + // Extract cycle history tokens // ----------------------------------------------------------------------- - bool nseq_regex::try_subsume(seq::str_mem const& mem) { - // Check if the derivation history exhibits a cycle, and if so, - // whether the current regex is subsumed by the stabilizer. - euf::snode* cycle = extract_cycle(mem); - if (!cycle) + euf::snode* nseq_regex::extract_cycle_history(seq::str_mem const& current, + seq::str_mem const& ancestor) { + // The history is built by simplify_and_init as a left-associative + // string concat chain: concat(concat(concat(nil, c1), c2), c3). + // Extract the tokens consumed since the ancestor. + if (!current.m_history) + return nullptr; + + unsigned cur_len = current.m_history->length(); + unsigned anc_len = ancestor.m_history ? ancestor.m_history->length() : 0; + + if (cur_len <= anc_len) + return nullptr; + + if (anc_len == 0) + return current.m_history; + + return m_sg.drop_left(current.m_history, anc_len); + } + + // ----------------------------------------------------------------------- + // Get filtered stabilizer star + // Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243) + // ----------------------------------------------------------------------- + + euf::snode* nseq_regex::get_filtered_stabilizer_star(euf::snode* re, + euf::snode* excluded_char) { + if (!re) + return nullptr; + + ptr_vector const* stabs = get_stabilizers(re); + if (!stabs || stabs->empty()) + return nullptr; + + seq_util& seq = m_sg.get_seq_util(); + ast_manager& m = m_sg.get_manager(); + euf::snode* filtered_union = nullptr; + + for (euf::snode* s : *stabs) { + if (!s) + continue; + // Keep only stabilizers whose language cannot start with excluded_char + euf::snode* d = m_sg.brzozowski_deriv(s, excluded_char); + if (d && d->is_fail()) { + if (!filtered_union) { + filtered_union = s; + } else { + expr* e1 = filtered_union->get_expr(); + expr* e2 = s->get_expr(); + if (e1 && e2) { + expr_ref u(seq.re.mk_union(e1, e2), m); + filtered_union = m_sg.mk(u); + } + } + } + } + + if (!filtered_union) + return nullptr; + + expr* fe = filtered_union->get_expr(); + if (!fe) + return nullptr; + expr_ref star_expr(seq.re.mk_star(fe), m); + return m_sg.mk(star_expr); + } + + // ----------------------------------------------------------------------- + // Strengthened stabilizer construction with sub-cycle detection + // Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225) + // ----------------------------------------------------------------------- + + euf::snode* nseq_regex::strengthened_stabilizer(euf::snode* cycle_regex, + euf::snode* cycle_history) { + if (!cycle_regex || !cycle_history) + return nullptr; + + // Flatten the history concat chain into a vector of character tokens. + euf::snode_vector tokens; + cycle_history->collect_tokens(tokens); + + if (tokens.empty()) + return nullptr; + + seq_util& seq = m_sg.get_seq_util(); + ast_manager& m = m_sg.get_manager(); + + // Replay tokens on the cycle regex, detecting sub-cycles. + // A sub-cycle is detected when the derivative returns to cycle_regex. + svector> sub_cycles; + unsigned cycle_start = 0; + euf::snode* current_re = cycle_regex; + + for (unsigned i = 0; i < tokens.size(); ++i) { + euf::snode* tok = tokens[i]; + if (!tok) + return nullptr; + + euf::snode* deriv = m_sg.brzozowski_deriv(current_re, tok); + if (!deriv) + return nullptr; + + // Sub-cycle: derivative returned to the cycle entry regex + if (deriv == cycle_regex || + (deriv->get_expr() && cycle_regex->get_expr() && + deriv->get_expr() == cycle_regex->get_expr())) { + sub_cycles.push_back(std::make_pair(cycle_start, i + 1)); + cycle_start = i + 1; + current_re = cycle_regex; + } else { + current_re = deriv; + } + } + + // Remaining tokens that don't complete a sub-cycle + if (cycle_start < tokens.size()) + sub_cycles.push_back(std::make_pair(cycle_start, tokens.size())); + + if (sub_cycles.empty()) + return nullptr; + + // Build a stabilizer body for each sub-cycle. + // body = to_re(t0) · [filteredStar(R1, t1)] · to_re(t1) · ... · to_re(t_{n-1}) + euf::snode* overall_union = nullptr; + + for (auto const& sc : sub_cycles) { + unsigned start = sc.first; + unsigned end = sc.second; + if (start >= end) + continue; + + euf::snode* re_state = cycle_regex; + euf::snode* body = nullptr; + + for (unsigned i = start; i < end; ++i) { + euf::snode* tok = tokens[i]; + if (!tok) + break; + + // Insert filtered stabilizer star before each token after the first + if (i > start) { + euf::snode* filtered = get_filtered_stabilizer_star(re_state, tok); + if (filtered) { + expr* fe = filtered->get_expr(); + if (fe) { + if (!body) { + body = filtered; + } else { + expr* be = body->get_expr(); + if (be) { + expr_ref cat(seq.re.mk_concat(be, fe), m); + body = m_sg.mk(cat); + } + } + } + } + } + + // Convert char token to regex: to_re(unit(tok)) + expr* tok_expr = tok->get_expr(); + if (!tok_expr) + break; + + expr_ref unit_str(seq.str.mk_unit(tok_expr), m); + expr_ref tok_re(seq.re.mk_to_re(unit_str), m); + euf::snode* tok_re_sn = m_sg.mk(tok_re); + + if (!body) { + body = tok_re_sn; + } else { + expr* be = body->get_expr(); + expr* te = tok_re_sn->get_expr(); + if (be && te) { + expr_ref cat(seq.re.mk_concat(be, te), m); + body = m_sg.mk(cat); + } + } + + // Advance the regex state + euf::snode* deriv = m_sg.brzozowski_deriv(re_state, tok); + if (!deriv) + break; + re_state = deriv; + } + + if (!body) + continue; + + if (!overall_union) { + overall_union = body; + } else { + expr* oe = overall_union->get_expr(); + expr* be = body->get_expr(); + if (oe && be) { + expr_ref u(seq.re.mk_union(oe, be), m); + overall_union = m_sg.mk(u); + } + } + } + + return overall_union; + } + + // ----------------------------------------------------------------------- + // Stabilizer-based subsumption (enhanced) + // Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386) + // ----------------------------------------------------------------------- + + bool nseq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) { + if (!mem.m_str || !mem.m_regex) return false; - euf::snode* stab = stabilizer_from_cycle(cycle, mem.m_regex); - if (!stab) + // 1. Leading token must be a variable + euf::snode* first = mem.m_str->first(); + if (!first || !first->is_var()) return false; - // A constraint x ∈ R is subsumed when R ⊆ stab. - // For the simple case where cycle == current regex, - // R ⊆ R* is always true (since R* accepts everything R does, and more). - // This handles the common idempotent cycle case. - if (cycle == mem.m_regex) - return true; + // 2. Must have stabilizers for the regex + if (!has_stabilizers(mem.m_regex)) + return false; - // More sophisticated subsumption checks (regex containment) - // would require a regex inclusion decision procedure. - // For now, only handle the pointer-equality case. - return false; + // 3. Build stabStar = star(union(all stabilizers for this regex)) + euf::snode* stab_union = get_stabilizer_union(mem.m_regex); + if (!stab_union) + return false; + + seq_util& seq = m_sg.get_seq_util(); + ast_manager& mgr = m_sg.get_manager(); + expr* su_expr = stab_union->get_expr(); + if (!su_expr) + return false; + expr_ref stab_star(seq.re.mk_star(su_expr), mgr); + euf::snode* stab_star_sn = m_sg.mk(stab_star); + if (!stab_star_sn) + return false; + + // 4. Collect all primitive regex constraints on variable `first` + euf::snode* x_range = collect_primitive_regex_intersection(first, node); + if (!x_range) + return false; + + // 5. Check L(x_range) ⊆ L(stab_star) + lbool result = is_language_subset(x_range, stab_star_sn); + return result == l_true; } } diff --git a/src/smt/nseq_regex.h b/src/smt/nseq_regex.h index db9deebb9..6d354f99e 100644 --- a/src/smt/nseq_regex.h +++ b/src/smt/nseq_regex.h @@ -31,17 +31,119 @@ Author: #include "ast/euf/euf_sgraph.h" #include "smt/seq/seq_nielsen.h" +#include "util/uint_set.h" +#include "util/lbool.h" namespace smt { class nseq_regex { euf::sgraph& m_sg; + // ----------------------------------------------------------------- + // Stabilizer store (non-backtrackable, persists across solve() calls) + // Mirrors ZIPT Environment.stabilizers / selfStabilizing + // (Environment.cs:36-37) + // ----------------------------------------------------------------- + + // Maps regex snode id → list of stabilizer snodes. + // Each regex may accumulate multiple stabilizers from different + // cycle detections. The list is deduplicated by pointer equality. + u_map> m_stabilizers; + + // Set of regex snode ids that are self-stabilizing, i.e., the + // stabilizer for the regex is the regex itself (e.g., r*). + // Mirrors ZIPT Environment.selfStabilizing (Environment.cs:37) + uint_set m_self_stabilizing; + + // ----------------------------------------------------------------- + // BFS emptiness check helpers (private) + // ----------------------------------------------------------------- + + // Collect character boundary points from range predicates and + // to_re string literals in a regex. Boundaries partition the + // alphabet into equivalence classes where all characters in + // the same class produce identical derivatives. + void collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const; + + // Build a set of representative character snodes, one per + // alphabet equivalence class, derived from the boundary points + // of the given regex. + void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps); + public: nseq_regex(euf::sgraph& sg) : m_sg(sg) {} euf::sgraph& sg() { return m_sg; } + // ----------------------------------------------------------------- + // Stabilizer store API + // Mirrors ZIPT Environment stabilizer management + // (Environment.cs:114-146) + // ----------------------------------------------------------------- + + // Reset all stabilizer data. Called at the start of each solve() + // invocation if fresh stabilizer state is desired. + // Note: stabilizers persist across depth-bound iterations by default; + // only call this to clear accumulated state. + void reset_stabilizers(); + + // Add a stabilizer for a regex. De-duplicates by pointer equality. + // Mirrors ZIPT Environment.AddStabilizer (Environment.cs:114-123). + void add_stabilizer(euf::snode* regex, euf::snode* stabilizer); + + // Get the union of all stabilizers registered for a regex. + // Returns a single re.union snode combining all stabilizers, + // or nullptr if no stabilizers exist for the regex. + // Mirrors ZIPT Environment.GetStabilizerUnion (Environment.cs:125-128). + euf::snode* get_stabilizer_union(euf::snode* regex); + + // Check if any stabilizers have been registered for a regex. + bool has_stabilizers(euf::snode* regex) const; + + // Get raw stabilizer list for a regex (read-only). + // Returns nullptr if no stabilizers exist. + ptr_vector const* get_stabilizers(euf::snode* regex) const; + + // Mark a regex as self-stabilizing (stabilizer == regex itself). + // Mirrors ZIPT Environment.SetSelfStabilizing (Environment.cs:143-146). + void set_self_stabilizing(euf::snode* regex); + + // Check if a regex is marked as self-stabilizing. + // Mirrors ZIPT Environment.IsSelfStabilizing (Environment.cs:134-141). + bool is_self_stabilizing(euf::snode* regex) const; + + // ----------------------------------------------------------------- + // Self-stabilizing auto-detection and propagation through derivatives + // ----------------------------------------------------------------- + + // Determine if a regex is inherently self-stabilizing based on its + // structure. Returns true for: + // - R* (Kleene star): D(c, R*) = D(c,R)·R*, so R* is its own + // stabilizer regardless of the character. + // - Σ* (full_seq): D(c, Σ*) = Σ*, trivially self-stabilizing. + // - ∅ (fail/empty language): no live derivatives, trivially stable. + // - Complement of full_seq (~Σ* = ∅): also trivially stable. + // Does NOT mark the snode; call set_self_stabilizing to persist. + bool compute_self_stabilizing(euf::snode* regex) const; + + // After computing a derivative of parent, propagate the self- + // stabilizing flag to the derivative result if warranted. + // Applies structural rules: + // - If parent is R* → derivative is always self-stabilizing + // (derivative has the form D(c,R)·R* which contains the R* tail). + // - If parent is R·S and S is self-stabilizing → derivative may + // inherit from the self-stabilizing tail. + // - If parent is R|S and both are self-stabilizing → derivative is. + // - If parent is R∩S and both are self-stabilizing → derivative is. + // - If parent is ~R and R is self-stabilizing → derivative is. + // Updates the internal self-stabilizing set for the derivative. + void propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv); + + // Convenience: compute derivative and propagate self-stabilizing flags. + // Equivalent to calling derivative() followed by + // propagate_self_stabilizing(). + euf::snode* derivative_with_propagation(euf::snode* re, euf::snode* elem); + // ----------------------------------------------------------------- // Basic regex predicates // ----------------------------------------------------------------- @@ -51,6 +153,36 @@ namespace smt { // derived emptiness (e.g., union of empties, concat with empty). bool is_empty_regex(euf::snode* re) const; + // BFS emptiness check over the Brzozowski derivative automaton. + // Explores reachable derivative states using representative + // characters from each alphabet equivalence class. + // l_true — regex is definitely empty (no accepting states reachable) + // l_false — regex is definitely non-empty (found a nullable state) + // l_undef — inconclusive (hit exploration bound or failed derivative) + // max_states caps the number of explored states to prevent blowup. + lbool is_empty_bfs(euf::snode* re, unsigned max_states = 10000); + + // Check emptiness of the intersection of multiple regexes. + // Uses BFS over the product of Brzozowski derivative automata. + // l_true — intersection is definitely empty + // l_false — intersection is definitely non-empty + // l_undef — inconclusive (hit exploration bound) + // Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469) + lbool check_intersection_emptiness(ptr_vector const& regexes, + unsigned max_states = 10000); + + // Check if L(subset_re) ⊆ L(superset_re). + // Computed as: subset_re ∩ complement(superset_re) = ∅. + // Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385) + lbool is_language_subset(euf::snode* subset_re, euf::snode* superset_re); + + // Collect all primitive regex constraints on variable `var` from + // the node's str_mem list and return their intersection as a + // single regex snode (using re.inter). + // Returns nullptr if no primitive constraints found. + euf::snode* collect_primitive_regex_intersection( + euf::snode* var, seq::nielsen_node const& node); + // check if regex is the full language (Σ* / re.all) bool is_full_regex(euf::snode* re) const { return re && re->is_full_seq(); @@ -76,10 +208,23 @@ namespace smt { return m_sg.brzozowski_deriv(re, elem); } + // check if all minterms of the regex produce the same Brzozowski + // derivative ("uniform derivative"). When this holds, the derivative + // is independent of the character value: for any character c, + // deriv(regex, c) = result. This enables deterministic consumption + // of symbolic (variable) characters without branching. + // Returns the uniform derivative if found, nullptr otherwise. + // Mirrors ZIPT's SimplifyCharRegex uniform-derivative fast path. + euf::snode* try_uniform_derivative(euf::snode* regex); + // compute derivative of a str_mem constraint: advance past one character. // the string side is shortened by drop_first and the regex is derived. + // Propagates self-stabilizing flags from the parent regex to the derivative. seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) { - euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem); + euf::snode* parent_re = mem.m_regex; + euf::snode* deriv = m_sg.brzozowski_deriv(parent_re, elem); + if (deriv) + propagate_self_stabilizing(parent_re, deriv); euf::snode* new_str = m_sg.drop_first(mem.m_str); return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep); } @@ -175,11 +320,42 @@ namespace smt { euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex, euf::snode* current_regex); + // Strengthened stabilizer construction with sub-cycle detection. + // Replays the consumed character tokens from cycle_history on the + // cycle regex, detecting sub-cycles (where the derivative loops + // back to the original regex). For each sub-cycle, builds a + // stabilizer from the interleaved character tokens and filtered + // sub-stabilizers. + // Returns a union of all sub-cycle stabilizer bodies, or nullptr + // if no non-trivial stabilizer can be built. + // Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225). + euf::snode* strengthened_stabilizer(euf::snode* cycle_regex, + euf::snode* cycle_history); + + // Get filtered stabilizer star: for regex state re, retrieve + // existing stabilizers, filter out those whose language can + // start with any character in excluded_char, and wrap the + // remaining in star(union(...)). + // Returns nullptr (or empty-equivalent) if no valid stabilizers. + // Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243). + euf::snode* get_filtered_stabilizer_star(euf::snode* re, + euf::snode* excluded_char); + + // Extract the cycle portion of a str_mem's history by comparing + // the current history with an ancestor's history length. + // Returns the sub-sequence of tokens consumed since the ancestor, + // or nullptr if the history did not advance. + euf::snode* extract_cycle_history(seq::str_mem const& current, + seq::str_mem const& ancestor); + // try to subsume a str_mem constraint using stabilizer-based - // reasoning: if extract_cycle finds a cycle, check whether - // the current regex is already covered by the stabilizer. + // reasoning. Enhanced version: checks if the leading variable's + // language (intersection of all its primitive regex constraints) + // is a subset of star(union(stabilizers)) for the current regex. + // Falls back to cycle-based pointer equality check. // returns true if the constraint can be dropped. - bool try_subsume(seq::str_mem const& mem); + // Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386). + bool try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node); }; } diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index c4bf1e57a..52d3ec36b 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -21,6 +21,7 @@ Author: #include "smt/seq/seq_nielsen.h" #include "smt/seq/seq_parikh.h" +#include "smt/nseq_regex.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" #include "ast/rewriter/seq_rewriter.h" @@ -206,6 +207,19 @@ namespace seq { m_var_lb.insert(kv.m_key, kv.m_value); for (auto const& kv : parent.m_var_ub) m_var_ub.insert(kv.m_key, kv.m_value); + // clone regex occurrence tracking + m_regex_occurrence = parent.m_regex_occurrence; + } + + bool nielsen_node::track_regex_occurrence(unsigned regex_id, unsigned mem_id) { + auto key = std::make_pair(regex_id, mem_id); + auto it = m_regex_occurrence.find(key); + if (it != m_regex_occurrence.end()) { + // Already seen — cycle detected + return true; + } + m_regex_occurrence[key] = m_id; + return false; } void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) { @@ -499,11 +513,13 @@ namespace seq { m_sg(sg), m_solver(solver), m_parikh(alloc(seq_parikh, sg)), + m_nseq_regex(alloc(smt::nseq_regex, sg)), m_len_vars(sg.get_manager()) { } nielsen_graph::~nielsen_graph() { dealloc(m_parikh); + dealloc(m_nseq_regex); reset(); } @@ -1670,6 +1686,91 @@ namespace seq { } } + // consume symbolic characters (s_unit tokens) via uniform derivative + // check. When all minterms of the regex produce the same Brzozowski + // derivative, the derivative is independent of the character value + // and we can deterministically consume the token. Forward direction + // only (matches ZIPT history tracking convention). + // + // Extended: when uniform derivative fails but the token has a char_range + // constraint (from apply_regex_var_split), check if the char_range is a + // subset of a single minterm's character class. If so, the derivative + // is deterministic for that token. + // Mirrors ZIPT StrMem.SimplifyCharRegex lines 96-117. + for (str_mem& mem : m_str_mem) { + if (!mem.m_str || !mem.m_regex) + continue; + while (mem.m_str && !mem.m_str->is_empty()) { + euf::snode* tok = mem.m_str->first(); + if (!tok || !tok->is_unit()) + break; + // compute minterms and check for uniform derivative + euf::snode_vector minterms; + sg.compute_minterms(mem.m_regex, minterms); + if (minterms.empty()) + break; + euf::snode* uniform = nullptr; + bool is_uniform = true; + for (euf::snode* mt : minterms) { + if (!mt || mt->is_fail()) + continue; + euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt); + if (!deriv) { is_uniform = false; break; } + if (!uniform) { + uniform = deriv; + } else if (uniform->id() != deriv->id()) { + is_uniform = false; break; + } + } + if (is_uniform && uniform) { + if (uniform->is_fail()) { + m_is_general_conflict = true; + m_reason = backtrack_reason::regex; + return simplify_result::conflict; + } + mem.m_str = sg.drop_left(mem.m_str, 1); + mem.m_regex = uniform; + mem.m_history = sg.mk_concat(mem.m_history, tok); + continue; + } + // Uniform derivative failed — try char_range subset approach. + // If the symbolic char has a char_range constraint and that + // range is a subset of exactly one minterm's character class, + // we can deterministically take that minterm's derivative. + if (m_char_ranges.contains(tok->id()) && g.m_parikh) { + char_set const& cs = m_char_ranges[tok->id()]; + if (!cs.is_empty()) { + euf::snode* matching_deriv = nullptr; + bool found = false; + for (euf::snode* mt : minterms) { + if (!mt || mt->is_fail()) continue; + if (!mt->get_expr()) continue; + char_set mt_cs = g.m_parikh->minterm_to_char_set(mt->get_expr()); + if (cs.is_subset(mt_cs)) { + euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt); + if (!deriv) { found = false; break; } + if (deriv->is_fail()) { + m_is_general_conflict = true; + m_reason = backtrack_reason::regex; + return simplify_result::conflict; + } + matching_deriv = deriv; + found = true; + break; + } + } + if (found && matching_deriv) { + mem.m_str = sg.drop_left(mem.m_str, 1); + mem.m_regex = matching_deriv; + mem.m_history = sg.mk_concat(mem.m_history, tok); + continue; + } + } + } + break; + } + } + // check for regex memberships that are immediately infeasible for (str_mem& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) @@ -1692,6 +1793,23 @@ namespace seq { if (wi < m_str_mem.size()) m_str_mem.shrink(wi); + // Regex widening: for each remaining str_mem, overapproximate + // the string by replacing variables with their regex intersection + // and check if the result intersected with the target regex is empty. + // Detects infeasible constraints that would otherwise require + // expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening. + if (g.m_nseq_regex) { + for (str_mem const& mem : m_str_mem) { + if (!mem.m_str || !mem.m_regex) + continue; + if (g.check_regex_widening(*this, mem.m_str, mem.m_regex)) { + m_is_general_conflict = true; + m_reason = backtrack_reason::regex; + return simplify_result::conflict; + } + } + } + // IntBounds initialization: derive per-variable Parikh length bounds from // remaining regex memberships and add to m_int_constraints. init_var_bounds_from_mems(); @@ -2075,6 +2193,15 @@ namespace seq { } if (sr == simplify_result::satisfied || node->is_satisfied()) { + // Before declaring SAT, check leaf-node regex feasibility: + // for each variable with multiple regex constraints, verify + // that the intersection of all its regexes is non-empty. + // Mirrors ZIPT NielsenNode.CheckRegex. + if (!check_leaf_regex(*node)) { + node->set_general_conflict(true); + node->set_reason(backtrack_reason::regex); + return search_result::unsat; + } m_sat_node = node; m_sat_path = cur_path; return search_result::sat; @@ -3132,9 +3259,10 @@ namespace seq { // Modifier: apply_star_intr // Star introduction: for a str_mem x·s ∈ R where a cycle is detected // (backedge exists), introduce a stabilizer constraint x ∈ base*. - // Creates a child that adds x ∈ base* membership and constrains - // the remainder not to start with base. - // mirrors ZIPT's StarIntrModifier + // Creates a child that splits x into pr·po, adds pr ∈ base* and + // po·s ∈ R, with a blocking constraint on po. + // Enhanced to use strengthened_stabilizer and register via add_stabilizer. + // mirrors ZIPT's StarIntrModifier (StarIntrModifier.cs:22-85) // ----------------------------------------------------------------------- bool nielsen_graph::apply_star_intr(nielsen_node* node) { @@ -3149,41 +3277,86 @@ namespace seq { euf::snode* first = mem.m_str->first(); if (!first || !first->is_var()) continue; - // Introduce a fresh variable z constrained by z ∈ R*, - // replacing x → z·fresh_post. This breaks the cycle because - // z is a new variable that won't trigger star_intr again. euf::snode* x = first; - euf::snode* z = mk_fresh_var(); - euf::snode* fresh_post = mk_fresh_var(); - euf::snode* str_tail = m_sg.drop_first(mem.m_str); - - // Build z ∈ R* membership: the star of the current regex seq_util& seq = m_sg.get_seq_util(); + ast_manager& mgr = m_sg.get_manager(); expr* re_expr = mem.m_regex->get_expr(); if (!re_expr) continue; - expr_ref star_re(seq.re.mk_star(re_expr), seq.get_manager()); + + // Determine the stabilizer base: + // 1. If self-stabilizing, use regex itself as stabilizer + // 2. Otherwise, try strengthened_stabilizer from cycle history + // 3. Fall back to simple star(R) + euf::snode* stab_base = nullptr; + + if (m_nseq_regex && m_nseq_regex->is_self_stabilizing(mem.m_regex)) { + stab_base = mem.m_regex; + } + else if (m_nseq_regex && mem.m_history) { + stab_base = m_nseq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history); + } + + if (!stab_base) { + // Fall back: simple star of the cycle regex + stab_base = mem.m_regex; + } + + // Register the stabilizer + if (m_nseq_regex) + m_nseq_regex->add_stabilizer(mem.m_regex, stab_base); + + // Check if stabilizer equals regex → self-stabilizing + if (m_nseq_regex && stab_base == mem.m_regex) + m_nseq_regex->set_self_stabilizing(mem.m_regex); + + // Build stab_star = star(stab_base) + expr* stab_expr = stab_base->get_expr(); + if (!stab_expr) + continue; + expr_ref star_re(seq.re.mk_star(stab_expr), mgr); euf::snode* star_sn = m_sg.mk(star_re); if (!star_sn) continue; + // Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint + if (m_nseq_regex && m_nseq_regex->try_subsume(mem, *node)) + continue; + + // Create child: x → pr · po + euf::snode* pr = mk_fresh_var(); + euf::snode* po = mk_fresh_var(); + euf::snode* str_tail = m_sg.drop_first(mem.m_str); + nielsen_node* child = mk_child(node); - // Add membership: z ∈ R* (stabilizer constraint) - child->add_str_mem(str_mem(z, star_sn, mem.m_history, next_mem_id(), mem.m_dep)); + // Add membership: pr ∈ stab_base* (stabilizer constraint) + child->add_str_mem(str_mem(pr, star_sn, mem.m_history, next_mem_id(), mem.m_dep)); - // Add remaining membership: fresh_post · tail ∈ R - euf::snode* post_tail = str_tail->is_empty() ? fresh_post : m_sg.mk_concat(fresh_post, str_tail); - child->add_str_mem(str_mem(post_tail, mem.m_regex, mem.m_history, next_mem_id(), mem.m_dep)); + // Add remaining membership: po · tail ∈ R (same regex, trimmed history) + euf::snode* post_tail = str_tail->is_empty() ? po : m_sg.mk_concat(po, str_tail); + child->add_str_mem(str_mem(post_tail, mem.m_regex, nullptr, next_mem_id(), mem.m_dep)); - // Substitute x → z · fresh_post + // Blocking constraint: po must NOT start with stab_base + // po ∈ complement(non_nullable(stab_base) · Σ*) + // This prevents redundant unrolling. + if (!stab_base->is_nullable()) { + sort* str_sort = seq.str.mk_string_sort(); + expr_ref full_seq(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr); + expr_ref base_then_all(seq.re.mk_concat(stab_expr, full_seq), mgr); + expr_ref block_re(seq.re.mk_complement(base_then_all), mgr); + euf::snode* block_sn = m_sg.mk(block_re); + if (block_sn) + child->add_str_mem(str_mem(po, block_sn, nullptr, next_mem_id(), mem.m_dep)); + } + + // Substitute x → pr · po nielsen_edge* e = mk_edge(node, child, false); - euf::snode* replacement = m_sg.mk_concat(z, fresh_post); + euf::snode* replacement = m_sg.mk_concat(pr, po); nielsen_subst s(x, replacement, mem.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); - // Do not re-set backedge — z is fresh, so star_intr won't fire again return true; } return false; @@ -4101,5 +4274,160 @@ namespace seq { return mdl.get() != nullptr; } -} + // ----------------------------------------------------------------------- + // Regex widening: overapproximate string and check intersection emptiness + // Mirrors ZIPT NielsenNode.CheckRegexWidening (NielsenNode.cs:1350-1380) + // ----------------------------------------------------------------------- + bool nielsen_graph::check_regex_widening(nielsen_node const& node, + euf::snode* str, euf::snode* regex) { + if (!str || !regex || !m_nseq_regex) + return false; + // Only apply to ground regexes with non-trivial strings + if (!regex->is_ground()) + return false; + + seq_util seq(m_sg.get_manager()); + ast_manager& mgr = m_sg.get_manager(); + + // Build the overapproximation regex for the string. + // Variables → intersection of their primitive regex constraints (or Σ*) + // Symbolic chars → re.range from char_ranges (or full_char) + // Concrete chars → to_re(unit(c)) + euf::snode_vector tokens; + str->collect_tokens(tokens); + if (tokens.empty()) + return false; + + euf::snode* approx = nullptr; + for (euf::snode* tok : tokens) { + euf::snode* tok_re = nullptr; + + if (tok->is_char()) { + // Concrete character → to_re(unit(c)) + expr* te = tok->get_expr(); + if (!te) return false; + expr_ref unit_s(seq.str.mk_unit(te), mgr); + expr_ref tre(seq.re.mk_to_re(unit_s), mgr); + tok_re = m_sg.mk(tre); + } + else if (tok->is_var()) { + // Variable → intersection of primitive regex constraints, or Σ* + euf::snode* x_range = m_nseq_regex->collect_primitive_regex_intersection(tok, node); + if (x_range) { + tok_re = x_range; + } else { + // Unconstrained variable: approximate as Σ* + sort* str_sort = seq.str.mk_string_sort(); + expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr); + tok_re = m_sg.mk(all_re); + } + } + else if (tok->is_unit()) { + // Symbolic char — try to get char_range + if (node.char_ranges().contains(tok->id())) { + char_set const& cs = node.char_ranges()[tok->id()]; + if (!cs.is_empty()) { + // Build union of re.range for each interval + euf::snode* range_re = nullptr; + for (auto const& r : cs.ranges()) { + expr_ref lo(seq.mk_char(r.m_lo), mgr); + expr_ref hi(seq.mk_char(r.m_hi - 1), mgr); + expr_ref rng(seq.re.mk_range( + seq.str.mk_string(zstring(r.m_lo)), + seq.str.mk_string(zstring(r.m_hi - 1))), mgr); + euf::snode* rng_sn = m_sg.mk(rng); + if (!range_re) { + range_re = rng_sn; + } else { + expr_ref u(seq.re.mk_union(range_re->get_expr(), rng_sn->get_expr()), mgr); + range_re = m_sg.mk(u); + } + } + tok_re = range_re; + } + } + if (!tok_re) { + // Unconstrained symbolic char: approximate as full_char (single char, any value) + sort* str_sort = seq.str.mk_string_sort(); + expr_ref fc(seq.re.mk_full_char(seq.re.mk_re(str_sort)), mgr); + tok_re = m_sg.mk(fc); + } + } + else { + // Unknown token type — approximate as Σ* + sort* str_sort = seq.str.mk_string_sort(); + expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr); + tok_re = m_sg.mk(all_re); + } + + if (!tok_re) + return false; + + if (!approx) { + approx = tok_re; + } else { + expr* ae = approx->get_expr(); + expr* te = tok_re->get_expr(); + if (!ae || !te) return false; + expr_ref cat(seq.re.mk_concat(ae, te), mgr); + approx = m_sg.mk(cat); + } + } + + if (!approx) + return false; + + // Check intersection(approx, regex) for emptiness + // Build intersection regex + expr* ae = approx->get_expr(); + expr* re = regex->get_expr(); + if (!ae || !re) + return false; + expr_ref inter(seq.re.mk_inter(ae, re), mgr); + euf::snode* inter_sn = m_sg.mk(inter); + if (!inter_sn) + return false; + + lbool result = m_nseq_regex->is_empty_bfs(inter_sn, 5000); + return result == l_true; + } + + // ----------------------------------------------------------------------- + // Leaf-node regex intersection emptiness check + // Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329) + // ----------------------------------------------------------------------- + + bool nielsen_graph::check_leaf_regex(nielsen_node const& node) { + if (!m_nseq_regex) + return true; + + // Group str_mem constraints by variable (primitive constraints only) + u_map> var_regexes; + + for (auto const& mem : node.str_mems()) { + if (!mem.m_str || !mem.m_regex) + continue; + if (!mem.is_primitive()) + continue; + euf::snode* first = mem.m_str->first(); + if (!first || !first->is_var()) + continue; + auto& list = var_regexes.insert_if_not_there(first->id(), ptr_vector()); + list.push_back(mem.m_regex); + } + + // For each variable with 2+ regex constraints, check intersection non-emptiness + for (auto& [var_id, regexes] : var_regexes) { + if (regexes.size() < 2) + continue; + lbool result = m_nseq_regex->check_intersection_emptiness(regexes, 5000); + if (result == l_true) { + // Intersection is empty — infeasible + return false; + } + } + return true; + } + +} diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 27e2d83b3..987350924 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -244,6 +244,10 @@ Author: #include #include "model/model.h" +namespace smt { + class nseq_regex; // forward declaration (defined in smt/nseq_regex.h) +} + namespace seq { // forward declarations @@ -538,6 +542,12 @@ namespace seq { // asserted when this node's solver scope is entered. unsigned m_parent_ic_count = 0; + // RegexOccurrence: maps (regex snode id, str_mem id) → node id where + // this regex was last seen on the current DFS path. + // Used for precise cycle detection with history-length-based progress. + // Mirrors ZIPT LocalInfo.RegexOccurrence (LocalInfo.cs:34) + std::map, unsigned> m_regex_occurrence; + public: nielsen_node(nielsen_graph* graph, unsigned id); @@ -628,6 +638,15 @@ namespace seq { // clone constraints from a parent node void clone_from(nielsen_node const& parent); + // Regex occurrence tracking: record current regex state for cycle detection. + // Returns true if a cycle is detected (same regex seen before on this path). + bool track_regex_occurrence(unsigned regex_id, unsigned mem_id); + + // Get the regex occurrence map (for undo on backtrack). + std::map, unsigned> const& regex_occurrence() const { + return m_regex_occurrence; + } + // apply a substitution to all constraints void apply_subst(euf::sgraph& sg, nielsen_subst const& s); @@ -739,6 +758,11 @@ namespace seq { // Parikh image filter: generates modular length constraints from regex // memberships. Allocated in the constructor; owned by this graph. seq_parikh* m_parikh = nullptr; + + // Regex membership module: stabilizers, emptiness checks, language + // inclusion, derivatives. Allocated in the constructor; owned by this graph. + smt::nseq_regex* m_nseq_regex = nullptr; + // ----------------------------------------------- // Modification counter for substitution length tracking. // mirrors ZIPT's LocalInfo.CurrentModificationCnt @@ -871,9 +895,28 @@ namespace seq { // Caller takes ownership of the returned model pointer. bool solve_sat_path_ints(model_ref& mdl); + // accessor for the nseq_regex module + smt::nseq_regex* nseq_regex_module() const { return m_nseq_regex; } + private: search_result search_dfs(nielsen_node* node, unsigned depth, svector& cur_path); + // Regex widening: overapproximate `str` by replacing variables with + // the intersection of their primitive regex constraints (or Σ* if + // unconstrained), replacing symbolic chars with their char ranges, + // then checking if the approximation intersected with `regex` is empty. + // Returns true if widening detects infeasibility (UNSAT). + // Mirrors ZIPT NielsenNode.CheckRegexWidening (NielsenNode.cs:1350-1380) + bool check_regex_widening(nielsen_node const& node, + euf::snode* str, euf::snode* regex); + + // Check regex feasibility at a leaf node: for each variable with + // multiple primitive regex constraints, check that the intersection + // of all its regexes is non-empty. + // Returns true if all constraints are feasible. + // Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329) + bool check_leaf_regex(nielsen_node const& node); + // Apply the Parikh image filter to a node: generate modular length // constraints from regex memberships and append them to the node's // int_constraints. Also performs a lightweight feasibility pre-check; diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp index 569e8fd75..82d99b4fb 100644 --- a/src/test/euf_sgraph.cpp +++ b/src/test/euf_sgraph.cpp @@ -449,6 +449,7 @@ static void test_sgraph_factory() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); // mk_var euf::snode* x = sg.mk_var(symbol("x")); @@ -463,7 +464,7 @@ static void test_sgraph_factory() { SASSERT(a->length() == 1); // mk_empty - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); SASSERT(e && e->is_empty()); SASSERT(e->is_nullable()); SASSERT(e->length() == 0); @@ -499,6 +500,7 @@ static void test_sgraph_indexing() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -533,7 +535,7 @@ static void test_sgraph_indexing() { SASSERT(a->at(1) == nullptr); // empty: at(0) is nullptr - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); SASSERT(e->at(0) == nullptr); euf::snode_vector empty_tokens; e->collect_tokens(empty_tokens); @@ -547,6 +549,7 @@ static void test_sgraph_drop() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -591,7 +594,7 @@ static void test_sgraph_drop() { SASSERT(e->is_empty()); // drop from empty: no change - euf::snode* ee = sg.drop_first(sg.mk_empty()); + euf::snode* ee = sg.drop_first(sg.mk_empty_seq(seq.str.mk_string_sort())); SASSERT(ee->is_empty()); } @@ -602,6 +605,7 @@ static void test_sgraph_subst() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -624,7 +628,7 @@ static void test_sgraph_subst() { SASSERT(same == xax); // substitution of variable with empty - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); euf::snode* collapsed = sg.subst(xax, x, e); SASSERT(collapsed->length() == 1); // just 'a' remains SASSERT(collapsed == a); diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp index ff9318e84..c896be9f4 100644 --- a/src/test/nseq_basic.cpp +++ b/src/test/nseq_basic.cpp @@ -91,14 +91,15 @@ static void test_nseq_simplification() { std::cout << "test_nseq_simplification\n"; ast_manager m; reg_decl_plugins(m); + seq_util su(m); euf::egraph eg(m); euf::sgraph sg(m, eg); nseq_basic_dummy_solver solver; seq::nielsen_graph ng(sg, solver); // Add a trivial equality: empty = empty - euf::snode* empty1 = sg.mk_empty(); - euf::snode* empty2 = sg.mk_empty(); + euf::snode* empty1 = sg.mk_empty_seq(su.str.mk_string_sort()); + euf::snode* empty2 = sg.mk_empty_seq(su.str.mk_string_sort()); ng.add_str_eq(empty1, empty2); @@ -113,6 +114,7 @@ static void test_nseq_node_satisfied() { std::cout << "test_nseq_node_satisfied\n"; ast_manager m; reg_decl_plugins(m); + seq_util su(m); euf::egraph eg(m); euf::sgraph sg(m, eg); nseq_basic_dummy_solver solver; @@ -123,7 +125,7 @@ static void test_nseq_node_satisfied() { SASSERT(node->is_satisfied()); // add a trivial equality - euf::snode* empty = sg.mk_empty(); + euf::snode* empty = sg.mk_empty_seq(su.str.mk_string_sort()); seq::dep_tracker dep; seq::str_eq eq(empty, empty, dep); node->add_str_eq(eq); diff --git a/src/test/nseq_regex.cpp b/src/test/nseq_regex.cpp index 41a58f6b2..6970413b0 100644 --- a/src/test/nseq_regex.cpp +++ b/src/test/nseq_regex.cpp @@ -16,6 +16,9 @@ Abstract: #include "ast/euf/euf_egraph.h" #include "ast/euf/euf_sgraph.h" #include "smt/nseq_regex.h" +#include "smt/seq/seq_nielsen.h" +#include "util/lbool.h" +#include "util/zstring.h" #include // Test 1: nseq_regex instantiation @@ -66,9 +69,643 @@ static void test_nseq_regex_is_full() { std::cout << " ok: re.all not recognized as empty\n"; } +// Test 4: strengthened_stabilizer — null safety +static void test_strengthened_stabilizer_null() { + std::cout << "test_strengthened_stabilizer_null\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + + SASSERT(nr.strengthened_stabilizer(nullptr, nullptr) == nullptr); + + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + expr_ref full_e(su.re.mk_full_seq(re_sort), m); + euf::snode* full_re = sg.mk(full_e); + + SASSERT(nr.strengthened_stabilizer(full_re, nullptr) == nullptr); + SASSERT(nr.strengthened_stabilizer(nullptr, full_re) == nullptr); + std::cout << " ok\n"; +} + +// Test 5: strengthened_stabilizer — single char cycle on a* +// Regex a*, history = 'a'. D('a', a*) = a* (sub-cycle back to start). +// Stabilizer body should be to_re("a"). +static void test_strengthened_stabilizer_single_char() { + std::cout << "test_strengthened_stabilizer_single_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // Build a* + expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m); + euf::snode* re_star_a = sg.mk(star_a); + + // Build history = char 'a' (single token, no concat needed) + euf::snode* tok_a = sg.mk_char('a'); + euf::snode* history = tok_a; + + euf::snode* result = nr.strengthened_stabilizer(re_star_a, history); + // Should produce a non-null stabilizer body (to_re("a")) + SASSERT(result != nullptr); + std::cout << " ok: a* with history 'a' -> non-null stabilizer\n"; +} + +// Test 6: strengthened_stabilizer — two-char cycle with sub-cycle +// Regex (ab)*, history = 'a', 'b'. D('a', (ab)*) = b(ab)*, D('b', b(ab)*) = (ab)* +// This should detect a sub-cycle and build a stabilizer body. +static void test_strengthened_stabilizer_two_char() { + std::cout << "test_strengthened_stabilizer_two_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // Build (ab)* + expr_ref ab(su.re.mk_to_re(su.str.mk_string("ab")), m); + expr_ref star_ab(su.re.mk_star(ab), m); + euf::snode* re_star_ab = sg.mk(star_ab); + + // Build history: concat(char_a, char_b) using string concat + euf::snode* tok_a = sg.mk_char('a'); + euf::snode* tok_b = sg.mk_char('b'); + euf::snode* history = sg.mk_concat(tok_a, tok_b); + + euf::snode* result = nr.strengthened_stabilizer(re_star_ab, history); + // Should produce a non-null stabilizer body + SASSERT(result != nullptr); + std::cout << " ok: (ab)* with history 'ab' -> non-null stabilizer\n"; +} + +// Test 7: get_filtered_stabilizer_star — no stabilizers registered +static void test_filtered_stabilizer_star_empty() { + std::cout << "test_filtered_stabilizer_star_empty\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + expr_ref full_e(su.re.mk_full_seq(re_sort), m); + euf::snode* full_re = sg.mk(full_e); + euf::snode* tok_a = sg.mk_char('a'); + + euf::snode* result = nr.get_filtered_stabilizer_star(full_re, tok_a); + SASSERT(result == nullptr); + std::cout << " ok: no stabilizers -> nullptr\n"; +} + +// Test 8: get_filtered_stabilizer_star — with registered stabilizer that passes filter +static void test_filtered_stabilizer_star_with_stab() { + std::cout << "test_filtered_stabilizer_star_with_stab\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // Build a* as the regex state + expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m); + euf::snode* re_star_a = sg.mk(star_a); + + // Register a stabilizer: to_re("b") — only accepts "b" + expr_ref stab_b(su.re.mk_to_re(su.str.mk_string("b")), m); + euf::snode* stab_b_sn = sg.mk(stab_b); + nr.add_stabilizer(re_star_a, stab_b_sn); + + // Exclude char 'a': D('a', to_re("b")) should be fail + euf::snode* tok_a = sg.mk_char('a'); + euf::snode* result = nr.get_filtered_stabilizer_star(re_star_a, tok_a); + // to_re("b") should pass the filter → result is star(to_re("b")) + SASSERT(result != nullptr); + SASSERT(result->is_star()); + std::cout << " ok: filter keeps to_re('b') when excluding 'a'\n"; +} + +// Test 9: get_filtered_stabilizer_star — stabilizer filtered out +static void test_filtered_stabilizer_star_filtered() { + std::cout << "test_filtered_stabilizer_star_filtered\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // Build a* as the regex state + expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m); + euf::snode* re_star_a = sg.mk(star_a); + + // Register a stabilizer: to_re("a") — accepts "a" + expr_ref stab_a(su.re.mk_to_re(su.str.mk_string("a")), m); + euf::snode* stab_a_sn = sg.mk(stab_a); + nr.add_stabilizer(re_star_a, stab_a_sn); + + // Exclude char 'a': D('a', to_re("a")) is NOT fail → filtered out + euf::snode* tok_a = sg.mk_char('a'); + euf::snode* result = nr.get_filtered_stabilizer_star(re_star_a, tok_a); + SASSERT(result == nullptr); + std::cout << " ok: filter removes to_re('a') when excluding 'a'\n"; +} + +// Test 10: extract_cycle_history — basic extraction +static void test_extract_cycle_history_basic() { + std::cout << "test_extract_cycle_history_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + expr_ref full_e(su.re.mk_full_seq(re_sort), m); + euf::snode* full_re = sg.mk(full_e); + + euf::snode* tok_a = sg.mk_char('a'); + euf::snode* tok_b = sg.mk_char('b'); + euf::snode* tok_c = sg.mk_char('c'); + + // Ancestor history: just 'a' (length 1) + euf::snode* anc_hist = tok_a; + + // Current history: concat(concat(a, b), c) = a,b,c (length 3) + euf::snode* cur_hist = sg.mk_concat(sg.mk_concat(tok_a, tok_b), tok_c); + + euf::snode* empty_str = sg.mk_empty_seq(str_sort); + seq::dep_tracker empty_dep; + + seq::str_mem ancestor(empty_str, full_re, anc_hist, 0, empty_dep); + seq::str_mem current(empty_str, full_re, cur_hist, 0, empty_dep); + + euf::snode* cycle = nr.extract_cycle_history(current, ancestor); + // Should return the last 2 tokens (b, c) + SASSERT(cycle != nullptr); + SASSERT(cycle->length() == 2); + std::cout << " ok: extracted cycle of length 2\n"; +} + +// Test 11: extract_cycle_history — null ancestor history +static void test_extract_cycle_history_null_ancestor() { + std::cout << "test_extract_cycle_history_null_ancestor\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + expr_ref full_e(su.re.mk_full_seq(re_sort), m); + euf::snode* full_re = sg.mk(full_e); + + euf::snode* tok_a = sg.mk_char('a'); + euf::snode* tok_b = sg.mk_char('b'); + euf::snode* cur_hist = sg.mk_concat(tok_a, tok_b); + euf::snode* empty_str = sg.mk_empty_seq(str_sort); + seq::dep_tracker empty_dep; + + // Ancestor has no history (nullptr) + seq::str_mem ancestor(empty_str, full_re, nullptr, 0, empty_dep); + seq::str_mem current(empty_str, full_re, cur_hist, 0, empty_dep); + + euf::snode* cycle = nr.extract_cycle_history(current, ancestor); + // With null ancestor history, entire current history is the cycle + SASSERT(cycle != nullptr); + SASSERT(cycle->length() == 2); + std::cout << " ok: null ancestor -> full history as cycle\n"; +} + +// Test 12: BFS emptiness — re.none (empty language) is empty +static void test_bfs_empty_none() { + std::cout << "test_bfs_empty_none\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + + expr_ref none_e(su.re.mk_empty(re_sort), m); + euf::snode* none_re = sg.mk(none_e); + lbool result = nr.is_empty_bfs(none_re); + SASSERT(result == l_true); + std::cout << " ok: re.none -> l_true (empty)\n"; +} + +// Test 13: BFS emptiness — full_seq (Sigma*) is NOT empty +static void test_bfs_nonempty_full() { + std::cout << "test_bfs_nonempty_full\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + + expr_ref full_e(su.re.mk_full_seq(re_sort), m); + euf::snode* full_re = sg.mk(full_e); + lbool result = nr.is_empty_bfs(full_re); + SASSERT(result == l_false); + std::cout << " ok: full_seq -> l_false (non-empty)\n"; +} + +// Test 14: BFS emptiness — to_re("abc") is NOT empty +static void test_bfs_nonempty_to_re() { + std::cout << "test_bfs_nonempty_to_re\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + expr_ref to_re_abc(su.re.mk_to_re(su.str.mk_string("abc")), m); + euf::snode* re_abc = sg.mk(to_re_abc); + lbool result = nr.is_empty_bfs(re_abc); + SASSERT(result == l_false); + std::cout << " ok: to_re(\"abc\") -> l_false (non-empty)\n"; +} + +// Test 15: BFS emptiness — a* is NOT empty (accepts epsilon) +static void test_bfs_nonempty_star() { + std::cout << "test_bfs_nonempty_star\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m); + euf::snode* re_star_a = sg.mk(star_a); + lbool result = nr.is_empty_bfs(re_star_a); + SASSERT(result == l_false); + std::cout << " ok: a* -> l_false (non-empty, accepts epsilon)\n"; +} + +// Test 16: BFS emptiness — union(none, none) is empty +static void test_bfs_empty_union_of_empties() { + std::cout << "test_bfs_empty_union_of_empties\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + + expr_ref none1(su.re.mk_empty(re_sort), m); + expr_ref none2(su.re.mk_empty(re_sort), m); + expr_ref union_e(su.re.mk_union(none1, none2), m); + euf::snode* re_union = sg.mk(union_e); + lbool result = nr.is_empty_bfs(re_union); + SASSERT(result == l_true); + std::cout << " ok: union(none, none) -> l_true (empty)\n"; +} + +// Test 17: BFS emptiness — re.range('a','z') is NOT empty +static void test_bfs_nonempty_range() { + std::cout << "test_bfs_nonempty_range\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + + expr_ref lo(su.mk_char('a'), m); + expr_ref hi(su.mk_char('z'), m); + expr_ref range_e(su.re.mk_range(su.str.mk_unit(lo), su.str.mk_unit(hi)), m); + euf::snode* re_range = sg.mk(range_e); + lbool result = nr.is_empty_bfs(re_range); + SASSERT(result == l_false); + std::cout << " ok: range('a','z') -> l_false (non-empty)\n"; +} + +// Test 18: BFS emptiness — complement(full_seq) = empty +static void test_bfs_empty_complement_full() { + std::cout << "test_bfs_empty_complement_full\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + sort* re_sort = su.re.mk_re(str_sort); + + expr_ref comp_full(su.re.mk_complement(su.re.mk_full_seq(re_sort)), m); + euf::snode* re_comp = sg.mk(comp_full); + lbool result = nr.is_empty_bfs(re_comp); + SASSERT(result == l_true); + std::cout << " ok: ~full_seq -> l_true (empty)\n"; +} + +// Test 19: BFS emptiness — nullptr returns l_undef +static void test_bfs_null_safety() { + std::cout << "test_bfs_null_safety\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + + lbool result = nr.is_empty_bfs(nullptr); + SASSERT(result == l_undef); + std::cout << " ok: nullptr -> l_undef\n"; +} + +// Test 20: BFS emptiness — max_states bound respected +static void test_bfs_bounded() { + std::cout << "test_bfs_bounded\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // (a|b)+ requires at least one char; with max_states=1 should bail + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m); + expr_ref ab_union(su.re.mk_union(a_re, b_re), m); + expr_ref ab_plus(su.re.mk_plus(ab_union), m); + euf::snode* re_plus = sg.mk(ab_plus); + + lbool result = nr.is_empty_bfs(re_plus, 1); + SASSERT(result == l_undef); + std::cout << " ok: (a|b)+ with max_states=1 -> l_undef (bounded)\n"; +} + +// ----------------------------------------------------------------------- +// New tests for regex membership completion (Phase 1-4) +// ----------------------------------------------------------------------- + +// Test: char_set::is_subset +static void test_char_set_is_subset() { + std::cout << "test_char_set_is_subset\n"; + + // {a} ⊆ {a,b,c} = [97,100) + char_set cs1(char_range('a', 'b')); // {a} + char_set cs2(char_range('a', 'd')); // {a,b,c} + SASSERT(cs1.is_subset(cs2)); + SASSERT(!cs2.is_subset(cs1)); + + // empty ⊆ anything + char_set empty; + SASSERT(empty.is_subset(cs1)); + SASSERT(empty.is_subset(cs2)); + + // self ⊆ self + SASSERT(cs1.is_subset(cs1)); + SASSERT(cs2.is_subset(cs2)); + + // disjoint: {x} not ⊆ {a} + char_set cs3(char_range('x', 'y')); + SASSERT(!cs3.is_subset(cs1)); + + std::cout << " ok\n"; +} + +// Test: stabilizer store basic operations +static void test_stabilizer_store_basic() { + std::cout << "test_stabilizer_store_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m); + euf::snode* a_sn = sg.mk(a_re); + euf::snode* b_sn = sg.mk(b_re); + + SASSERT(!nr.has_stabilizers(a_sn)); + nr.add_stabilizer(a_sn, b_sn); + SASSERT(nr.has_stabilizers(a_sn)); + SASSERT(nr.get_stabilizer_union(a_sn) == b_sn); + + // dedup: adding same stabilizer again + nr.add_stabilizer(a_sn, b_sn); + auto* stabs = nr.get_stabilizers(a_sn); + SASSERT(stabs && stabs->size() == 1); + + // reset + nr.reset_stabilizers(); + SASSERT(!nr.has_stabilizers(a_sn)); + + std::cout << " ok\n"; +} + +// Test: self-stabilizing flag +static void test_self_stabilizing() { + std::cout << "test_self_stabilizing\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + euf::snode* a_sn = sg.mk(a_re); + + SASSERT(!nr.is_self_stabilizing(a_sn)); + nr.set_self_stabilizing(a_sn); + SASSERT(nr.is_self_stabilizing(a_sn)); + + // star should be detected as self-stabilizing + expr_ref star_a(su.re.mk_star(a_re), m); + euf::snode* star_sn = sg.mk(star_a); + SASSERT(nr.compute_self_stabilizing(star_sn)); + + std::cout << " ok\n"; +} + +// Test: check_intersection_emptiness — SAT case +static void test_check_intersection_sat() { + std::cout << "test_check_intersection_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // a* ∩ (a|b)* should be non-empty (both accept "a") + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + expr_ref star_a(su.re.mk_star(a_re), m); + expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m); + expr_ref ab_union(su.re.mk_union(a_re, b_re), m); + expr_ref star_ab(su.re.mk_star(ab_union), m); + + euf::snode* s1 = sg.mk(star_a); + euf::snode* s2 = sg.mk(star_ab); + ptr_vector regexes; + regexes.push_back(s1); + regexes.push_back(s2); + + lbool result = nr.check_intersection_emptiness(regexes); + SASSERT(result == l_false); // non-empty + std::cout << " ok: a* ∩ (a|b)* is non-empty\n"; +} + +// Test: check_intersection_emptiness — UNSAT case +static void test_check_intersection_unsat() { + std::cout << "test_check_intersection_unsat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + + // to_re("a") ∩ to_re("b") should be empty + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m); + euf::snode* s1 = sg.mk(a_re); + euf::snode* s2 = sg.mk(b_re); + ptr_vector regexes; + regexes.push_back(s1); + regexes.push_back(s2); + + lbool result = nr.check_intersection_emptiness(regexes); + SASSERT(result == l_true); // empty + std::cout << " ok: to_re(a) ∩ to_re(b) is empty\n"; +} + +// Test: is_language_subset — true case +static void test_is_language_subset_true() { + std::cout << "test_is_language_subset_true\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // a* ⊆ (a|b)* should be true + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + expr_ref star_a(su.re.mk_star(a_re), m); + expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m); + expr_ref ab_union(su.re.mk_union(a_re, b_re), m); + expr_ref star_ab(su.re.mk_star(ab_union), m); + + euf::snode* subset = sg.mk(star_a); + euf::snode* superset = sg.mk(star_ab); + + lbool result = nr.is_language_subset(subset, superset); + SASSERT(result == l_true); + std::cout << " ok: a* ⊆ (a|b)*\n"; +} + +// Test: is_language_subset — false case +static void test_is_language_subset_false() { + std::cout << "test_is_language_subset_false\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + + // (a|b)* ⊄ a* should be false (b ∈ (a|b)* but b ∉ a*) + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + expr_ref star_a(su.re.mk_star(a_re), m); + expr_ref b_re(su.re.mk_to_re(su.str.mk_string("b")), m); + expr_ref ab_union(su.re.mk_union(a_re, b_re), m); + expr_ref star_ab(su.re.mk_star(ab_union), m); + + euf::snode* subset = sg.mk(star_ab); + euf::snode* superset = sg.mk(star_a); + + lbool result = nr.is_language_subset(subset, superset); + SASSERT(result == l_false); + std::cout << " ok: (a|b)* ⊄ a*\n"; +} + +// Test: is_language_subset — trivial cases +static void test_is_language_subset_trivial() { + std::cout << "test_is_language_subset_trivial\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + + // ∅ ⊆ anything = true + expr_ref none(su.re.mk_empty(su.re.mk_re(str_sort)), m); + expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); + euf::snode* empty_sn = sg.mk(none); + euf::snode* a_sn = sg.mk(a_re); + SASSERT(nr.is_language_subset(empty_sn, a_sn) == l_true); + + // anything ⊆ Σ* = true + expr_ref full(su.re.mk_full_seq(su.re.mk_re(str_sort)), m); + euf::snode* full_sn = sg.mk(full); + SASSERT(nr.is_language_subset(a_sn, full_sn) == l_true); + + // L ⊆ L = true (same pointer) + SASSERT(nr.is_language_subset(a_sn, a_sn) == l_true); + + std::cout << " ok\n"; +} + void tst_nseq_regex() { test_nseq_regex_instantiation(); test_nseq_regex_is_empty(); test_nseq_regex_is_full(); + test_strengthened_stabilizer_null(); + test_strengthened_stabilizer_single_char(); + test_strengthened_stabilizer_two_char(); + test_filtered_stabilizer_star_empty(); + test_filtered_stabilizer_star_with_stab(); + test_filtered_stabilizer_star_filtered(); + test_extract_cycle_history_basic(); + test_extract_cycle_history_null_ancestor(); + test_bfs_empty_none(); + test_bfs_nonempty_full(); + test_bfs_nonempty_to_re(); + test_bfs_nonempty_star(); + test_bfs_empty_union_of_empties(); + test_bfs_nonempty_range(); + test_bfs_empty_complement_full(); + test_bfs_null_safety(); + test_bfs_bounded(); + // New tests for regex membership completion + test_char_set_is_subset(); + test_stabilizer_store_basic(); + test_self_stabilizing(); + test_check_intersection_sat(); + test_check_intersection_unsat(); + test_is_language_subset_true(); + test_is_language_subset_false(); + test_is_language_subset_trivial(); std::cout << "nseq_regex: all tests passed\n"; } diff --git a/src/test/nseq_zipt.cpp b/src/test/nseq_zipt.cpp index 930749de3..4f8516da0 100644 --- a/src/test/nseq_zipt.cpp +++ b/src/test/nseq_zipt.cpp @@ -57,9 +57,10 @@ public: // ----------------------------------------------------------------------- struct str_builder { euf::sgraph& sg; + seq_util& su; euf::snode* vars[26] = {}; // vars['A'-'A'] .. vars['Z'-'A'] - explicit str_builder(euf::sgraph& sg) : sg(sg) {} + str_builder(euf::sgraph& sg, seq_util& su) : sg(sg), su(su) {} euf::snode* var(char c) { int idx = c - 'A'; @@ -76,7 +77,7 @@ struct str_builder { : sg.mk_char((unsigned)(unsigned char)*p); result = result ? sg.mk_concat(result, tok) : tok; } - return result ? result : sg.mk_empty(); + return result ? result : sg.mk_empty_seq(su.str.mk_string_sort()); } }; @@ -190,7 +191,7 @@ struct nseq_fixture { static ast_manager& init(ast_manager& m) { reg_decl_plugins(m); return m; } nseq_fixture() - : eg(init(m)), sg(m, eg), ng(sg, dummy_solver), su(m), sb(sg), rb(m, su, sg) + : eg(init(m)), sg(m, eg), ng(sg, dummy_solver), su(m), sb(sg, su), rb(m, su, sg) {} euf::snode* S(const char* s) { return sb.parse(s); } diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp index abd91eb83..e33b03bd1 100644 --- a/src/test/seq_nielsen.cpp +++ b/src/test/seq_nielsen.cpp @@ -74,11 +74,12 @@ static void test_str_eq() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); seq::dep_tracker dep; dep.insert(0); @@ -121,7 +122,7 @@ static void test_str_mem() { sort_ref str_sort(seq.str.mk_string_sort(), m); euf::snode* x = sg.mk_var(symbol("x")); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); // create a regex: re.all (.*) expr_ref star_fc(seq.re.mk_full_seq(str_sort), m); @@ -149,11 +150,12 @@ static void test_nielsen_subst() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); seq::dep_tracker dep; @@ -208,7 +210,7 @@ static void test_nielsen_node() { // regex membership expr_ref re_all(seq.re.mk_full_seq(str_sort), m); euf::snode* regex = sg.mk(re_all); - euf::snode* empty = sg.mk_empty(); + euf::snode* empty = sg.mk_empty_seq(seq.str.mk_string_sort()); root->add_str_mem(seq::str_mem(x, regex, empty, 0, dep)); SASSERT(root->str_mems().size() == 1); @@ -311,7 +313,7 @@ static void test_nielsen_subst_apply() { euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); // create node with constraint: concat(x, A) = concat(B, y) seq::nielsen_node* node = ng.mk_node(); @@ -362,6 +364,7 @@ static void test_nielsen_expansion() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); @@ -379,7 +382,7 @@ static void test_nielsen_expansion() { seq::dep_tracker dep; // branch 1: x -> eps (eliminating, progress) - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); seq::nielsen_node* child1 = ng.mk_child(root); seq::nielsen_subst s1(x, e, dep); child1->apply_subst(sg, s1); @@ -1720,9 +1723,10 @@ static void test_simplify_empty_propagation() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* xy = sg.mk_concat(x, y); @@ -1743,9 +1747,10 @@ static void test_simplify_empty_vs_char() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); euf::snode* a = sg.mk_char('A'); // ε = A → rhs has non-variable token → conflict @@ -1791,11 +1796,12 @@ static void test_simplify_trivial_removal() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); seq::nielsen_node* node = ng.mk_node(); seq::dep_tracker dep; dep.insert(0); @@ -1814,10 +1820,11 @@ static void test_simplify_all_trivial_satisfied() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); seq::nielsen_node* node = ng.mk_node(); seq::dep_tracker dep; dep.insert(0); @@ -1838,7 +1845,7 @@ static void test_simplify_regex_infeasible() { seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); expr_ref ch_a(seq.str.mk_char('A'), m); expr_ref unit_a(seq.str.mk_unit(ch_a), m); @@ -1865,7 +1872,7 @@ static void test_simplify_nullable_removal() { seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); expr_ref ch_a(seq.str.mk_char('A'), m); expr_ref unit_a(seq.str.mk_unit(ch_a), m); @@ -1894,7 +1901,7 @@ static void test_simplify_brzozowski_sat() { dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); expr_ref ch_a(seq.str.mk_char('A'), m); expr_ref unit_a(seq.str.mk_unit(ch_a), m); @@ -1923,7 +1930,7 @@ static void test_simplify_brzozowski_rtl_suffix() { euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('A'); euf::snode* xa = sg.mk_concat(x, a); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); expr_ref ch_b(seq.str.mk_char('B'), m); expr_ref unit_b(seq.str.mk_unit(ch_b), m); @@ -1955,13 +1962,14 @@ static void test_simplify_multiple_eqs() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* z = sg.mk_var(symbol("z")); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); seq::nielsen_node* node = ng.mk_node(); seq::dep_tracker dep; dep.insert(0); @@ -2083,7 +2091,7 @@ static void test_explain_conflict_mem_only() { seq_util seq(m); dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); expr_ref ch_a(seq.str.mk_char('A'), m); expr_ref unit_a(seq.str.mk_unit(ch_a), m); @@ -2115,7 +2123,7 @@ static void test_explain_conflict_mixed_eq_mem() { dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); // eq[0]: A = B (conflict) ng.add_str_eq(a, b); @@ -2418,9 +2426,10 @@ static void test_length_constraints_empty_side() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); + seq_util seq(m); euf::snode* x = sg.mk_var(symbol("x")); - euf::snode* e = sg.mk_empty(); + euf::snode* e = sg.mk_empty_seq(seq.str.mk_string_sort()); // x = ε dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); diff --git a/src/util/zstring.cpp b/src/util/zstring.cpp index fa6dde45a..2d17166c1 100644 --- a/src/util/zstring.cpp +++ b/src/util/zstring.cpp @@ -402,6 +402,27 @@ bool char_set::is_disjoint(char_set const& other) const { return true; } +bool char_set::is_subset(char_set const& other) const { + // Every range in *this must be fully covered by ranges in other. + // Both are sorted, non-overlapping. + if (m_ranges.empty()) + return true; + unsigned j = 0; + for (unsigned i = 0; i < m_ranges.size(); ++i) { + unsigned lo = m_ranges[i].m_lo; + unsigned hi = m_ranges[i].m_hi; + // Advance j to find covering range in other + while (j < other.m_ranges.size() && other.m_ranges[j].m_hi <= lo) + ++j; + if (j >= other.m_ranges.size()) + return false; + // other.m_ranges[j] must fully contain [lo, hi) + if (other.m_ranges[j].m_lo > lo || other.m_ranges[j].m_hi < hi) + return false; + } + return true; +} + std::ostream& char_set::display(std::ostream& out) const { if (m_ranges.empty()) { out << "{}"; diff --git a/src/util/zstring.h b/src/util/zstring.h index f7df66455..0cf982874 100644 --- a/src/util/zstring.h +++ b/src/util/zstring.h @@ -156,6 +156,9 @@ public: // check if two sets are disjoint bool is_disjoint(char_set const& other) const; + // check if this set is a subset of other (every char in this is also in other) + bool is_subset(char_set const& other) const; + bool operator==(char_set const& other) const { return m_ranges == other.m_ranges; } char_set clone() const { char_set r; r.m_ranges = m_ranges; return r; }