From 7c7ca2782271e207a1c81d160ba3a674ebd01808 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Sat, 6 Jun 2026 15:24:25 +0200 Subject: [PATCH] Sounder? --- src/smt/seq/seq_nielsen.cpp | 37 ++++++++++++++++++++++++---------- src/smt/seq/seq_nielsen.h | 8 ++++---- src/smt/seq/seq_nielsen_pp.cpp | 14 ++++++------- src/smt/seq_model.cpp | 14 ++++++++----- src/smt/theory_nseq.cpp | 15 ++++++++++++++ 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index d0173f795..48457f9a3 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -942,15 +942,24 @@ namespace seq { return; if (src_re->is_fail() || dst_re->is_fail()) return; - - euf::snode* label_re = to_partial_label_regex(label); - SASSERT(label_re); - - if (!m_seq.is_re(label_re->get_expr()) || !label_re->is_ground()) + // The partial DFA must track ONLY the concrete Brzozowski automaton of + // the original ground regexes. Projection operators (re.proj) are + // synthetic stabilizers minted by cycle decomposition; every fresh + // snapshot index ν is a new expression, so recording projection-derived + // states as DFA nodes makes the SCC grow without bound (a newly-marked + // edge on every extraction) and re-triggers cycle decomposition forever + // (e.g. a cycle variable x'∈π(R) being decomposed again and again + // against its own / a sibling regex's cycle). Reject such edges. + if (src_re->has_projection() || dst_re->has_projection()) return; + //euf::snode* label_re = to_partial_label_regex(label); + //SASSERT(label_re); + //if (!m_seq.is_re(label_re->get_expr()) || !label_re->is_ground()) + // return; + expr* src_e = src_re->get_expr(); - expr* label_e = label_re->get_expr(); + //expr* label_e = label_re->get_expr(); expr* dst_e = dst_re->get_expr(); // Deduplicate transitions by (src, dst) only — NOT by label. The @@ -971,7 +980,7 @@ namespace seq { // already-pinned expression is harmless; the wasted slot is bounded // by the unique-edge count. m_partial_dfa_pin.push_back(src_e); - m_partial_dfa_pin.push_back(label_e); + //m_partial_dfa_pin.push_back(label_e); m_partial_dfa_pin.push_back(dst_e); unsigned edge_idx = m_partial_dfa_edges.size(); @@ -979,7 +988,7 @@ namespace seq { partial_dfa_edge e; e.m_src = src_e; - e.m_label = label_e; + //e.m_label = label_e; e.m_dst = dst_e; m_partial_dfa_edges.push_back(e); @@ -3284,7 +3293,7 @@ namespace seq { euf::snode_vector mts; m_sg.compute_minterms(re, mts); for (euf::snode* mt : mts) { - std::cout << "minterm: " << mk_pp(mt->get_expr(), m) << std::endl; + // std::cout << "minterm: " << mk_pp(mt->get_expr(), m) << std::endl; euf::snode* deriv = m_sg.brzozowski_deriv(re, mt); if (!deriv || deriv->is_fail()) continue; @@ -3295,8 +3304,7 @@ namespace seq { } } } - std::string s = partial_dfa_to_dot(root_re, false); - std::cout << s << std::endl; + //std::string s = partial_dfa_to_dot(root_re, true); } // ----------------------------------------------------------------------- @@ -3865,6 +3873,13 @@ namespace seq { if (mem.m_str->is_empty() || mem.is_primitive()) continue; + // compute_sigma / compute_tau do not understand the projection + // operator (re.proj) — they would recurse into it and hit an + // UNREACHABLE. Projection-constrained memberships are handled by the + // cycle-decomposition path, so skip them here. + if (mem.m_regex->has_projection()) + continue; + euf::snode* first = mem.m_str->first(); SASSERT(first); SASSERT(!first->is_char()); diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index eb08c110f..f5afa54dd 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -900,7 +900,7 @@ namespace seq { // at the current scope via m_sg.mk(expr) only when we actually need one. struct partial_dfa_edge { expr* m_src = nullptr; - expr* m_label = nullptr; // one-character regex label (char/minterm) + //expr* m_label = nullptr; // one-character regex label (char/minterm) expr* m_dst = nullptr; unsigned m_projection_idx = 0; // first extraction index that included this edge }; @@ -1002,14 +1002,14 @@ namespace seq { // Construct with a caller-supplied solver. Ownership is NOT transferred; // the caller is responsible for keeping the solver alive. nielsen_graph(euf::sgraph& sg, sub_solver_i& solver, context_solver_i& ctx); - ~nielsen_graph(); + ~nielsen_graph() override; - ast_manager &get_manager() { + ast_manager& get_manager() const { return m; } - euf::sgraph& sg() { return m_sg; } + euf::sgraph& sg() const { return m_sg; } seq_util& seq() { return m_seq; } seq_util const& seq() const { return m_seq; } diff --git a/src/smt/seq/seq_nielsen_pp.cpp b/src/smt/seq/seq_nielsen_pp.cpp index 605a037b4..101fd53d0 100644 --- a/src/smt/seq/seq_nielsen_pp.cpp +++ b/src/smt/seq/seq_nielsen_pp.cpp @@ -860,13 +860,13 @@ namespace seq { for (auto* e : edges) { out << " N" << e->m_src->get_id() << " -> N" << e->m_dst->get_id() << " [label=\""; - if (e->m_label) { - std::stringstream ss; - ss << mk_ismt2_pp(e->m_label, m); - out << sanitize(ss.str()); - } else { - out << "??"; - } + //if (e->m_label) { + // std::stringstream ss; + // ss << mk_ismt2_pp(e->m_label, m); + // out << sanitize(ss.str()); + //} else { + // out << "??"; + //} out << "\"];\n"; } diff --git a/src/smt/seq_model.cpp b/src/smt/seq_model.cpp index 840f6bffb..e2636856d 100644 --- a/src/smt/seq_model.cpp +++ b/src/smt/seq_model.cpp @@ -460,10 +460,12 @@ namespace smt { expr_ref witness(m); // We checked non-emptiness during Nielsen already lbool wr = m_rewriter.some_seq_in_re(re_expr, witness); - if (wr != l_true && re->has_projection()) { - // some_seq_in_re cannot extract a witness from a projection - // operator (re.proj). Fall back to a projection-aware BFS over - // the (length-intersected) regex using the sgraph derivative. + if (wr != l_true) { + // some_seq_in_re can fail (l_undef / l_false) on regexes it does + // not fully support — notably projection operators (re.proj), + // but also some plain Boolean-closure / large length-intersected + // shapes. Fall back to a derivative-automaton BFS that builds an + // accepting word of the requested length directly. wr = projection_witness(m_sg.mk(re_expr), witness); } if (wr == l_true) { @@ -474,7 +476,9 @@ namespace smt { } IF_VERBOSE(1, verbose_stream() << "witness extraction failed for " << mk_pp(var->get_expr(), m) << " : " << wr << " with len " << (has_len ? len_val.to_string() : "unknown") << "\n" << mk_pp(re_expr, m) << "\n"); - UNREACHABLE(); + // Last resort: do not crash model construction. model_validate (if + // enabled) will flag an inconsistent witness; otherwise fall through + // to the length-respecting fallback below. } // No regex constraint: try to respect the assigned length for the variable. diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 417296f44..c5ed38a5f 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -1767,11 +1767,26 @@ namespace smt { unsigned_vector const &mem_indices = var_to_mems[var_id]; ptr_vector regexes; + bool has_projection = false; for (auto i : mem_indices) { SASSERT(mems[i].well_formed()); regexes.push_back(mems[i].m_regex); + if (mems[i].m_regex->has_projection()) + has_projection = true; } + // Skip length coherence for synthetic variables constrained by a + // projection operator (the cycle variable x'∈π(R) and remainder + // x''∈~((π(R)∩~ε)Σ*) introduced by cycle decomposition). The + // Σ^l ∩ projection gradient does not converge for them — the + // benchmark has no real length constraints, so the integer solver + // is free to pick ever-larger lengths for these *synthetic* vars + // and the coherence loop rejects each one (len≠l) forever. Their + // length consistency is already guaranteed by the soundness of the + // decomposition, so leaving the integer assignment alone is safe. + if (has_projection) + continue; + SASSERT(!regexes.empty()); sort *ele_sort; VERIFY(m_seq.is_seq(m_sgraph.get_str_sort(), ele_sort));