3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-27 19:08:49 +00:00

Projection operator => view

This commit is contained in:
CEisenhofer 2026-06-25 10:48:20 +02:00
parent f126b60369
commit 64fed10e86
10 changed files with 494 additions and 651 deletions

View file

@ -39,8 +39,10 @@ NSB review:
#include <algorithm>
#include <complex>
#include <cstdlib>
#include <set>
#include <stack>
#include <unordered_map>
#include <vector>
namespace seq {
@ -261,19 +263,33 @@ namespace seq {
// -----------------------------------------------
bool str_mem::is_primitive() const {
return m_str && m_str->length() == 1 && m_str->is_var() && m_regex->is_ground();
// A still-unresolved symbolic-derivative residual (ite) is not a settled
// primitive — apply_regex_if_split must resolve it first.
return m_str && m_str->length() == 1 && m_str->is_var() && m_regex->is_ground()
&& m_regex->kind() != euf::snode_kind::s_ite;
}
bool str_mem::is_trivial(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
if (!(m_str && m_regex))
return false;
// Projection-aware nullability (handles re.proj operators in m_regex).
if (m_kind == mem_kind::no_loop)
// guard: discharged ⇒ Σ* (accepts all); ε has no non-empty lap-prefix.
return m_discharged || m_str->is_empty();
if (!m_str->is_empty())
return false;
if (m_kind == mem_kind::stab_view)
// ε ∈ stab(root,Q) iff current state ≡ root (i.e. root ∈ F={root}).
return m_regex == m_root;
return n->graph().sg().re_nullable(m_regex) == l_true;
}
bool str_mem::is_contradiction(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
return false;
if (m_kind == mem_kind::no_loop)
return false; // guard acceptance is always true on the empty word
if (m_kind == mem_kind::stab_view)
return m_regex != m_root; // ε ∉ stab(root,Q) when state ≢ root
return n->graph().sg().re_nullable(m_regex) == l_false;
}
@ -538,13 +554,9 @@ namespace seq {
: m(sg.get_manager()), a(sg.get_manager()), m_seq(sg.get_seq_util()), m_sg(sg), m_rw(m), m_sk(m, m_rw),
m_length_solver(solver), m_context_solver(ctx_solver), m_partial_dfa_pin(sg.get_manager()),
m_parikh(alloc(seq_parikh, sg)), m_seq_regex(alloc(seq::seq_regex, sg)) {
// Answer projection-state membership queries during projection-aware
// derivatives (the sgraph cannot reach the partial DFA otherwise).
m_sg.set_projection_oracle(this);
}
nielsen_graph::~nielsen_graph() {
m_sg.set_projection_oracle(nullptr);
dealloc(m_parikh);
dealloc(m_seq_regex);
reset();
@ -692,13 +704,6 @@ namespace seq {
n->add_constraint(constraint(le, dep, m));
}
euf::snode const* nielsen_graph::mk_projection_term(euf::snode const* root_re, unsigned nu) {
SASSERT(root_re && root_re->get_expr());
// π_{Q_nu, {root}}(root): current state == accepting state == root.
expr_ref proj = m_sg.mk_re_proj(root_re->get_expr(), root_re->get_expr(), nu);
return m_sg.mk(proj);
}
// -----------------------------------------------------------------------
// nielsen_node: simplify_and_init
// -----------------------------------------------------------------------
@ -1023,16 +1028,6 @@ namespace seq {
return;
if (src_re->is_fail() || dst_re->is_fail())
return;
// The partial DFA must track ONLY the concrete Brzozowski automaton of
// the original ground regexes. Projection operators (re.proj) are
// synthetic stabilizers minted by cycle decomposition; every fresh
// snapshot index ν is a new expression, so recording projection-derived
// states as DFA nodes makes the SCC grow without bound (a newly-marked
// edge on every extraction) and re-triggers cycle decomposition forever
// (e.g. a cycle variable x'∈π(R) being decomposed again and again
// against its own / a sibling regex's cycle). Reject such edges.
if (src_re->has_projection() || dst_re->has_projection())
return;
//euf::snode const* label_re = to_partial_label_regex(label);
//SASSERT(label_re);
@ -1201,34 +1196,6 @@ namespace seq {
return newly_marked;
}
bool nielsen_graph::try_extract_partial_projection(euf::snode const* root_re, euf::snode const*& projection_re) {
SASSERT(root_re && root_re->get_expr());
projection_re = nullptr;
if (!root_re->is_ground())
return false;
uint_set scc;
if (!collect_scc_for_projection(root_re, scc))
return false;
// Novelty = the SCC's edge set grew (some edge was previously unmarked).
// mark_scc_projection_edges advances the snapshot index only in that
// case, so a re-visit of an already-fully-marked SCC (from any of its
// states) marks nothing new and does not re-trigger a decomposition.
const unsigned newly_marked = mark_scc_projection_edges(scc);
if (newly_marked == 0)
return false;
// Keep the stabilizer symbolic as a projection operator over the
// (just-marked) explored subautomaton snapshot. Its language is
// refined lazily through projection-aware derivatives.
projection_re = mk_projection_term(root_re, m_projection_extract_idx);
if (!projection_re)
return false;
return true;
}
euf::snode const* nielsen_graph::get_slice(euf::snode const* v, expr* left, expr* right) {
SASSERT(v && v->get_expr() && left && right);
SASSERT(v->is_var());
@ -1669,7 +1636,7 @@ namespace seq {
// in both directions (left-to-right, then right-to-left), mirroring ZIPT.
for (str_mem& mem : m_str_mem) {
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (mem.is_primitive() || !mem.is_plain())
continue;
for (unsigned od = 0; od < 2; ++od) {
bool fwd = od == 0;
@ -1715,7 +1682,7 @@ namespace seq {
// consume symbolic characters via uniform derivatives
for (str_mem& mem : m_str_mem) {
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (mem.is_primitive() || !mem.is_plain())
continue;
while (mem.m_str && !mem.m_str->is_empty()) {
@ -1727,13 +1694,7 @@ namespace seq {
euf::snode const* src_re = mem.m_regex;
euf::snode const* next = nullptr;
if (src_re->has_projection()) {
// The generic symbolic derivative cannot see the projection
// operator; route through the projection-aware derivative,
// which yields the ite-residual with π propagated to leaves.
next = sg.brzozowski_deriv(src_re, tok);
}
else {
{
seq_rewriter rw(m);
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
@ -1774,6 +1735,18 @@ namespace seq {
}
}
// consume leading characters of view / guard memberships (Section 3.3).
// m_regex is the current (plain) derivative state; we gate on whether it
// lies in Q_ν (projection_state_in_Q) and step with the ordinary
// derivative, keeping the view/guard annotation.
for (str_mem& mem : m_str_mem) {
SASSERT(mem.well_formed());
if (mem.is_plain())
continue;
if (consume_view_guard(mem))
return simplify_result::conflict;
}
// check for regex memberships that are immediately infeasible
for (str_mem& mem : m_str_mem) {
if (mem.is_contradiction(this)) {
@ -1784,29 +1757,6 @@ namespace seq {
}
}
// Empty-language check for *primitive* memberships whose regex contains a
// projection operator. The regex widening pass below skips primitives,
// and is_contradiction only fires once the string side is empty. But a
// cycle decomposition constrains the remainder x'' by ~((π(r)∩~ε)·Σ*),
// and deriving this through the cycle can collapse it to the empty
// language: e.g. ~(π(r)·Σ*) ≡ ∅ because π(r) is nullable (r ∈ F), so
// π(r)·Σ* ≡ Σ*. Such a constraint is unsatisfiable, but without this
// eager check the variable would be unwound depth-deep before the
// conflict surfaces — the source of the multi-cycle-SCC blow-up. The
// check is cheap: is_empty_bfs on these projection regexes settles in a
// couple of states (a nullable projection short-circuits to non-empty).
SASSERT(m_graph.m_seq_regex);
for (str_mem const& mem : m_str_mem) {
if (!mem.is_primitive() || !mem.m_regex->has_projection())
continue;
if (m_graph.m_seq_regex->is_empty_bfs(mem.m_regex) == l_true) {
TRACE(seq, tout << "empty primitive projection regex " << mem_pp(mem, m) << "\n");
set_general_conflict();
set_conflict(backtrack_reason::regex, mem.m_dep);
return simplify_result::conflict;
}
}
// remove trivial membership constraints once again
unsigned wj = 0;
for (unsigned j = 0; j < m_str_mem.size(); ++j) {
@ -1845,6 +1795,75 @@ namespace seq {
return simplify_result::proceed;
}
bool nielsen_node::consume_view_guard(str_mem& mem) {
SASSERT(!mem.is_plain());
euf::sgraph& sg = m_graph.sg();
ast_manager& m = sg.get_manager();
seq_util& seq = m_graph.seq();
auto set_regex_conflict = [&]() {
set_general_conflict();
set_conflict(backtrack_reason::regex, mem.m_dep);
};
while (mem.m_str && !mem.m_str->is_empty()) {
euf::snode const* tok = mem.m_str->first();
if (!tok || !tok->is_char_or_unit())
break; // leading token is a variable/power — nothing to consume yet
euf::snode const* c = mem.m_regex;
// The gate tests the CURRENT (plain) state c against Q_ν. An ite
// state means a previous symbolic step has not been resolved yet;
// leave it for apply_regex_if_split.
if (!c->is_ground() || c->kind() == euf::snode_kind::s_ite)
break;
const bool in_Q = m_graph.projection_state_in_Q(c->get_expr(), mem.m_nu);
if (!in_Q) {
if (mem.is_guard()) {
// The run left Q: no lap from the start can complete within Q
// anymore, so the guard is discharged (accepts every suffix).
mem.m_discharged = true;
return false;
}
// view: a^{-1} L_{Q,F}(c) = ∅ when c ∉ Q.
set_regex_conflict();
return true;
}
// Step with brzozowski_deriv for BOTH concrete and symbolic tokens.
// This is essential: the partial-DFA states (and m_root) are produced
// by brzozowski_deriv, so its canonicalization must be used here too —
// otherwise the guard's resolved state never equals m_root by snode
// identity and laps never close. For a symbolic unit it yields a
// canonical ite residual that apply_regex_if_split later resolves.
euf::snode const* next = sg.brzozowski_deriv(c, tok);
if (!next)
break;
mem.m_str = sg.drop_left(mem.m_str, 1);
mem.m_regex = next;
if (next->is_fail()) {
// view: derivative collapsed to ∅ — unsatisfiable.
// guard: the lap can never close through ∅; treat as discharged.
if (mem.is_guard()) { mem.m_discharged = true; return false; }
set_regex_conflict();
return true;
}
if (next->is_ground() && next->kind() != euf::snode_kind::s_ite) {
// concrete next state resolved immediately
if (mem.is_guard() && next == mem.m_root) {
// a non-empty prefix completed a lap r→…→r within Q.
set_regex_conflict();
return true;
}
}
else {
// symbolic ite residual: defer to apply_regex_if_split, which
// resolves the character and (for guards) detects a lap landing
// back on the root.
break;
}
}
return false;
}
bool nielsen_node::is_satisfied() const {
if (!m_str_deq.empty() || !m_str_eq.empty())
return false;
@ -3389,128 +3408,81 @@ namespace seq {
}
// -----------------------------------------------------------------------
// Helper: record_dfa_edges_from_ite
// Walk an ite-structured symbolic derivative and record a concrete DFA edge
// for each non-fail branch. The ite has the form:
// ite(in_re(char_var, minterm_re), branch_re, rest_ite)
// Each (minterm_re, branch_re) pair gives one DFA edge src_re→branch_re.
// Called from simplify_and_init so that cycle_decomp can detect SCCs lazily
// as symbolic chars are consumed (mirroring the old concrete-char approach).
// -----------------------------------------------------------------------
void nielsen_graph::record_dfa_edges_from_ite(euf::snode const* src_re, expr* ite_deriv) {
if (!src_re || !ite_deriv)
return;
expr *c, *th, *el;
if (!m.is_ite(ite_deriv, c, th, el))
return;
expr *char_ex, *minterm_re;
if (m_seq.str.is_in_re(c, char_ex, minterm_re)) {
if (!m_seq.re.is_empty(th)) {
euf::snode const* dst = m_sg.mk(th);
if (dst && !dst->is_fail() && dst->is_ground()) {
euf::snode const* label = m_sg.mk(minterm_re);
record_partial_derivative_edge(src_re, label, dst);
}
}
}
record_dfa_edges_from_ite(src_re, el);
}
// -----------------------------------------------------------------------
// Helper: get_current_stabilizer
// Returns the current partial DFA stabilizer s* for root_re without the
// novelty guard from try_extract_partial_projection. Returns nullptr if
// no SCC exists yet or no edges have been marked.
// -----------------------------------------------------------------------
euf::snode const* nielsen_graph::get_current_stabilizer(euf::snode const* root_re) {
if (!root_re || !root_re->is_ground() || m_projection_extract_idx == 0)
return nullptr;
uint_set scc;
if (!collect_scc_for_projection(root_re, scc))
return nullptr;
// Symbolic projection over the edges marked by earlier extractions
// (index ≤ current snapshot). No new marking here, mirroring the old
// behaviour of reusing the current extraction index.
return mk_projection_term(root_re, m_projection_extract_idx);
}
// -----------------------------------------------------------------------
// Modifier: apply_cycle_subsumption
// For str_mem x·rest ∈ r where L(∩ Reg_x) ⊆ L(stabilizer(r)), remove x:
// replace x·rest ∈ r with rest ∈ r.
// Mirrors ZIPT StrMem.TrySubsume / paper Section 3.2.3 (Cycle Subsumption).
// Modifier: apply_cycle_subsumption (paper Section "Cycle Subsumption")
// For a membership x·u ∈ R (u≠ε) with L(⊓Reg_x) ⊆ stab(R,Q_ν), drop the
// leading x: replace x·u ∈ R by u ∈ R. The inclusion is decided as the
// product-emptiness test L(⊓Reg_x) ∩ ~stab(R,Q_ν) = ∅ (Section 3.3),
// adding one co-view component for ~stab.
// -----------------------------------------------------------------------
bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) {
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (!mem.is_plain() || mem.is_primitive())
continue;
euf::snode const* first = mem.m_str->first();
SASSERT(first);
if (!first->is_var())
continue;
euf::snode const* R = mem.m_regex;
// Get the current stabilizer for this regex (no novelty guard).
euf::snode const* stabilizer = get_current_stabilizer(mem.m_regex);
if (!stabilizer || m_seq.re.is_epsilon(stabilizer->get_expr()))
// R must lie on a detected cycle with a marked SCC snapshot.
uint_set scc;
if (!collect_scc_for_projection(R, scc))
continue;
const unsigned nu = m_projection_extract_idx;
if (nu == 0)
continue;
// Collect primitive regex constraints on `first`.
// Decide L(⊓Reg_x) ⊆ stab(R,Q_ν) as ⊓Reg_x ∩ ~stab = ∅.
vector<prod_comp> comps;
dep_tracker x_dep = nullptr;
euf::snode const* x_regex = m_seq_regex->collect_primitive_regex_intersection(
first, *node, m_dep_mgr, x_dep);
if (!x_regex)
collect_var_components(first, *node, comps, x_dep);
comps.push_back(prod_comp::mk_view(R, R, nu, /*complemented*/ true));
if (check_product_emptiness(comps, 5000) != l_true)
continue;
// Check L(x_regex) ⊆ L(stabilizer).
if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true)
continue;
// Subsume: replace x·rest ∈ r with rest ∈ r.
// Subsume: replace x·u ∈ R with u ∈ R.
euf::snode const* tail = m_sg.drop_first(mem.m_str);
SASSERT(tail);
nielsen_node* child = mk_child(node);
mk_edge(node, child, "cycle subs", true);
auto& child_mems = child->str_mems();
for (unsigned k = 0; k < child_mems.size(); ++k) {
if (child_mems[k] == mem) {
child_mems[k] = child_mems.back();
child_mems.pop_back();
for (auto& cm : child->str_mems()) {
if (cm == mem) {
cm.m_str = tail;
cm.m_dep = m_dep_mgr.mk_join(cm.m_dep, x_dep);
break;
}
}
const dep_tracker combined_dep = m_dep_mgr.mk_join(mem.m_dep, x_dep);
child->add_str_mem(str_mem(tail, mem.m_regex, combined_dep));
TRACE(seq, tout << "cycle_subsumption: dropped x=" << mk_pp(first->get_expr(), m)
<< " from " << mk_pp(mem.m_str->get_expr(), m)
<< "" << mk_pp(mem.m_regex->get_expr(), m) << "\n");
<< "" << mk_pp(R->get_expr(), m) << " nu=" << nu << "\n");
return true;
}
return false;
}
// -----------------------------------------------------------------------
// Modifier: apply_cycle_decomposition
// Cycle decomposition: for a str_mem x·s ∈ R where a partial DFA
// cycle is detected, project SCC onto stabilizer constraint b.
// Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*).
// Modifier: apply_cycle_decomposition (paper Section "Cycle Decomposition")
//
// Here stabilizer_re = π_{Q_SCC,{R}}(R) is the projection operator denoting
// the language of all paths from R back to R that stay within the explored
// subautomaton Q_SCC (including ε), i.e. s* for the non-empty cycle language
// s. It is kept symbolic; its derivative/nullability are evaluated lazily by
// the projection-aware sgraph (paper §3.3) rather than materialized.
// For a membership x·u ∈ R whose leading variable x sits on a detected cycle
// (R lies in an SCC of the partial DFA) and that does not already carry a
// matching cycle guard for the current SCC snapshot, split
// x → x'·x''
// and attach the two *view*/*guard* primitive constraints (Section 3.3):
// x' ∈ stab(R, Q_ν) -- stabilizer view (F = {R})
// noloop(x'', R, Q_ν) -- cycle guard (two-mode monitor)
// The leading x' is immediately subsumed (its only constraint is the view,
// and stab ⊆ stab trivially), so it is dropped from the primary constraint.
//
// The constraint on x'' prevents divergence: x'' may not begin with any
// non-empty word from L(stabilizer_re), so it cannot re-enter the cycle.
// Unlike the old projection-operator encoding, the view and guard are kept
// as *constraint metadata* over the plain state R and the ν-indexed explored
// subautomaton Q_ν — nothing is materialized as a regex, which keeps the
// reachable state space finite (termination).
// -----------------------------------------------------------------------
bool nielsen_graph::apply_cycle_decomposition(nielsen_node* node) {
@ -3518,7 +3490,7 @@ namespace seq {
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (!mem.is_plain() || mem.is_primitive())
continue;
euf::snode const* first = mem.m_str->first();
SASSERT(first);
@ -3526,29 +3498,47 @@ namespace seq {
continue;
euf::snode const* x = first;
euf::snode const* stabilizer_re = nullptr;
euf::snode const* R = mem.m_regex;
// Eagerly precompute partial DFA edges from this regex so that
// collect_scc_for_projection can detect cycles without waiting
// for apply_regex_var_split to create per-minterm children.
precompute_partial_dfa(mem.m_regex, 2);
precompute_partial_dfa(R, 64);
if (!try_extract_partial_projection(mem.m_regex, stabilizer_re))
// R must sit on a cycle (an SCC of the partial DFA).
uint_set scc;
if (!collect_scc_for_projection(R, scc))
continue;
// Mark the SCC edges; this gives a ν identifying the current Q_SCC.
// (We trigger on absence of a matching guard, NOT on novelty.)
mark_scc_projection_edges(scc);
const unsigned nu = m_projection_extract_idx;
if (nu == 0)
continue;
fprintf(stderr, "DEC R=%u nu=%u sccsz=%u x=%u nedges=%u\n", R->id(), nu, scc.num_elems(), x->id(), (unsigned)m_partial_dfa_edges.size()); fflush(stderr);
// Trigger condition: x must not already carry a cycle guard for the
// current SCC snapshot ν. All states of one SCC share a single ν, so
// the guard is keyed on ν rather than on the (changing) head R: as the
// derivation walks the SCC the head moves, but the lineage is already
// guarded against re-traversing the cycle. An already-decomposed
// variable whose guard refers to a strictly smaller, stale ν is
// re-decomposed to adopt the enlarged SCC.
bool already_guarded = false;
for (str_mem const& g : node->str_mems()) {
if (g.is_guard() && g.m_nu >= nu
&& g.m_str && g.m_str->first() == x) {
already_guarded = true;
break;
}
}
if (already_guarded)
continue;
SASSERT(stabilizer_re && stabilizer_re->get_expr());
// stabilizer_re is epsilon if the SCC has no non-trivial cycles — skip.
if (m_seq.re.is_epsilon(stabilizer_re->get_expr()))
continue;
// Get sorts needed to build the xpp regex.
sort* re_sort = stabilizer_re->get_expr()->get_sort();
sort* seq_sort = nullptr;
VERIFY(m_seq.is_re(stabilizer_re->get_expr(), seq_sort));
sort* seq_sort = x->get_expr()->get_sort();
// Construct the replacement x = x' x''
euf::snode const* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort));
euf::snode const* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), R->get_expr(), seq_sort));
euf::snode const* xpp = get_tail(x, compute_length_expr(xp).get());
euf::snode const* xp_xpp = m_sg.mk_concat(xp, xpp);
@ -3563,32 +3553,17 @@ namespace seq {
SASSERT(child->m_str_mem[mi].m_str->first() == xp);
child->m_str_mem[mi].m_str = dir_drop(m_sg, child->m_str_mem[mi].m_str, 1, true);
// x' ∈ stabilizer_re (= s*, all repetitions of the detected cycle)
child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep));
// x' ∈ stab(R, Q_ν) (stabilizer view, F = {R}, current state = R)
child->add_str_mem(str_mem::mk_view(xp, R, R, nu, mem.m_dep));
// x'' ∈ complement((stabilizer_re ∩ complement(ε)) · Σ*)
//
// stabilizer_re ∩ complement(ε) = non-empty words in the cycle language
// (s_ne) · Σ* = all words whose prefix is a full non-empty cycle
// complement(...) = words that do NOT start with a full non-empty cycle
//
// This ensures x'' cannot begin another complete cycle from the same
// SCC entry point, which is what prevents infinite unfolding.
const expr_ref eps_re(m_seq.re.mk_epsilon(seq_sort), m);
const expr_ref compl_eps(m_seq.re.mk_complement(eps_re), m);
const expr_ref s_ne(m_seq.re.mk_inter(stabilizer_re->get_expr(), compl_eps), m);
const expr_ref sigma_star(m_seq.re.mk_full_seq(re_sort), m);
const expr_ref s_ne_sigma_star(m_seq.re.mk_concat(s_ne, sigma_star), m);
const expr_ref xpp_re(m_seq.re.mk_complement(s_ne_sigma_star), m);
euf::snode const* xpp_snode = m_sg.mk(xpp_re);
child->add_str_mem(str_mem(xpp, xpp_snode, mem.m_dep));
// noloop(x'', R, Q_ν) (cycle guard, two-mode monitor, state = R)
child->add_str_mem(str_mem::mk_guard(xpp, R, R, nu, mem.m_dep));
TRACE(seq, tout << "cycle_decomp: x=" << mk_pp(x->get_expr(), m)
<< " stabilizer=" << mk_pp(stabilizer_re->get_expr(), m)
<< " xpp_re=" << xpp_re << "\n");
<< " R=" << mk_pp(R->get_expr(), m) << " nu=" << nu << "\n");
#ifdef Z3DEBUG
std::string dot = partial_dfa_to_dot(mem.m_regex, false);
std::string dot = partial_dfa_to_dot(R, false);
#endif
return true;
}
@ -3686,11 +3661,9 @@ namespace seq {
if (mem.m_str->is_empty() || mem.is_primitive())
continue;
// The split engine works on plain regex AST and does not understand the
// projection operator (re.proj) — it would give up on it anyway.
// Projection-constrained memberships are handled by the
// cycle-decomposition path, so skip them here.
if (mem.m_regex->has_projection())
// View / guard memberships (Section 3.3) are handled by the
// cycle machinery and the synchronous product, not by factorization.
if (!mem.is_plain())
continue;
split_set pairs;
@ -4083,8 +4056,12 @@ namespace seq {
expr_ref c2(m), th2(m), el2(m);
if (!bool_rewriter(m).decompose_ite(r, c2, th2, el2)) {
// No ite remaining: leaf → create child node with regex updated to r
euf::snode const* new_regex_snode = m_sg.mk(r);
// No ite remaining: leaf → create child node with regex updated to r.
// Canonicalize with th_rewriter so that the resolved leaf shares
// its snode id with the corresponding partial-DFA state (which is
// built by brzozowski_deriv); otherwise un-simplified residuals
// like (a|∅)·R≠a·R break view/guard Q-membership and lap checks.
euf::snode const* new_regex_snode = mk_rewrite(r);
nielsen_node *child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, "regex if", true);
for (const auto f : cs) {
@ -4093,6 +4070,12 @@ namespace seq {
for (str_mem &cm : child->str_mems()) {
if (cm == mem) {
cm.m_regex = new_regex_snode;
// A guard whose symbolic step lands back on the cycle
// head closed a lap within Q → this branch is dead.
if (cm.is_guard() && new_regex_snode == cm.m_root) {
child->set_general_conflict();
child->set_conflict(backtrack_reason::regex, cm.m_dep);
}
break;
}
}
@ -4938,28 +4921,184 @@ namespace seq {
// Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329)
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// Synchronous product over plain / view / guard / co-view components.
// -----------------------------------------------------------------------
lbool nielsen_graph::comp_accepting(prod_comp const& c) const {
if (c.m_dead)
return l_false;
switch (c.m_kind) {
case mem_kind::plain:
return m_sg.re_nullable(c.m_state);
case mem_kind::stab_view:
if (c.m_complemented)
return (c.m_sink || c.m_state != c.m_root) ? l_true : l_false;
return (c.m_state == c.m_root) ? l_true : l_false;
case mem_kind::no_loop:
return l_true; // guard accepts on every prefix it has not failed on
}
return l_undef;
}
nielsen_graph::prod_comp nielsen_graph::comp_step(prod_comp const& c, euf::snode const* mt) {
prod_comp r = c;
if (c.m_dead)
return r;
switch (c.m_kind) {
case mem_kind::plain: {
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) r.m_dead = true; else r.m_state = d;
return r;
}
case mem_kind::stab_view: {
if (c.m_complemented) {
if (c.m_sink) return r; // Σ*
if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_sink = true; return r; }
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) { r.m_sink = true; return r; } // ~∅ = Σ*
r.m_state = d;
return r;
}
if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_dead = true; return r; }
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) { r.m_dead = true; return r; }
r.m_state = d;
return r;
}
case mem_kind::no_loop: {
if (c.m_sink) return r; // discharged: Σ*
if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_sink = true; return r; }
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) { r.m_sink = true; return r; } // lap cannot close through ∅
if (d == c.m_root) { r.m_dead = true; return r; } // lap completed → forbidden
r.m_state = d;
return r;
}
}
return r;
}
lbool nielsen_graph::check_product_emptiness(vector<prod_comp> const& comps0, unsigned max_states) {
if (comps0.empty())
return l_false; // empty intersection = Σ* (non-empty)
auto encode = [](vector<prod_comp> const& cs) {
std::vector<unsigned> key;
key.reserve(cs.size() * 5);
for (auto const& c : cs) {
key.push_back(static_cast<unsigned>(c.m_kind));
key.push_back((c.m_complemented ? 1u : 0u) | (c.m_sink ? 2u : 0u) | (c.m_dead ? 4u : 0u));
key.push_back(c.m_state ? c.m_state->id() : UINT_MAX);
}
return key;
};
std::set<std::vector<unsigned>> visited;
vector<vector<prod_comp>> work;
work.push_back(comps0);
visited.insert(encode(comps0));
unsigned explored = 0;
while (!work.empty()) {
if (!m.inc())
return l_undef;
if (explored >= max_states)
return l_undef;
vector<prod_comp> cur = work.back();
work.pop_back();
++explored;
bool any_dead = false;
for (auto const& c : cur) if (c.m_dead) { any_dead = true; break; }
if (any_dead)
continue;
// simultaneously accepting?
bool all_acc = true, any_undef = false;
for (auto const& c : cur) {
const lbool a = comp_accepting(c);
if (a == l_false) { all_acc = false; break; }
if (a == l_undef) any_undef = true;
}
if (all_acc && !any_undef)
return l_false; // found a common word
// joint first-character partition = minterms of the intersection of
// all still-discriminating component states.
expr* combined = nullptr;
for (auto const& c : cur) {
if (c.m_sink || c.m_dead) continue;
combined = combined ? m_seq.re.mk_inter(combined, c.m_state->get_expr())
: c.m_state->get_expr();
}
if (!combined)
continue; // no discriminating state and not accepting: dead end
euf::snode_vector mts;
m_sg.compute_minterms(m_sg.mk(combined), mts);
for (euf::snode const* mt : mts) {
vector<prod_comp> nxt;
bool dead = false;
for (auto const& c : cur) {
prod_comp d = comp_step(c, mt);
if (d.m_dead) { dead = true; break; }
nxt.push_back(d);
}
if (dead)
continue;
if (visited.insert(encode(nxt)).second)
work.push_back(nxt);
}
}
return l_true; // exhausted with no accepting tuple → empty
}
bool nielsen_graph::collect_var_components(euf::snode const* var, nielsen_node const& node,
vector<prod_comp>& out, dep_tracker& dep) {
bool found = false;
for (auto const& mem : node.str_mems()) {
if (!mem.is_primitive())
continue;
if (mem.m_str->first() != var)
continue;
switch (mem.m_kind) {
case mem_kind::plain:
out.push_back(prod_comp::mk_plain(mem.m_regex));
break;
case mem_kind::stab_view:
out.push_back(prod_comp::mk_view(mem.m_regex, mem.m_root, mem.m_nu, false));
break;
case mem_kind::no_loop:
out.push_back(prod_comp::mk_guard(mem.m_regex, mem.m_root, mem.m_nu, mem.m_discharged));
break;
}
dep = m_dep_mgr.mk_join(dep, mem.m_dep);
found = true;
}
return found;
}
bool nielsen_graph::check_leaf_regex(nielsen_node const& node, dep_tracker& dep) {
SASSERT(m_seq_regex);
// Group str_mem constraints by variable (primitive constraints only)
u_map<std::pair<euf::snode_vector, dep_tracker>> var_regexes;
// distinct variables carrying a primitive constraint
uint_set seen;
for (auto const& mem : node.str_mems()) {
SASSERT(mem.is_primitive());
euf::snode const* const first = mem.m_str->first();
SASSERT(first && first->is_var());
auto &[fst, snd] = var_regexes.insert_if_not_there(first->id(), std::pair<euf::snode_vector, dep_tracker>());
fst.push_back(mem.m_regex);
snd = dep_mgr().mk_join(snd, mem.m_dep);
}
if (seen.contains(first->id()))
continue;
seen.insert(first->id());
// check intersection non-emptiness (also for single occurrences; it could be empty)
for (auto& [var_id, regexes] : var_regexes) {
const lbool result = m_seq_regex->check_intersection_emptiness(regexes.first, 5000);
vector<prod_comp> comps;
dep_tracker d = nullptr;
collect_var_components(first, node, comps, d);
const lbool result = check_product_emptiness(comps, 5000);
if (result == l_true) {
TRACE(seq, tout << "empty intersection\n");
// Intersection is empty — infeasible
dep = regexes.second;
dep = d;
return false;
}
}