diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 1601a6afac..1faef3e097 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -111,11 +111,6 @@ namespace euf { if (m_seq.str.is_in_re(e)) return snode_kind::s_in_re; - // projection operator π_{Q,F}(state) modeled as the re.proj skolem. - if (m_seq.is_skolem(e) && - to_app(e)->get_decl()->get_parameter(0).get_symbol() == re_proj_name()) - return snode_kind::s_projection; - return snode_kind::s_var; } @@ -290,18 +285,6 @@ namespace euf { n->m_length = 1; break; - case snode_kind::s_projection: - // re.proj(state, root, nu): args 0/1 are regexes, arg 2 is the - // snapshot index (an integer numeral). Ground/classical follow the - // regex arguments only. - SASSERT(n->num_args() == 3); - n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); - n->m_regex_free = false; - n->m_is_classical = false; - n->m_level = 1; - n->m_length = 1; - break; - default: // NSB review: is this the correct defaults for unclassified nodes? // Is this UNREACHABLE()? @@ -311,14 +294,6 @@ namespace euf { n->m_length = 1; break; } - - // A regex (sub)tree carries a projection iff it is a projection node or - // any argument does. Computed uniformly here so all callers can use the - // cheap snode flag instead of re-walking the AST. - n->m_has_projection = (n->m_kind == snode_kind::s_projection); - for (unsigned i = 0; i < n->num_args() && !n->m_has_projection; ++i) - if (n->arg(i)->has_projection()) - n->m_has_projection = true; } static const unsigned HASH_BASE = 31; @@ -575,272 +550,10 @@ namespace euf { return n; } - bool sgraph::is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const { - if (!e || !m_seq.is_skolem(e)) - return false; - app* ap = to_app(e); - if (ap->get_decl()->get_parameter(0).get_symbol() != re_proj_name()) - return false; - if (ap->get_num_args() != 3) - return false; - arith_util au(m); - rational r; - if (!au.is_numeral(ap->get_arg(2), r) || !r.is_unsigned()) - return false; - state = ap->get_arg(0); - root = ap->get_arg(1); - nu = r.get_unsigned(); - return true; - } - - expr_ref sgraph::mk_re_proj(expr* state, expr* root, unsigned nu) { - SASSERT(state && root); - arith_util au(m); - expr* args[3] = { state, root, au.mk_int(nu) }; - sort* re_sort = state->get_sort(); - return expr_ref(m_seq.mk_skolem(re_proj_name(), 3, args, re_sort), m); - } - - // A symbolic-character derivative is a linear form: a *dispatch* over the - // character built from `ite` (predicate selection) and `union` (the linear - // factors α_i·r_i), bottoming out in ordinary derivative *states* r_i. We - // recognize this dispatch skeleton so that π can be pushed all the way to - // the concrete state leaves (where it is meaningful) rather than wrapped - // around a non-atomic union/ite (which is not a DFA state and breaks - // projection_state_in_Q / nullability). A plain regex state (even one that - // is itself a union, e.g. δ of a union regex) is NOT dispatch. - bool sgraph::is_char_dispatch(expr* e) const { - if (m.is_ite(e)) - return true; - if (m_seq.re.is_empty(e)) - return true; - expr* a = nullptr, *b = nullptr; - if (m_seq.re.is_union(e, a, b)) - return is_char_dispatch(a) && is_char_dispatch(b); - return false; - } - - expr_ref sgraph::wrap_proj(expr* e, expr* root, unsigned nu) { - // Push π_{Q,F} through the dispatch skeleton (ite / dispatch-union) to - // the concrete state leaves (paper §4). - expr* c = nullptr, *th = nullptr, *el = nullptr; - if (m.is_ite(e, c, th, el)) { - expr_ref t = wrap_proj(th, root, nu); - expr_ref f = wrap_proj(el, root, nu); - return expr_ref(m.mk_ite(c, t, f), m); - } - expr* a = nullptr, *b = nullptr; - if (m_seq.re.is_union(e, a, b) && is_char_dispatch(e)) { - expr_ref wa = wrap_proj(a, root, nu); - expr_ref wb = wrap_proj(b, root, nu); - return expr_ref(m_seq.re.mk_union(wa, wb), m); - } - // π(∅) ≡ ∅: a dead leaf stays dead. - if (m_seq.re.is_empty(e)) - return expr_ref(e, m); - return mk_re_proj(e, root, nu); - } - - // 3-valued Kleene conjunction / disjunction over lbool. - static lbool lb_and(lbool a, lbool b) { - if (a == l_false || b == l_false) return l_false; - if (a == l_undef || b == l_undef) return l_undef; - return l_true; - } - static lbool lb_or(lbool a, lbool b) { - if (a == l_true || b == l_true) return l_true; - if (a == l_undef || b == l_undef) return l_undef; - return l_false; - } - lbool sgraph::re_nullable(snode const* re) { if (!re) return l_undef; - // Projection-free regexes: defer to the standard regex info. - if (!re->has_projection()) - return m_seq.re.get_info(re->get_expr()).nullable; - - switch (re->kind()) { - case snode_kind::s_projection: { - // nullable(π_{{root}}(state)) = [state ∈ F] = [state ≡ root]. - // Regex expressions are perfectly shared, so syntactic pointer - // equality coincides with the expr-id equality used for cycle - // detection in the partial DFA. - expr* state = nullptr, *root = nullptr; unsigned nu = 0; - VERIFY(is_re_proj(re->get_expr(), state, root, nu)); - return (state == root) ? l_true : l_false; - } - case snode_kind::s_complement: - return ~re_nullable(re->arg(0)); - case snode_kind::s_intersect: - case snode_kind::s_concat: - return lb_and(re_nullable(re->arg(0)), re_nullable(re->arg(1))); - case snode_kind::s_union: - return lb_or(re_nullable(re->arg(0)), re_nullable(re->arg(1))); - case snode_kind::s_star: - return l_true; - case snode_kind::s_plus: - return re_nullable(re->arg(0)); - case snode_kind::s_ite: - // ε-acceptance of a symbolic-derivative residual depends on the - // (unresolved) character predicate, so it is genuinely unknown - // until the ite is split (apply_regex_if_split). - return l_undef; - default: - // Not expected to carry a projection in our constructions; fall - // back to the standard info (sound for projection-free shapes). - return m_seq.re.get_info(re->get_expr()).nullable; - } - } - - snode const* sgraph::deriv_proj(snode const* re, expr* ch) { - SASSERT(re && re->get_expr()); - expr* re_expr = re->get_expr(); - sort* re_sort = re_expr->get_sort(); - - // Projection-free subterm: the standard derivative already handles it - // (and keeps the result canonical, matching partial-DFA state ids). - if (!re->has_projection()) - return mk(m_rewriter.mk_derivative(ch, re_expr)); - - // Language-preserving combinators that DISTRIBUTE over ite. The - // symbolic-character derivative of a regex is a linear form: a - // *top-level* ite dispatching on character predicates over the (single) - // symbolic char, with ordinary derivative regexes at the leaves. Both - // projection leaves (via wrap_proj) and projection-free subterms (via - // mk_derivative) already yield top-level ites; if the surrounding - // Boolean/concat operators did not push themselves into the ite leaves, - // the ite would end up *buried* (e.g. ~(ite(...)·Σ*)) where - // apply_regex_if_split — which only matches a top-level ite — can no - // longer resolve the symbolic char, stalling the constraint and causing - // unbounded variable unwinding. Distributing keeps the result a proper - // top-level linear form, exactly as the standard mk_derivative does. - // These combinators also fold the trivial regex identities so the - // projection skolem and its inner state id are preserved verbatim. - // The symbolic-character derivative is a dispatch over the char built - // from `ite` (predicate selection) AND `union` (the linear factors). - // The combinators below distribute over BOTH so the dispatch skeleton - // stays at the top with concrete (π-wrapped) state leaves — otherwise a - // surrounding operator buries the dispatch and apply_regex_if_split can - // no longer resolve the symbolic char (the multi-cycle-SCC divergence). - // We only distribute over a union that is itself a char-dispatch (so a - // semantic state-union — e.g. δ of a union regex — is left intact and - // not needlessly expanded). All distributions are language-preserving - // (∩ and · distribute over ⊔; ~(A⊔B) = ~A ∩ ~B by De Morgan). - auto is_empty = [&](expr* r) { return m_seq.re.is_empty(r); }; - auto is_full = [&](expr* r) { return m_seq.re.is_full_seq(r); }; - auto is_eps = [&](expr* r) { return m_seq.re.is_epsilon(r); }; - auto is_ite = [&](expr* r, expr*& c, expr*& t, expr*& e) { return m.is_ite(r, c, t, e); }; - auto is_disp_union = [&](expr* r, expr*& a, expr*& b) { - return m_seq.re.is_union(r, a, b) && is_char_dispatch(r); - }; - - std::function mk_union = [&](expr* x, expr* y) -> expr* { - expr *c = nullptr, *t = nullptr, *e = nullptr; - if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_union(t, y), mk_union(e, y)); - if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_union(x, t), mk_union(x, e)); - if (is_empty(x)) return y; - if (is_empty(y)) return x; - if (x == y) return x; - if (is_full(x) || is_full(y)) return m_seq.re.mk_full_seq(re_sort); - return m_seq.re.mk_union(x, y); - }; - std::function mk_inter = [&](expr* x, expr* y) -> expr* { - expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr; - if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_inter(t, y), mk_inter(e, y)); - if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_inter(x, t), mk_inter(x, e)); - if (is_disp_union(x, a, b)) return mk_union(mk_inter(a, y), mk_inter(b, y)); - if (is_disp_union(y, a, b)) return mk_union(mk_inter(x, a), mk_inter(x, b)); - if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort); - if (is_full(x)) return y; - if (is_full(y)) return x; - if (x == y) return x; - return m_seq.re.mk_inter(x, y); - }; - std::function mk_concat = [&](expr* x, expr* y) -> expr* { - expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr; - if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_concat(t, y), mk_concat(e, y)); - if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_concat(x, t), mk_concat(x, e)); - if (is_disp_union(x, a, b)) return mk_union(mk_concat(a, y), mk_concat(b, y)); - if (is_disp_union(y, a, b)) return mk_union(mk_concat(x, a), mk_concat(x, b)); - if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort); - if (is_eps(x)) return y; - if (is_eps(y)) return x; - return m_seq.re.mk_concat(x, y); - }; - std::function mk_compl = [&](expr* x) -> expr* { - expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr; - if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_compl(t), mk_compl(e)); - if (is_disp_union(x, a, b)) return mk_inter(mk_compl(a), mk_compl(b)); // De Morgan - if (is_empty(x)) return m_seq.re.mk_full_seq(re_sort); - if (is_full(x)) return m_seq.re.mk_empty(re_sort); - expr* inner = nullptr; - if (m_seq.re.is_complement(x, inner)) return inner; - return m_seq.re.mk_complement(x); - }; - - switch (re->kind()) { - case snode_kind::s_projection: { - expr* state = nullptr, *root = nullptr; unsigned nu = 0; - VERIFY(is_re_proj(re_expr, state, root, nu)); - // δ_a(π_{Q,{root}}(state)) = π_{Q,{root}}(δ_a(state)) if state ∈ Q, - // else ⊥. The gate is on the *current* state (paper §3.3). - if (!m_proj_oracle || !m_proj_oracle->projection_state_in_Q(state, nu)) - return mk(m_seq.re.mk_empty(re_sort)); - snode const* dstate = deriv_proj(re->arg(0), ch); // arg(0) ≡ state - if (!dstate || dstate->is_fail() || m_seq.re.is_empty(dstate->get_expr())) - return mk(m_seq.re.mk_empty(re_sort)); - // δ(state) may be concrete (one state) or an ite-term (symbolic - // character). Wrap π around the result, descending into ite leaves. - return mk(wrap_proj(dstate->get_expr(), root, nu)); - } - case snode_kind::s_ite: { - // ite-structured residual (from a symbolic-character derivative): - // δ_a(ite(c, th, el)) = ite(c, δ_a(th), δ_a(el)). - snode const* dth = deriv_proj(re->arg(1), ch); - snode const* del = deriv_proj(re->arg(2), ch); - return mk(expr_ref(m.mk_ite(re->arg(0)->get_expr(), dth->get_expr(), del->get_expr()), m)); - } - case snode_kind::s_complement: { - snode const* d = deriv_proj(re->arg(0), ch); - return mk(expr_ref(mk_compl(d->get_expr()), m)); - } - case snode_kind::s_intersect: { - snode const* d0 = deriv_proj(re->arg(0), ch); - snode const* d1 = deriv_proj(re->arg(1), ch); - return mk(expr_ref(mk_inter(d0->get_expr(), d1->get_expr()), m)); - } - case snode_kind::s_union: { - snode const* d0 = deriv_proj(re->arg(0), ch); - snode const* d1 = deriv_proj(re->arg(1), ch); - return mk(expr_ref(mk_union(d0->get_expr(), d1->get_expr()), m)); - } - case snode_kind::s_concat: { - // δ_a(R·S) = δ_a(R)·S ⊔ (nullable(R) ? δ_a(S) : ∅) - snode const* d0 = deriv_proj(re->arg(0), ch); - expr* head = mk_concat(d0->get_expr(), re->arg(1)->get_expr()); - if (re_nullable(re->arg(0)) == l_true) { - snode const* d1 = deriv_proj(re->arg(1), ch); - head = mk_union(head, d1->get_expr()); - } - return mk(expr_ref(head, m)); - } - case snode_kind::s_star: { - // δ_a(R*) = δ_a(R)·R* - snode const* d = deriv_proj(re->arg(0), ch); - return mk(expr_ref(mk_concat(d->get_expr(), re_expr), m)); - } - case snode_kind::s_plus: { - // δ_a(R+) = δ_a(R)·R* - snode const* d = deriv_proj(re->arg(0), ch); - expr_ref star(m_seq.re.mk_star(re->arg(0)->get_expr()), m); - return mk(expr_ref(mk_concat(d->get_expr(), star), m)); - } - default: - // A projection nested under an operator we do not specially derive - // is not produced by our constructions. Fall back conservatively. - return mk(m_rewriter.mk_derivative(ch, re_expr)); - } + return m_seq.re.get_info(re->get_expr()).nullable; } snode const* sgraph::brzozowski_deriv(snode const* re, snode const* elem) { @@ -891,24 +604,17 @@ namespace euf { } } - // If the regex contains a projection operator, the generic rewriter - // cannot derive it. Use the projection-aware structural derivative, - // which delegates projection-free subterms back to mk_derivative. - // Canonicalize the result with th_rewriter (which treats the re.proj - // skolem as an opaque leaf and only applies ACI / identity rules to the - // surrounding Boolean/concat structure). Without this, equivalent - // derivative states get distinct snode ids and BFS emptiness checks - // fail to deduplicate, exploring an exploded state space. - if (re->has_projection()) { - snode const* d = deriv_proj(re, elem_expr); - expr_ref e(d->get_expr(), m); - th_rewriter trw(m); - trw(e); - return mk(e); - } - - const expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr); + expr_ref result(m_rewriter.mk_derivative(elem_expr, re_expr), m); SASSERT(result); + // Canonicalize with th_rewriter so that derivative states share a snode + // id regardless of how they were reached (concrete vs. via a symbolic + // ite resolved by apply_regex_if_split, which also th_rewrites its leaf). + // Without this, semantically equal but syntactically different residuals + // — notably intersections like (A∩B) vs (B∩A) or (a|∅)·R vs a·R — get + // distinct ids, breaking partial-DFA Q-membership and view/guard lap + // detection (the multi-cycle / intersection divergence). + th_rewriter trw(m); + trw(result); return mk(result); } @@ -962,14 +668,6 @@ namespace euf { if (m_seq.re.is_empty(e)) return; - // projection operator: collect predicates from the regex arguments only - // (the third argument is the integer snapshot index, not a regex). - if (re->is_projection()) { - collect_re_predicates(re->arg(0), preds); - collect_re_predicates(re->arg(1), preds); - return; - } - // Expected compound regex operators are handled by recursion below. // If a leaf survives to this point, it is an unhandled regex form. if (re->num_args() == 0) { diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 3d2a43a21d..1687883249 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -69,19 +69,6 @@ namespace euf { class seq_plugin; - // Oracle queried by the projection-aware derivative of sgraph. - // The projection operator π_{Q,F}(state) (a re.proj skolem) has its set of - // explored states Q stored externally (the nielsen_graph partial DFA), keyed - // by a snapshot index nu. The sgraph consults this oracle to decide whether - // the current state lies in Q when deriving a projection. - class projection_oracle { - public: - virtual ~projection_oracle() = default; - // true iff `state` (a regex expression) belongs to the explored - // subautomaton snapshot identified by `nu`. - virtual bool projection_state_in_Q(expr* state, unsigned nu) = 0; - }; - class sgraph { struct stats { @@ -121,9 +108,6 @@ namespace euf { unsigned_vector m_alias_trail; // expression ids unsigned_vector m_alias_trail_lim; // scope boundaries - // Oracle answering "state ∈ Q_nu" for projection derivatives. Not owned. - projection_oracle* m_proj_oracle = nullptr; - snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode const** args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); @@ -173,28 +157,7 @@ namespace euf { // for deriving symbolic variables. snode const* brzozowski_deriv(snode const* re, snode const* elem); - // Register the oracle consulted when deriving projection operators. - // Passing nullptr unregisters. Not owned. - void set_projection_oracle(projection_oracle* o) { m_proj_oracle = o; } - - // Projection operator support (π_{Q,F}(state) modeled as re.proj skolem). - // Recognize and destructure a re.proj skolem expression. - bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const; - // Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu. - expr_ref mk_re_proj(expr* state, expr* root, unsigned nu); - // True if e is a symbolic-character dispatch skeleton (ite / union of - // dispatch, bottoming out in ∅) rather than a concrete regex state. - bool is_char_dispatch(expr* e) const; - // Wrap a (possibly ite/union-structured) symbolic-derivative result in - // the projection operator, propagating π into every dispatch leaf (§4). - expr_ref wrap_proj(expr* e, expr* root, unsigned nu); - // Projection-aware Brzozowski derivative w.r.t. a character expr - // (concrete or symbolic). - snode const* deriv_proj(snode const* re, expr* ch); - - // Projection-aware nullability: lifts re.get_info().nullable to regexes - // that may contain projection operators. Returns l_true / l_false - // (l_undef only if an underlying projection-free subterm is undef). + // Nullability of a regex snode (thin wrapper over re.get_info().nullable). lbool re_nullable(snode const* re); // Decode a character expression that may be represented as a const-char, diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 4412a341f3..1faba384f1 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -57,16 +57,9 @@ namespace euf { s_range, // character range [lo,hi] (OP_RE_RANGE) s_ite, // ite (OP_ITE) s_to_re, // string to regex (OP_SEQ_TO_RE) - s_in_re, // regex membership (OP_SEQ_IN_RE) - s_projection // projection operator π_{Q,F}(state) (re.proj skolem) + s_in_re // regex membership (OP_SEQ_IN_RE) }; - // Skolem decl-name marking a regex projection operator π_{Q,F}(state). - // The skolem has signature re.proj(state : RegLan, root : RegLan, nu : Int) - // where F = {root} (singleton accepting state) and nu identifies the - // explored subautomaton snapshot Q (see seq_nielsen partial DFA). - inline symbol re_proj_name() { return symbol("re.proj"); } - class snode { expr *m_expr = nullptr; // assumed to be non-null snode_kind m_kind = snode_kind::s_var; @@ -77,7 +70,6 @@ namespace euf { bool m_ground = true; // no uninterpreted string variables bool m_regex_free = true; // no regex constructs bool m_is_classical = true; // classical regular expression - bool m_has_projection = false; // contains a projection operator (re.proj) unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons) unsigned m_length = 0; // token count, number of leaf tokens in the tree @@ -193,10 +185,6 @@ namespace euf { bool is_classical() const { return m_is_classical; } - // true iff this regex (sub)tree contains a projection operator (re.proj). - bool has_projection() const { - return m_has_projection; - } unsigned level() const { return m_level; } @@ -296,10 +284,6 @@ namespace euf { } return true; } - bool is_projection() const { - return m_kind == snode_kind::s_projection; - } - // is this a leaf token (analogous to ZIPT's StrToken as opposed to Str) bool is_token() const { switch (m_kind) { diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index c3712ebf2a..9314355a68 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -39,8 +39,10 @@ NSB review: #include #include #include +#include #include #include +#include namespace seq { @@ -261,19 +263,33 @@ namespace seq { // ----------------------------------------------- bool str_mem::is_primitive() const { - return m_str && m_str->length() == 1 && m_str->is_var() && m_regex->is_ground(); + // A still-unresolved symbolic-derivative residual (ite) is not a settled + // primitive — apply_regex_if_split must resolve it first. + return m_str && m_str->length() == 1 && m_str->is_var() && m_regex->is_ground() + && m_regex->kind() != euf::snode_kind::s_ite; } bool str_mem::is_trivial(nielsen_node const* n) const { - if (!(m_str && m_regex && m_str->is_empty())) + if (!(m_str && m_regex)) return false; - // Projection-aware nullability (handles re.proj operators in m_regex). + if (m_kind == mem_kind::no_loop) + // guard: discharged ⇒ Σ* (accepts all); ε has no non-empty lap-prefix. + return m_discharged || m_str->is_empty(); + if (!m_str->is_empty()) + return false; + if (m_kind == mem_kind::stab_view) + // ε ∈ stab(root,Q) iff current state ≡ root (i.e. root ∈ F={root}). + return m_regex == m_root; return n->graph().sg().re_nullable(m_regex) == l_true; } bool str_mem::is_contradiction(nielsen_node const* n) const { if (!(m_str && m_regex && m_str->is_empty())) return false; + if (m_kind == mem_kind::no_loop) + return false; // guard acceptance is always true on the empty word + if (m_kind == mem_kind::stab_view) + return m_regex != m_root; // ε ∉ stab(root,Q) when state ≢ root return n->graph().sg().re_nullable(m_regex) == l_false; } @@ -538,13 +554,9 @@ namespace seq { : m(sg.get_manager()), a(sg.get_manager()), m_seq(sg.get_seq_util()), m_sg(sg), m_rw(m), m_sk(m, m_rw), m_length_solver(solver), m_context_solver(ctx_solver), m_partial_dfa_pin(sg.get_manager()), m_parikh(alloc(seq_parikh, sg)), m_seq_regex(alloc(seq::seq_regex, sg)) { - // Answer projection-state membership queries during projection-aware - // derivatives (the sgraph cannot reach the partial DFA otherwise). - m_sg.set_projection_oracle(this); } nielsen_graph::~nielsen_graph() { - m_sg.set_projection_oracle(nullptr); dealloc(m_parikh); dealloc(m_seq_regex); reset(); @@ -692,13 +704,6 @@ namespace seq { n->add_constraint(constraint(le, dep, m)); } - euf::snode const* nielsen_graph::mk_projection_term(euf::snode const* root_re, unsigned nu) { - SASSERT(root_re && root_re->get_expr()); - // π_{Q_nu, {root}}(root): current state == accepting state == root. - expr_ref proj = m_sg.mk_re_proj(root_re->get_expr(), root_re->get_expr(), nu); - return m_sg.mk(proj); - } - // ----------------------------------------------------------------------- // nielsen_node: simplify_and_init // ----------------------------------------------------------------------- @@ -1023,16 +1028,6 @@ namespace seq { return; if (src_re->is_fail() || dst_re->is_fail()) return; - // The partial DFA must track ONLY the concrete Brzozowski automaton of - // the original ground regexes. Projection operators (re.proj) are - // synthetic stabilizers minted by cycle decomposition; every fresh - // snapshot index ν is a new expression, so recording projection-derived - // states as DFA nodes makes the SCC grow without bound (a newly-marked - // edge on every extraction) and re-triggers cycle decomposition forever - // (e.g. a cycle variable x'∈π(R) being decomposed again and again - // against its own / a sibling regex's cycle). Reject such edges. - if (src_re->has_projection() || dst_re->has_projection()) - return; //euf::snode const* label_re = to_partial_label_regex(label); //SASSERT(label_re); @@ -1201,34 +1196,6 @@ namespace seq { return newly_marked; } - bool nielsen_graph::try_extract_partial_projection(euf::snode const* root_re, euf::snode const*& projection_re) { - SASSERT(root_re && root_re->get_expr()); - projection_re = nullptr; - if (!root_re->is_ground()) - return false; - - uint_set scc; - if (!collect_scc_for_projection(root_re, scc)) - return false; - - // Novelty = the SCC's edge set grew (some edge was previously unmarked). - // mark_scc_projection_edges advances the snapshot index only in that - // case, so a re-visit of an already-fully-marked SCC (from any of its - // states) marks nothing new and does not re-trigger a decomposition. - const unsigned newly_marked = mark_scc_projection_edges(scc); - if (newly_marked == 0) - return false; - - // Keep the stabilizer symbolic as a projection operator over the - // (just-marked) explored subautomaton snapshot. Its language is - // refined lazily through projection-aware derivatives. - projection_re = mk_projection_term(root_re, m_projection_extract_idx); - if (!projection_re) - return false; - - return true; - } - euf::snode const* nielsen_graph::get_slice(euf::snode const* v, expr* left, expr* right) { SASSERT(v && v->get_expr() && left && right); SASSERT(v->is_var()); @@ -1669,7 +1636,7 @@ namespace seq { // in both directions (left-to-right, then right-to-left), mirroring ZIPT. for (str_mem& mem : m_str_mem) { SASSERT(mem.well_formed()); - if (mem.is_primitive()) + if (mem.is_primitive() || !mem.is_plain()) continue; for (unsigned od = 0; od < 2; ++od) { bool fwd = od == 0; @@ -1715,7 +1682,7 @@ namespace seq { // consume symbolic characters via uniform derivatives for (str_mem& mem : m_str_mem) { SASSERT(mem.well_formed()); - if (mem.is_primitive()) + if (mem.is_primitive() || !mem.is_plain()) continue; while (mem.m_str && !mem.m_str->is_empty()) { @@ -1727,13 +1694,7 @@ namespace seq { euf::snode const* src_re = mem.m_regex; euf::snode const* next = nullptr; - if (src_re->has_projection()) { - // The generic symbolic derivative cannot see the projection - // operator; route through the projection-aware derivative, - // which yields the ite-residual with π propagated to leaves. - next = sg.brzozowski_deriv(src_re, tok); - } - else { + { seq_rewriter rw(m); expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m); @@ -1774,6 +1735,18 @@ namespace seq { } } + // consume leading characters of view / guard memberships (Section 3.3). + // m_regex is the current (plain) derivative state; we gate on whether it + // lies in Q_ν (projection_state_in_Q) and step with the ordinary + // derivative, keeping the view/guard annotation. + for (str_mem& mem : m_str_mem) { + SASSERT(mem.well_formed()); + if (mem.is_plain()) + continue; + if (consume_view_guard(mem)) + return simplify_result::conflict; + } + // check for regex memberships that are immediately infeasible for (str_mem& mem : m_str_mem) { if (mem.is_contradiction(this)) { @@ -1784,29 +1757,6 @@ namespace seq { } } - // Empty-language check for *primitive* memberships whose regex contains a - // projection operator. The regex widening pass below skips primitives, - // and is_contradiction only fires once the string side is empty. But a - // cycle decomposition constrains the remainder x'' by ~((π(r)∩~ε)·Σ*), - // and deriving this through the cycle can collapse it to the empty - // language: e.g. ~(π(r)·Σ*) ≡ ∅ because π(r) is nullable (r ∈ F), so - // π(r)·Σ* ≡ Σ*. Such a constraint is unsatisfiable, but without this - // eager check the variable would be unwound depth-deep before the - // conflict surfaces — the source of the multi-cycle-SCC blow-up. The - // check is cheap: is_empty_bfs on these projection regexes settles in a - // couple of states (a nullable projection short-circuits to non-empty). - SASSERT(m_graph.m_seq_regex); - for (str_mem const& mem : m_str_mem) { - if (!mem.is_primitive() || !mem.m_regex->has_projection()) - continue; - if (m_graph.m_seq_regex->is_empty_bfs(mem.m_regex) == l_true) { - TRACE(seq, tout << "empty primitive projection regex " << mem_pp(mem, m) << "\n"); - set_general_conflict(); - set_conflict(backtrack_reason::regex, mem.m_dep); - return simplify_result::conflict; - } - } - // remove trivial membership constraints once again unsigned wj = 0; for (unsigned j = 0; j < m_str_mem.size(); ++j) { @@ -1845,6 +1795,75 @@ namespace seq { return simplify_result::proceed; } + bool nielsen_node::consume_view_guard(str_mem& mem) { + SASSERT(!mem.is_plain()); + euf::sgraph& sg = m_graph.sg(); + ast_manager& m = sg.get_manager(); + seq_util& seq = m_graph.seq(); + + auto set_regex_conflict = [&]() { + set_general_conflict(); + set_conflict(backtrack_reason::regex, mem.m_dep); + }; + + while (mem.m_str && !mem.m_str->is_empty()) { + euf::snode const* tok = mem.m_str->first(); + if (!tok || !tok->is_char_or_unit()) + break; // leading token is a variable/power — nothing to consume yet + euf::snode const* c = mem.m_regex; + // The gate tests the CURRENT (plain) state c against Q_ν. An ite + // state means a previous symbolic step has not been resolved yet; + // leave it for apply_regex_if_split. + if (!c->is_ground() || c->kind() == euf::snode_kind::s_ite) + break; + const bool in_Q = m_graph.projection_state_in_Q(c->get_expr(), mem.m_nu); + if (!in_Q) { + if (mem.is_guard()) { + // The run left Q: no lap from the start can complete within Q + // anymore, so the guard is discharged (accepts every suffix). + mem.m_discharged = true; + return false; + } + // view: a^{-1} L_{Q,F}(c) = ∅ when c ∉ Q. + set_regex_conflict(); + return true; + } + // Step with brzozowski_deriv for BOTH concrete and symbolic tokens. + // This is essential: the partial-DFA states (and m_root) are produced + // by brzozowski_deriv, so its canonicalization must be used here too — + // otherwise the guard's resolved state never equals m_root by snode + // identity and laps never close. For a symbolic unit it yields a + // canonical ite residual that apply_regex_if_split later resolves. + euf::snode const* next = sg.brzozowski_deriv(c, tok); + if (!next) + break; + mem.m_str = sg.drop_left(mem.m_str, 1); + mem.m_regex = next; + if (next->is_fail()) { + // view: derivative collapsed to ∅ — unsatisfiable. + // guard: the lap can never close through ∅; treat as discharged. + if (mem.is_guard()) { mem.m_discharged = true; return false; } + set_regex_conflict(); + return true; + } + if (next->is_ground() && next->kind() != euf::snode_kind::s_ite) { + // concrete next state resolved immediately + if (mem.is_guard() && next == mem.m_root) { + // a non-empty prefix completed a lap r→…→r within Q. + set_regex_conflict(); + return true; + } + } + else { + // symbolic ite residual: defer to apply_regex_if_split, which + // resolves the character and (for guards) detects a lap landing + // back on the root. + break; + } + } + return false; + } + bool nielsen_node::is_satisfied() const { if (!m_str_deq.empty() || !m_str_eq.empty()) return false; @@ -3389,128 +3408,81 @@ namespace seq { } // ----------------------------------------------------------------------- - // Helper: record_dfa_edges_from_ite - // Walk an ite-structured symbolic derivative and record a concrete DFA edge - // for each non-fail branch. The ite has the form: - // ite(in_re(char_var, minterm_re), branch_re, rest_ite) - // Each (minterm_re, branch_re) pair gives one DFA edge src_re→branch_re. - // Called from simplify_and_init so that cycle_decomp can detect SCCs lazily - // as symbolic chars are consumed (mirroring the old concrete-char approach). - // ----------------------------------------------------------------------- - - void nielsen_graph::record_dfa_edges_from_ite(euf::snode const* src_re, expr* ite_deriv) { - if (!src_re || !ite_deriv) - return; - expr *c, *th, *el; - if (!m.is_ite(ite_deriv, c, th, el)) - return; - expr *char_ex, *minterm_re; - if (m_seq.str.is_in_re(c, char_ex, minterm_re)) { - if (!m_seq.re.is_empty(th)) { - euf::snode const* dst = m_sg.mk(th); - if (dst && !dst->is_fail() && dst->is_ground()) { - euf::snode const* label = m_sg.mk(minterm_re); - record_partial_derivative_edge(src_re, label, dst); - } - } - } - record_dfa_edges_from_ite(src_re, el); - } - - // ----------------------------------------------------------------------- - // Helper: get_current_stabilizer - // Returns the current partial DFA stabilizer s* for root_re without the - // novelty guard from try_extract_partial_projection. Returns nullptr if - // no SCC exists yet or no edges have been marked. - // ----------------------------------------------------------------------- - - euf::snode const* nielsen_graph::get_current_stabilizer(euf::snode const* root_re) { - if (!root_re || !root_re->is_ground() || m_projection_extract_idx == 0) - return nullptr; - uint_set scc; - if (!collect_scc_for_projection(root_re, scc)) - return nullptr; - // Symbolic projection over the edges marked by earlier extractions - // (index ≤ current snapshot). No new marking here, mirroring the old - // behaviour of reusing the current extraction index. - return mk_projection_term(root_re, m_projection_extract_idx); - } - - // ----------------------------------------------------------------------- - // Modifier: apply_cycle_subsumption - // For str_mem x·rest ∈ r where L(∩ Reg_x) ⊆ L(stabilizer(r)), remove x: - // replace x·rest ∈ r with rest ∈ r. - // Mirrors ZIPT StrMem.TrySubsume / paper Section 3.2.3 (Cycle Subsumption). + // Modifier: apply_cycle_subsumption (paper Section "Cycle Subsumption") + // For a membership x·u ∈ R (u≠ε) with L(⊓Reg_x) ⊆ stab(R,Q_ν), drop the + // leading x: replace x·u ∈ R by u ∈ R. The inclusion is decided as the + // product-emptiness test L(⊓Reg_x) ∩ ~stab(R,Q_ν) = ∅ (Section 3.3), + // adding one co-view component for ~stab. // ----------------------------------------------------------------------- bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) { for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) { str_mem const& mem = node->str_mems()[mi]; SASSERT(mem.well_formed()); - if (mem.is_primitive()) + if (!mem.is_plain() || mem.is_primitive()) continue; euf::snode const* first = mem.m_str->first(); SASSERT(first); if (!first->is_var()) continue; + euf::snode const* R = mem.m_regex; - // Get the current stabilizer for this regex (no novelty guard). - euf::snode const* stabilizer = get_current_stabilizer(mem.m_regex); - if (!stabilizer || m_seq.re.is_epsilon(stabilizer->get_expr())) + // R must lie on a detected cycle with a marked SCC snapshot. + uint_set scc; + if (!collect_scc_for_projection(R, scc)) + continue; + const unsigned nu = m_projection_extract_idx; + if (nu == 0) continue; - // Collect primitive regex constraints on `first`. + // Decide L(⊓Reg_x) ⊆ stab(R,Q_ν) as ⊓Reg_x ∩ ~stab = ∅. + vector comps; dep_tracker x_dep = nullptr; - euf::snode const* x_regex = m_seq_regex->collect_primitive_regex_intersection( - first, *node, m_dep_mgr, x_dep); - if (!x_regex) + collect_var_components(first, *node, comps, x_dep); + comps.push_back(prod_comp::mk_view(R, R, nu, /*complemented*/ true)); + if (check_product_emptiness(comps, 5000) != l_true) continue; - // Check L(x_regex) ⊆ L(stabilizer). - if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true) - continue; - - // Subsume: replace x·rest ∈ r with rest ∈ r. + // Subsume: replace x·u ∈ R with u ∈ R. euf::snode const* tail = m_sg.drop_first(mem.m_str); SASSERT(tail); nielsen_node* child = mk_child(node); mk_edge(node, child, "cycle subs", true); - auto& child_mems = child->str_mems(); - for (unsigned k = 0; k < child_mems.size(); ++k) { - if (child_mems[k] == mem) { - child_mems[k] = child_mems.back(); - child_mems.pop_back(); + for (auto& cm : child->str_mems()) { + if (cm == mem) { + cm.m_str = tail; + cm.m_dep = m_dep_mgr.mk_join(cm.m_dep, x_dep); break; } } - const dep_tracker combined_dep = m_dep_mgr.mk_join(mem.m_dep, x_dep); - child->add_str_mem(str_mem(tail, mem.m_regex, combined_dep)); - TRACE(seq, tout << "cycle_subsumption: dropped x=" << mk_pp(first->get_expr(), m) << " from " << mk_pp(mem.m_str->get_expr(), m) - << " ∈ " << mk_pp(mem.m_regex->get_expr(), m) << "\n"); + << " ∈ " << mk_pp(R->get_expr(), m) << " nu=" << nu << "\n"); return true; } return false; } // ----------------------------------------------------------------------- - // Modifier: apply_cycle_decomposition - // Cycle decomposition: for a str_mem x·s ∈ R where a partial DFA - // cycle is detected, project SCC onto stabilizer constraint b. - // Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*). + // Modifier: apply_cycle_decomposition (paper Section "Cycle Decomposition") // - // Here stabilizer_re = π_{Q_SCC,{R}}(R) is the projection operator denoting - // the language of all paths from R back to R that stay within the explored - // subautomaton Q_SCC (including ε), i.e. s* for the non-empty cycle language - // s. It is kept symbolic; its derivative/nullability are evaluated lazily by - // the projection-aware sgraph (paper §3.3) rather than materialized. + // For a membership x·u ∈ R whose leading variable x sits on a detected cycle + // (R lies in an SCC of the partial DFA) and that does not already carry a + // matching cycle guard for the current SCC snapshot, split + // x → x'·x'' + // and attach the two *view*/*guard* primitive constraints (Section 3.3): + // x' ∈ stab(R, Q_ν) -- stabilizer view (F = {R}) + // noloop(x'', R, Q_ν) -- cycle guard (two-mode monitor) + // The leading x' is immediately subsumed (its only constraint is the view, + // and stab ⊆ stab trivially), so it is dropped from the primary constraint. // - // The constraint on x'' prevents divergence: x'' may not begin with any - // non-empty word from L(stabilizer_re), so it cannot re-enter the cycle. + // Unlike the old projection-operator encoding, the view and guard are kept + // as *constraint metadata* over the plain state R and the ν-indexed explored + // subautomaton Q_ν — nothing is materialized as a regex, which keeps the + // reachable state space finite (termination). // ----------------------------------------------------------------------- bool nielsen_graph::apply_cycle_decomposition(nielsen_node* node) { @@ -3518,7 +3490,7 @@ namespace seq { for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) { str_mem const& mem = node->str_mems()[mi]; SASSERT(mem.well_formed()); - if (mem.is_primitive()) + if (!mem.is_plain() || mem.is_primitive()) continue; euf::snode const* first = mem.m_str->first(); SASSERT(first); @@ -3526,29 +3498,47 @@ namespace seq { continue; euf::snode const* x = first; - euf::snode const* stabilizer_re = nullptr; + euf::snode const* R = mem.m_regex; // Eagerly precompute partial DFA edges from this regex so that // collect_scc_for_projection can detect cycles without waiting // for apply_regex_var_split to create per-minterm children. - precompute_partial_dfa(mem.m_regex, 2); + precompute_partial_dfa(R, 64); - if (!try_extract_partial_projection(mem.m_regex, stabilizer_re)) + // R must sit on a cycle (an SCC of the partial DFA). + uint_set scc; + if (!collect_scc_for_projection(R, scc)) + continue; + // Mark the SCC edges; this gives a ν identifying the current Q_SCC. + // (We trigger on absence of a matching guard, NOT on novelty.) + mark_scc_projection_edges(scc); + const unsigned nu = m_projection_extract_idx; + if (nu == 0) + continue; + fprintf(stderr, "DEC R=%u nu=%u sccsz=%u x=%u nedges=%u\n", R->id(), nu, scc.num_elems(), x->id(), (unsigned)m_partial_dfa_edges.size()); fflush(stderr); + + // Trigger condition: x must not already carry a cycle guard for the + // current SCC snapshot ν. All states of one SCC share a single ν, so + // the guard is keyed on ν rather than on the (changing) head R: as the + // derivation walks the SCC the head moves, but the lineage is already + // guarded against re-traversing the cycle. An already-decomposed + // variable whose guard refers to a strictly smaller, stale ν is + // re-decomposed to adopt the enlarged SCC. + bool already_guarded = false; + for (str_mem const& g : node->str_mems()) { + if (g.is_guard() && g.m_nu >= nu + && g.m_str && g.m_str->first() == x) { + already_guarded = true; + break; + } + } + if (already_guarded) continue; - SASSERT(stabilizer_re && stabilizer_re->get_expr()); - - // stabilizer_re is epsilon if the SCC has no non-trivial cycles — skip. - if (m_seq.re.is_epsilon(stabilizer_re->get_expr())) - continue; - - // Get sorts needed to build the xpp regex. - sort* re_sort = stabilizer_re->get_expr()->get_sort(); - sort* seq_sort = nullptr; - VERIFY(m_seq.is_re(stabilizer_re->get_expr(), seq_sort)); + sort* seq_sort = x->get_expr()->get_sort(); // Construct the replacement x = x' x'' - euf::snode const* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort)); + euf::snode const* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), R->get_expr(), seq_sort)); euf::snode const* xpp = get_tail(x, compute_length_expr(xp).get()); euf::snode const* xp_xpp = m_sg.mk_concat(xp, xpp); @@ -3563,32 +3553,17 @@ namespace seq { SASSERT(child->m_str_mem[mi].m_str->first() == xp); child->m_str_mem[mi].m_str = dir_drop(m_sg, child->m_str_mem[mi].m_str, 1, true); - // x' ∈ stabilizer_re (= s*, all repetitions of the detected cycle) - child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep)); + // x' ∈ stab(R, Q_ν) (stabilizer view, F = {R}, current state = R) + child->add_str_mem(str_mem::mk_view(xp, R, R, nu, mem.m_dep)); - // x'' ∈ complement((stabilizer_re ∩ complement(ε)) · Σ*) - // - // stabilizer_re ∩ complement(ε) = non-empty words in the cycle language - // (s_ne) · Σ* = all words whose prefix is a full non-empty cycle - // complement(...) = words that do NOT start with a full non-empty cycle - // - // This ensures x'' cannot begin another complete cycle from the same - // SCC entry point, which is what prevents infinite unfolding. - const expr_ref eps_re(m_seq.re.mk_epsilon(seq_sort), m); - const expr_ref compl_eps(m_seq.re.mk_complement(eps_re), m); - const expr_ref s_ne(m_seq.re.mk_inter(stabilizer_re->get_expr(), compl_eps), m); - const expr_ref sigma_star(m_seq.re.mk_full_seq(re_sort), m); - const expr_ref s_ne_sigma_star(m_seq.re.mk_concat(s_ne, sigma_star), m); - const expr_ref xpp_re(m_seq.re.mk_complement(s_ne_sigma_star), m); - euf::snode const* xpp_snode = m_sg.mk(xpp_re); - child->add_str_mem(str_mem(xpp, xpp_snode, mem.m_dep)); + // noloop(x'', R, Q_ν) (cycle guard, two-mode monitor, state = R) + child->add_str_mem(str_mem::mk_guard(xpp, R, R, nu, mem.m_dep)); TRACE(seq, tout << "cycle_decomp: x=" << mk_pp(x->get_expr(), m) - << " stabilizer=" << mk_pp(stabilizer_re->get_expr(), m) - << " xpp_re=" << xpp_re << "\n"); + << " R=" << mk_pp(R->get_expr(), m) << " nu=" << nu << "\n"); #ifdef Z3DEBUG - std::string dot = partial_dfa_to_dot(mem.m_regex, false); + std::string dot = partial_dfa_to_dot(R, false); #endif return true; } @@ -3686,11 +3661,9 @@ namespace seq { if (mem.m_str->is_empty() || mem.is_primitive()) continue; - // The split engine works on plain regex AST and does not understand the - // projection operator (re.proj) — it would give up on it anyway. - // Projection-constrained memberships are handled by the - // cycle-decomposition path, so skip them here. - if (mem.m_regex->has_projection()) + // View / guard memberships (Section 3.3) are handled by the + // cycle machinery and the synchronous product, not by factorization. + if (!mem.is_plain()) continue; split_set pairs; @@ -4083,8 +4056,12 @@ namespace seq { expr_ref c2(m), th2(m), el2(m); if (!bool_rewriter(m).decompose_ite(r, c2, th2, el2)) { - // No ite remaining: leaf → create child node with regex updated to r - euf::snode const* new_regex_snode = m_sg.mk(r); + // No ite remaining: leaf → create child node with regex updated to r. + // Canonicalize with th_rewriter so that the resolved leaf shares + // its snode id with the corresponding partial-DFA state (which is + // built by brzozowski_deriv); otherwise un-simplified residuals + // like (a|∅)·R≠a·R break view/guard Q-membership and lap checks. + euf::snode const* new_regex_snode = mk_rewrite(r); nielsen_node *child = mk_child(node); nielsen_edge* e = mk_edge(node, child, "regex if", true); for (const auto f : cs) { @@ -4093,6 +4070,12 @@ namespace seq { for (str_mem &cm : child->str_mems()) { if (cm == mem) { cm.m_regex = new_regex_snode; + // A guard whose symbolic step lands back on the cycle + // head closed a lap within Q → this branch is dead. + if (cm.is_guard() && new_regex_snode == cm.m_root) { + child->set_general_conflict(); + child->set_conflict(backtrack_reason::regex, cm.m_dep); + } break; } } @@ -4938,28 +4921,184 @@ namespace seq { // Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329) // ----------------------------------------------------------------------- + // ----------------------------------------------------------------------- + // Synchronous product over plain / view / guard / co-view components. + // ----------------------------------------------------------------------- + + lbool nielsen_graph::comp_accepting(prod_comp const& c) const { + if (c.m_dead) + return l_false; + switch (c.m_kind) { + case mem_kind::plain: + return m_sg.re_nullable(c.m_state); + case mem_kind::stab_view: + if (c.m_complemented) + return (c.m_sink || c.m_state != c.m_root) ? l_true : l_false; + return (c.m_state == c.m_root) ? l_true : l_false; + case mem_kind::no_loop: + return l_true; // guard accepts on every prefix it has not failed on + } + return l_undef; + } + + nielsen_graph::prod_comp nielsen_graph::comp_step(prod_comp const& c, euf::snode const* mt) { + prod_comp r = c; + if (c.m_dead) + return r; + switch (c.m_kind) { + case mem_kind::plain: { + euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt); + if (!d || d->is_fail()) r.m_dead = true; else r.m_state = d; + return r; + } + case mem_kind::stab_view: { + if (c.m_complemented) { + if (c.m_sink) return r; // Σ* + if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_sink = true; return r; } + euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt); + if (!d || d->is_fail()) { r.m_sink = true; return r; } // ~∅ = Σ* + r.m_state = d; + return r; + } + if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_dead = true; return r; } + euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt); + if (!d || d->is_fail()) { r.m_dead = true; return r; } + r.m_state = d; + return r; + } + case mem_kind::no_loop: { + if (c.m_sink) return r; // discharged: Σ* + if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_sink = true; return r; } + euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt); + if (!d || d->is_fail()) { r.m_sink = true; return r; } // lap cannot close through ∅ + if (d == c.m_root) { r.m_dead = true; return r; } // lap completed → forbidden + r.m_state = d; + return r; + } + } + return r; + } + + lbool nielsen_graph::check_product_emptiness(vector const& comps0, unsigned max_states) { + if (comps0.empty()) + return l_false; // empty intersection = Σ* (non-empty) + + auto encode = [](vector const& cs) { + std::vector key; + key.reserve(cs.size() * 5); + for (auto const& c : cs) { + key.push_back(static_cast(c.m_kind)); + key.push_back((c.m_complemented ? 1u : 0u) | (c.m_sink ? 2u : 0u) | (c.m_dead ? 4u : 0u)); + key.push_back(c.m_state ? c.m_state->id() : UINT_MAX); + } + return key; + }; + + std::set> visited; + vector> work; + work.push_back(comps0); + visited.insert(encode(comps0)); + unsigned explored = 0; + + while (!work.empty()) { + if (!m.inc()) + return l_undef; + if (explored >= max_states) + return l_undef; + vector cur = work.back(); + work.pop_back(); + ++explored; + + bool any_dead = false; + for (auto const& c : cur) if (c.m_dead) { any_dead = true; break; } + if (any_dead) + continue; + + // simultaneously accepting? + bool all_acc = true, any_undef = false; + for (auto const& c : cur) { + const lbool a = comp_accepting(c); + if (a == l_false) { all_acc = false; break; } + if (a == l_undef) any_undef = true; + } + if (all_acc && !any_undef) + return l_false; // found a common word + + // joint first-character partition = minterms of the intersection of + // all still-discriminating component states. + expr* combined = nullptr; + for (auto const& c : cur) { + if (c.m_sink || c.m_dead) continue; + combined = combined ? m_seq.re.mk_inter(combined, c.m_state->get_expr()) + : c.m_state->get_expr(); + } + if (!combined) + continue; // no discriminating state and not accepting: dead end + euf::snode_vector mts; + m_sg.compute_minterms(m_sg.mk(combined), mts); + + for (euf::snode const* mt : mts) { + vector nxt; + bool dead = false; + for (auto const& c : cur) { + prod_comp d = comp_step(c, mt); + if (d.m_dead) { dead = true; break; } + nxt.push_back(d); + } + if (dead) + continue; + if (visited.insert(encode(nxt)).second) + work.push_back(nxt); + } + } + return l_true; // exhausted with no accepting tuple → empty + } + + bool nielsen_graph::collect_var_components(euf::snode const* var, nielsen_node const& node, + vector& out, dep_tracker& dep) { + bool found = false; + for (auto const& mem : node.str_mems()) { + if (!mem.is_primitive()) + continue; + if (mem.m_str->first() != var) + continue; + switch (mem.m_kind) { + case mem_kind::plain: + out.push_back(prod_comp::mk_plain(mem.m_regex)); + break; + case mem_kind::stab_view: + out.push_back(prod_comp::mk_view(mem.m_regex, mem.m_root, mem.m_nu, false)); + break; + case mem_kind::no_loop: + out.push_back(prod_comp::mk_guard(mem.m_regex, mem.m_root, mem.m_nu, mem.m_discharged)); + break; + } + dep = m_dep_mgr.mk_join(dep, mem.m_dep); + found = true; + } + return found; + } + bool nielsen_graph::check_leaf_regex(nielsen_node const& node, dep_tracker& dep) { SASSERT(m_seq_regex); - // Group str_mem constraints by variable (primitive constraints only) - u_map> var_regexes; - + // distinct variables carrying a primitive constraint + uint_set seen; for (auto const& mem : node.str_mems()) { SASSERT(mem.is_primitive()); euf::snode const* const first = mem.m_str->first(); SASSERT(first && first->is_var()); - auto &[fst, snd] = var_regexes.insert_if_not_there(first->id(), std::pair()); - fst.push_back(mem.m_regex); - snd = dep_mgr().mk_join(snd, mem.m_dep); - } + if (seen.contains(first->id())) + continue; + seen.insert(first->id()); - // check intersection non-emptiness (also for single occurrences; it could be empty) - for (auto& [var_id, regexes] : var_regexes) { - const lbool result = m_seq_regex->check_intersection_emptiness(regexes.first, 5000); + vector comps; + dep_tracker d = nullptr; + collect_var_components(first, node, comps, d); + const lbool result = check_product_emptiness(comps, 5000); if (result == l_true) { TRACE(seq, tout << "empty intersection\n"); - // Intersection is empty — infeasible - dep = regexes.second; + dep = d; return false; } } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 958c134059..707c3a8641 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -275,18 +275,55 @@ namespace seq { << snode_label_html(p.deq.m_rhs, p.m, false); } + // kind of a regex membership constraint (paper Section 3.3, "views"): + // - plain: ordinary u ∈ r + // - stab_view: stabilizer view u ∈_{Q,{root}} root (acceptance set F={root}) + // - no_loop: cycle guard noloop(u, root, Q) (two-mode monitor) + // For view/guard, m_regex holds the *current derivative state* (a plain + // regex; starts at m_root) and Q is identified by the ν-index m_nu over + // the partial DFA (projection_state_in_Q). This replaces the old re.proj + // projection operator: m_regex is always a plain regex now. + enum class mem_kind : unsigned char { plain, stab_view, no_loop }; + // regex membership constraint: str in regex // mirrors ZIPT's StrMem struct str_mem { euf::snode const* m_str; // assumed to be non-null - euf::snode const* m_regex; // assumed to be non-null + euf::snode const* m_regex; // assumed to be non-null (plain regex = current run state) dep_tracker m_dep; + // view / guard annotation (Section 3.3) + mem_kind m_kind = mem_kind::plain; + euf::snode const* m_root = nullptr; // cycle head r (view F={r}; guard lap head) + unsigned m_nu = 0; // ν: snapshot index identifying Q + bool m_discharged = false; // guard monitor: false=watch, true=discharged + str_mem(euf::snode const* str, euf::snode const* regex, dep_tracker const& dep): m_str(str), m_regex(regex), m_dep(dep) {} + // factory for a stabilizer view str ∈_{Q_ν,{root}} root (m_regex=state) + static str_mem mk_view(euf::snode const* str, euf::snode const* state, + euf::snode const* root, unsigned nu, dep_tracker const& dep) { + str_mem r(str, state, dep); + r.m_kind = mem_kind::stab_view; r.m_root = root; r.m_nu = nu; + return r; + } + // factory for a cycle guard noloop(str, root, Q_ν) (m_regex=state) + static str_mem mk_guard(euf::snode const* str, euf::snode const* state, + euf::snode const* root, unsigned nu, dep_tracker const& dep) { + str_mem r(str, state, dep); + r.m_kind = mem_kind::no_loop; r.m_root = root; r.m_nu = nu; + return r; + } + + bool is_plain() const { return m_kind == mem_kind::plain; } + bool is_view() const { return m_kind == mem_kind::stab_view; } + bool is_guard() const { return m_kind == mem_kind::no_loop; } + bool operator==(str_mem const& other) const { - return m_str == other.m_str && m_regex == other.m_regex; + return m_str == other.m_str && m_regex == other.m_regex + && m_kind == other.m_kind && m_root == other.m_root + && m_nu == other.m_nu && m_discharged == other.m_discharged; } // check if the constraint has the form x in R with x a single variable @@ -617,6 +654,12 @@ namespace seq { // Returns proceed, conflict, satisfied, or restart. simplify_result simplify_and_init(ptr_vector const& cur_path); + // Consume leading concrete/symbolic characters of a view/guard membership + // (Section 3.3): gate on Q_ν, step with the ordinary derivative, keeping + // the annotation. Returns true if the constraint died (view left Q, or + // guard completed a lap) — the caller reports a regex conflict. + bool consume_view_guard(str_mem& mem); + // true if all str_eqs are trivial and there are no str_mems bool is_satisfied() const; @@ -678,7 +721,7 @@ namespace seq { // the overall Nielsen transformation graph // mirrors ZIPT's NielsenGraph - class nielsen_graph : public euf::projection_oracle { + class nielsen_graph { friend class nielsen_node; friend class nielsen_edge; @@ -792,7 +835,7 @@ namespace seq { // Construct with a caller-supplied solver. Ownership is NOT transferred; // the caller is responsible for keeping the solver alive. nielsen_graph(euf::sgraph& sg, sub_solver_i& solver, context_solver_i& ctx); - ~nielsen_graph() override; + ~nielsen_graph(); ast_manager& get_manager() const { @@ -803,10 +846,10 @@ namespace seq { seq_util& seq() { return m_seq; } seq_util const& seq() const { return m_seq; } - // euf::projection_oracle: true iff regex `state` is part of the - // explored subautomaton snapshot `nu` (a partial-DFA edge incident to - // `state` was marked with an index in [1, nu]). - bool projection_state_in_Q(expr* state, unsigned nu) override; + // true iff regex `state` is part of the explored subautomaton snapshot + // `nu` — i.e. state ∈ Q_ν (a partial-DFA edge incident to `state` was + // marked with an index in [1, nu]). Used to gate view/guard derivation. + bool projection_state_in_Q(expr* state, unsigned nu); // node management nielsen_node* mk_node(); @@ -982,6 +1025,45 @@ namespace seq { // Mirrors ZIPT NielsenNode.CheckRegex) bool check_leaf_regex(nielsen_node const& node, dep_tracker& dep); + // ------------------------------------------------------------------- + // Synchronous product over plain / view / guard / co-view components + // (paper Section 3.3, "Deciding the intersection check"). Each + // component carries its own acceptance + (gated) one-character law; + // the product is empty iff no reachable tuple is simultaneously + // accepting. m_regex always being a plain regex, the derivative is + // the ordinary Brzozowski one and only acceptance / the Q-gate differ. + // ------------------------------------------------------------------- + struct prod_comp { + mem_kind m_kind = mem_kind::plain; + bool m_complemented = false; // ~stab co-view (kind==stab_view) + euf::snode const* m_state = nullptr; // current plain regex state + euf::snode const* m_root = nullptr; // view/guard cycle head + unsigned m_nu = 0; // ν (Q snapshot) + bool m_sink = false; // co-view became Σ* / guard discharged + bool m_dead = false; // language collapsed to ∅ + + static prod_comp mk_plain(euf::snode const* s) { prod_comp c; c.m_state = s; return c; } + static prod_comp mk_view(euf::snode const* s, euf::snode const* root, unsigned nu, bool compl_) { + prod_comp c; c.m_kind = mem_kind::stab_view; c.m_state = s; c.m_root = root; c.m_nu = nu; c.m_complemented = compl_; return c; + } + static prod_comp mk_guard(euf::snode const* s, euf::snode const* root, unsigned nu, bool discharged) { + prod_comp c; c.m_kind = mem_kind::no_loop; c.m_state = s; c.m_root = root; c.m_nu = nu; c.m_sink = discharged; return c; + } + }; + + // l_true = empty, l_false = non-empty (a simultaneously accepting tuple + // was reached), l_undef = budget exhausted / inconclusive. + lbool check_product_emptiness(vector const& comps, unsigned max_states); + + // acceptance / single-character step of one product component. + lbool comp_accepting(prod_comp const& c) const; + prod_comp comp_step(prod_comp const& c, euf::snode const* mt); + + // Build the product components for variable `var` from the node's + // primitive memberships (plain / view / guard). Joins their deps. + bool collect_var_components(euf::snode const* var, nielsen_node const& node, + vector& out, dep_tracker& dep); + // ------------------------------------------------------------------- // Partial DFA projection helpers // ------------------------------------------------------------------- @@ -1003,17 +1085,6 @@ namespace seq { // currently covered edge count for this extraction. unsigned mark_scc_projection_edges(uint_set const& scc); - // Build the projection operator π_{Q,{root_re}}(root_re) as a re.proj - // skolem snode, where Q is the explored subautomaton identified by the - // snapshot index nu. This is the stabilizer of root_re kept symbolically - // (the projection's derivative/nullability are evaluated lazily by the - // sgraph consulting projection_state_in_Q). - euf::snode const* mk_projection_term(euf::snode const* root_re, unsigned nu); - - // Try to extract a stronger projection for root_re. Returns true and - // stores it in projection_re iff SCC coverage has grown. - bool try_extract_partial_projection(euf::snode const* root_re, euf::snode const*& projection_re); - euf::snode const* get_slice(euf::snode const* v, expr* left, expr* right); euf::snode const* get_tail(euf::snode const* v, expr* cnt, bool fwd = true); @@ -1105,22 +1176,12 @@ namespace seq { // Fires without the novelty guard, using the current partial DFA state. bool apply_cycle_subsumption(nielsen_node* node); - // Return the current stabilizer s* for root_re from the partial DFA - // (bypasses the novelty guard used by try_extract_partial_projection). - euf::snode const* get_current_stabilizer(euf::snode const* root_re); - // BFS of Brzozowski derivatives from root_re up to `depth` steps, // eagerly recording concrete minterm edges in the partial DFA so that // collect_scc_for_projection can find cycles without first waiting for // concrete children to record them one level at a time. void precompute_partial_dfa(euf::snode const* root_re, unsigned depth); - // Walk an ite-structured symbolic derivative expression and record - // concrete DFA edges for each non-fail branch. - // Called from simplify_and_init when a symbolic character is consumed, - // so that cycle_decomp can detect SCCs lazily (as with concrete chars). - void record_dfa_edges_from_ite(euf::snode const* src_re, expr* ite_deriv); - // generalized power introduction: for an equation where one head is // a variable v and the other side has ground prefix + a variable x // forming a cycle back to v, introduce v = base^n · suffix. diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index 3308136908..1572d70fa0 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -399,14 +399,6 @@ namespace seq { if (re->is_fail() || re->is_full_char() || re->is_full_seq()) return; - // projection operator: only the regex arguments carry character - // structure; the third argument is the integer snapshot index. - if (re->is_projection()) { - collect_char_boundaries(re->arg(0), bounds); - collect_char_boundaries(re->arg(1), bounds); - return; - } - // If we reached a leaf and none of the expected leaf forms matched, // this is a regex constructor we did not account for in boundary // extraction and should fail loudly in debug builds. @@ -622,8 +614,11 @@ namespace seq { euf::snode const* result = nullptr; for (auto const& mem : node.str_mems()) { - // Primitive constraint: str is a single variable - if (!mem.is_primitive()) + // Primitive constraint: str is a single variable. View/guard + // memberships do not denote a plain regex on `var` (their m_regex + // is a derivative *state*), so skip them — yielding a coarser but + // sound over-approximation for the caller (regex widening). + if (!mem.is_primitive() || !mem.is_plain()) continue; euf::snode const* first = mem.m_str->first(); // NSB review: why is this "first" and not mem.m_str? diff --git a/src/smt/seq_model.cpp b/src/smt/seq_model.cpp index bbcc914da3..2148ded9d8 100644 --- a/src/smt/seq_model.cpp +++ b/src/smt/seq_model.cpp @@ -466,7 +466,7 @@ namespace smt { // but also some plain Boolean-closure / large length-intersected // shapes. Fall back to a derivative-automaton BFS that builds an // accepting word of the requested length directly. - wr = projection_witness(m_sg.mk(re_expr), witness); + wr = derivative_witness(m_sg.mk(re_expr), witness); } if (wr == l_true) { SASSERT(witness); @@ -517,15 +517,15 @@ namespace smt { return m_seq.str.mk_empty(srt); } - lbool seq_model::projection_witness(euf::snode const* re0, expr_ref& witness) const { + lbool seq_model::derivative_witness(euf::snode const* re0, expr_ref& witness) const { SASSERT(re0 && re0->get_expr()); sort* seq_sort = nullptr; if (!m_seq.is_re(re0->get_expr(), seq_sort)) return l_undef; - // Shortest-path BFS over the projection-aware derivative automaton. + // Shortest-path BFS over the derivative automaton. // Each frontier item is (state, accepting-word-so-far). A state is - // accepting when re_nullable (projection-aware) reports nullable. + // accepting when re_nullable reports nullable. // The regex carries the length intersection (∩ Σ^n), so an accepting // run has exactly the requested length and the search is finite. vector> work; @@ -582,6 +582,11 @@ namespace smt { if (mem.is_trivial(sat_node)) continue; // empty string in nullable regex: already satisfied, no variable to constrain VERIFY(mem.is_primitive()); // everything else should have been eliminated already + // TODO(view/guard witness): a stabilizer view / cycle guard does not + // denote a plain regex on the variable; for now skip it during model + // construction (handled by the dedicated view/guard witness below). + if (!mem.is_plain()) + continue; unsigned id = var_key(mem.m_str); euf::snode const* existing = nullptr; if (m_var_regex.find(id, existing) && existing) { diff --git a/src/smt/seq_model.h b/src/smt/seq_model.h index 5c028464ba..06c24bc3c2 100644 --- a/src/smt/seq_model.h +++ b/src/smt/seq_model.h @@ -119,11 +119,11 @@ namespace smt { // Witness extraction for regexes that contain a projection operator // (re.proj), which the standard seq_rewriter::some_seq_in_re cannot - // handle. Runs a shortest-path BFS over the projection-aware + // handle. Runs a shortest-path BFS over the // derivative automaton (m_sg) for a nullable (accepting) state, // building the accepting word. Returns l_true and sets `witness` // on success. - lbool projection_witness(euf::snode const* re, expr_ref& witness) const; + lbool derivative_witness(euf::snode const* re, expr_ref& witness) const; // collect per-variable regex constraints from the state. // For each positive str_mem, records the regex (or intersects diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index bdd96b7568..1c963be421 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -1844,24 +1844,22 @@ namespace smt { unsigned_vector const &mem_indices = var_to_mems[var_id]; euf::snode_vector regexes; - bool has_projection = false; + bool has_view_or_guard = false; for (auto i : mem_indices) { SASSERT(mems[i].well_formed()); regexes.push_back(mems[i].m_regex); - if (mems[i].m_regex->has_projection()) - has_projection = true; + // Synthetic cycle variables carry a stabilizer view / cycle guard + // (Section 3.3) rather than a real regex; skip length coherence. + if (!mems[i].is_plain()) + has_view_or_guard = true; } - // Skip length coherence for synthetic variables constrained by a - // projection operator (the cycle variable x'∈π(R) and remainder - // x''∈~((π(R)∩~ε)Σ*) introduced by cycle decomposition). The - // Σ^l ∩ projection gradient does not converge for them — the - // benchmark has no real length constraints, so the integer solver - // is free to pick ever-larger lengths for these *synthetic* vars - // and the coherence loop rejects each one (len≠l) forever. Their - // length consistency is already guaranteed by the soundness of the - // decomposition, so leaving the integer assignment alone is safe. - if (has_projection) + // Skip length coherence for synthetic cycle variables constrained by a + // stabilizer view / cycle guard (x'∈stab(R), noloop(x'',R)) introduced + // by cycle decomposition: the Σ^l ∩ view gradient does not converge, + // the benchmark has no real length constraints, and their length + // consistency is guaranteed by the soundness of the decomposition. + if (has_view_or_guard) continue; SASSERT(!regexes.empty()); diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp index 7ffae340b0..877e0a96fa 100644 --- a/src/test/nseq_basic.cpp +++ b/src/test/nseq_basic.cpp @@ -73,7 +73,7 @@ static void test_nseq_param_validation() { static void test_nseq_param_validation_rejects_invalid() { std::cout << "test_nseq_param_validation_rejects_invalid\n"; const smt_params p; - static const char* invalid_variants[] = { "nseq2", "NSEQ", "nseqq", "nse", "Nseq", "nseq ", "" }; + static const char* invalid_variants[] = { "nseq3", "NSEQ", "nseqq", "nse", "Nseq", "nseq ", "" }; for (const auto s : invalid_variants) { bool threw = false; try {