3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-27 10:58:48 +00:00

Projection operator => view

This commit is contained in:
CEisenhofer 2026-06-25 10:48:20 +02:00
parent f126b60369
commit 64fed10e86
10 changed files with 494 additions and 651 deletions

View file

@ -111,11 +111,6 @@ namespace euf {
if (m_seq.str.is_in_re(e))
return snode_kind::s_in_re;
// projection operator π_{Q,F}(state) modeled as the re.proj skolem.
if (m_seq.is_skolem(e) &&
to_app(e)->get_decl()->get_parameter(0).get_symbol() == re_proj_name())
return snode_kind::s_projection;
return snode_kind::s_var;
}
@ -290,18 +285,6 @@ namespace euf {
n->m_length = 1;
break;
case snode_kind::s_projection:
// re.proj(state, root, nu): args 0/1 are regexes, arg 2 is the
// snapshot index (an integer numeral). Ground/classical follow the
// regex arguments only.
SASSERT(n->num_args() == 3);
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
n->m_regex_free = false;
n->m_is_classical = false;
n->m_level = 1;
n->m_length = 1;
break;
default:
// NSB review: is this the correct defaults for unclassified nodes?
// Is this UNREACHABLE()?
@ -311,14 +294,6 @@ namespace euf {
n->m_length = 1;
break;
}
// A regex (sub)tree carries a projection iff it is a projection node or
// any argument does. Computed uniformly here so all callers can use the
// cheap snode flag instead of re-walking the AST.
n->m_has_projection = (n->m_kind == snode_kind::s_projection);
for (unsigned i = 0; i < n->num_args() && !n->m_has_projection; ++i)
if (n->arg(i)->has_projection())
n->m_has_projection = true;
}
static const unsigned HASH_BASE = 31;
@ -575,272 +550,10 @@ namespace euf {
return n;
}
bool sgraph::is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const {
if (!e || !m_seq.is_skolem(e))
return false;
app* ap = to_app(e);
if (ap->get_decl()->get_parameter(0).get_symbol() != re_proj_name())
return false;
if (ap->get_num_args() != 3)
return false;
arith_util au(m);
rational r;
if (!au.is_numeral(ap->get_arg(2), r) || !r.is_unsigned())
return false;
state = ap->get_arg(0);
root = ap->get_arg(1);
nu = r.get_unsigned();
return true;
}
expr_ref sgraph::mk_re_proj(expr* state, expr* root, unsigned nu) {
SASSERT(state && root);
arith_util au(m);
expr* args[3] = { state, root, au.mk_int(nu) };
sort* re_sort = state->get_sort();
return expr_ref(m_seq.mk_skolem(re_proj_name(), 3, args, re_sort), m);
}
// A symbolic-character derivative is a linear form: a *dispatch* over the
// character built from `ite` (predicate selection) and `union` (the linear
// factors α_i·r_i), bottoming out in ordinary derivative *states* r_i. We
// recognize this dispatch skeleton so that π can be pushed all the way to
// the concrete state leaves (where it is meaningful) rather than wrapped
// around a non-atomic union/ite (which is not a DFA state and breaks
// projection_state_in_Q / nullability). A plain regex state (even one that
// is itself a union, e.g. δ of a union regex) is NOT dispatch.
bool sgraph::is_char_dispatch(expr* e) const {
if (m.is_ite(e))
return true;
if (m_seq.re.is_empty(e))
return true;
expr* a = nullptr, *b = nullptr;
if (m_seq.re.is_union(e, a, b))
return is_char_dispatch(a) && is_char_dispatch(b);
return false;
}
expr_ref sgraph::wrap_proj(expr* e, expr* root, unsigned nu) {
// Push π_{Q,F} through the dispatch skeleton (ite / dispatch-union) to
// the concrete state leaves (paper §4).
expr* c = nullptr, *th = nullptr, *el = nullptr;
if (m.is_ite(e, c, th, el)) {
expr_ref t = wrap_proj(th, root, nu);
expr_ref f = wrap_proj(el, root, nu);
return expr_ref(m.mk_ite(c, t, f), m);
}
expr* a = nullptr, *b = nullptr;
if (m_seq.re.is_union(e, a, b) && is_char_dispatch(e)) {
expr_ref wa = wrap_proj(a, root, nu);
expr_ref wb = wrap_proj(b, root, nu);
return expr_ref(m_seq.re.mk_union(wa, wb), m);
}
// π(∅) ≡ ∅: a dead leaf stays dead.
if (m_seq.re.is_empty(e))
return expr_ref(e, m);
return mk_re_proj(e, root, nu);
}
// 3-valued Kleene conjunction / disjunction over lbool.
static lbool lb_and(lbool a, lbool b) {
if (a == l_false || b == l_false) return l_false;
if (a == l_undef || b == l_undef) return l_undef;
return l_true;
}
static lbool lb_or(lbool a, lbool b) {
if (a == l_true || b == l_true) return l_true;
if (a == l_undef || b == l_undef) return l_undef;
return l_false;
}
lbool sgraph::re_nullable(snode const* re) {
if (!re)
return l_undef;
// Projection-free regexes: defer to the standard regex info.
if (!re->has_projection())
return m_seq.re.get_info(re->get_expr()).nullable;
switch (re->kind()) {
case snode_kind::s_projection: {
// nullable(π_{{root}}(state)) = [state ∈ F] = [state ≡ root].
// Regex expressions are perfectly shared, so syntactic pointer
// equality coincides with the expr-id equality used for cycle
// detection in the partial DFA.
expr* state = nullptr, *root = nullptr; unsigned nu = 0;
VERIFY(is_re_proj(re->get_expr(), state, root, nu));
return (state == root) ? l_true : l_false;
}
case snode_kind::s_complement:
return ~re_nullable(re->arg(0));
case snode_kind::s_intersect:
case snode_kind::s_concat:
return lb_and(re_nullable(re->arg(0)), re_nullable(re->arg(1)));
case snode_kind::s_union:
return lb_or(re_nullable(re->arg(0)), re_nullable(re->arg(1)));
case snode_kind::s_star:
return l_true;
case snode_kind::s_plus:
return re_nullable(re->arg(0));
case snode_kind::s_ite:
// ε-acceptance of a symbolic-derivative residual depends on the
// (unresolved) character predicate, so it is genuinely unknown
// until the ite is split (apply_regex_if_split).
return l_undef;
default:
// Not expected to carry a projection in our constructions; fall
// back to the standard info (sound for projection-free shapes).
return m_seq.re.get_info(re->get_expr()).nullable;
}
}
snode const* sgraph::deriv_proj(snode const* re, expr* ch) {
SASSERT(re && re->get_expr());
expr* re_expr = re->get_expr();
sort* re_sort = re_expr->get_sort();
// Projection-free subterm: the standard derivative already handles it
// (and keeps the result canonical, matching partial-DFA state ids).
if (!re->has_projection())
return mk(m_rewriter.mk_derivative(ch, re_expr));
// Language-preserving combinators that DISTRIBUTE over ite. The
// symbolic-character derivative of a regex is a linear form: a
// *top-level* ite dispatching on character predicates over the (single)
// symbolic char, with ordinary derivative regexes at the leaves. Both
// projection leaves (via wrap_proj) and projection-free subterms (via
// mk_derivative) already yield top-level ites; if the surrounding
// Boolean/concat operators did not push themselves into the ite leaves,
// the ite would end up *buried* (e.g. ~(ite(...)·Σ*)) where
// apply_regex_if_split — which only matches a top-level ite — can no
// longer resolve the symbolic char, stalling the constraint and causing
// unbounded variable unwinding. Distributing keeps the result a proper
// top-level linear form, exactly as the standard mk_derivative does.
// These combinators also fold the trivial regex identities so the
// projection skolem and its inner state id are preserved verbatim.
// The symbolic-character derivative is a dispatch over the char built
// from `ite` (predicate selection) AND `union` (the linear factors).
// The combinators below distribute over BOTH so the dispatch skeleton
// stays at the top with concrete (π-wrapped) state leaves — otherwise a
// surrounding operator buries the dispatch and apply_regex_if_split can
// no longer resolve the symbolic char (the multi-cycle-SCC divergence).
// We only distribute over a union that is itself a char-dispatch (so a
// semantic state-union — e.g. δ of a union regex — is left intact and
// not needlessly expanded). All distributions are language-preserving
// (∩ and · distribute over ⊔; ~(A⊔B) = ~A ∩ ~B by De Morgan).
auto is_empty = [&](expr* r) { return m_seq.re.is_empty(r); };
auto is_full = [&](expr* r) { return m_seq.re.is_full_seq(r); };
auto is_eps = [&](expr* r) { return m_seq.re.is_epsilon(r); };
auto is_ite = [&](expr* r, expr*& c, expr*& t, expr*& e) { return m.is_ite(r, c, t, e); };
auto is_disp_union = [&](expr* r, expr*& a, expr*& b) {
return m_seq.re.is_union(r, a, b) && is_char_dispatch(r);
};
std::function<expr*(expr*, expr*)> mk_union = [&](expr* x, expr* y) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr;
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_union(t, y), mk_union(e, y));
if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_union(x, t), mk_union(x, e));
if (is_empty(x)) return y;
if (is_empty(y)) return x;
if (x == y) return x;
if (is_full(x) || is_full(y)) return m_seq.re.mk_full_seq(re_sort);
return m_seq.re.mk_union(x, y);
};
std::function<expr*(expr*, expr*)> mk_inter = [&](expr* x, expr* y) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_inter(t, y), mk_inter(e, y));
if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_inter(x, t), mk_inter(x, e));
if (is_disp_union(x, a, b)) return mk_union(mk_inter(a, y), mk_inter(b, y));
if (is_disp_union(y, a, b)) return mk_union(mk_inter(x, a), mk_inter(x, b));
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
if (is_full(x)) return y;
if (is_full(y)) return x;
if (x == y) return x;
return m_seq.re.mk_inter(x, y);
};
std::function<expr*(expr*, expr*)> mk_concat = [&](expr* x, expr* y) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_concat(t, y), mk_concat(e, y));
if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_concat(x, t), mk_concat(x, e));
if (is_disp_union(x, a, b)) return mk_union(mk_concat(a, y), mk_concat(b, y));
if (is_disp_union(y, a, b)) return mk_union(mk_concat(x, a), mk_concat(x, b));
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
if (is_eps(x)) return y;
if (is_eps(y)) return x;
return m_seq.re.mk_concat(x, y);
};
std::function<expr*(expr*)> mk_compl = [&](expr* x) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_compl(t), mk_compl(e));
if (is_disp_union(x, a, b)) return mk_inter(mk_compl(a), mk_compl(b)); // De Morgan
if (is_empty(x)) return m_seq.re.mk_full_seq(re_sort);
if (is_full(x)) return m_seq.re.mk_empty(re_sort);
expr* inner = nullptr;
if (m_seq.re.is_complement(x, inner)) return inner;
return m_seq.re.mk_complement(x);
};
switch (re->kind()) {
case snode_kind::s_projection: {
expr* state = nullptr, *root = nullptr; unsigned nu = 0;
VERIFY(is_re_proj(re_expr, state, root, nu));
// δ_a(π_{Q,{root}}(state)) = π_{Q,{root}}(δ_a(state)) if state ∈ Q,
// else ⊥. The gate is on the *current* state (paper §3.3).
if (!m_proj_oracle || !m_proj_oracle->projection_state_in_Q(state, nu))
return mk(m_seq.re.mk_empty(re_sort));
snode const* dstate = deriv_proj(re->arg(0), ch); // arg(0) ≡ state
if (!dstate || dstate->is_fail() || m_seq.re.is_empty(dstate->get_expr()))
return mk(m_seq.re.mk_empty(re_sort));
// δ(state) may be concrete (one state) or an ite-term (symbolic
// character). Wrap π around the result, descending into ite leaves.
return mk(wrap_proj(dstate->get_expr(), root, nu));
}
case snode_kind::s_ite: {
// ite-structured residual (from a symbolic-character derivative):
// δ_a(ite(c, th, el)) = ite(c, δ_a(th), δ_a(el)).
snode const* dth = deriv_proj(re->arg(1), ch);
snode const* del = deriv_proj(re->arg(2), ch);
return mk(expr_ref(m.mk_ite(re->arg(0)->get_expr(), dth->get_expr(), del->get_expr()), m));
}
case snode_kind::s_complement: {
snode const* d = deriv_proj(re->arg(0), ch);
return mk(expr_ref(mk_compl(d->get_expr()), m));
}
case snode_kind::s_intersect: {
snode const* d0 = deriv_proj(re->arg(0), ch);
snode const* d1 = deriv_proj(re->arg(1), ch);
return mk(expr_ref(mk_inter(d0->get_expr(), d1->get_expr()), m));
}
case snode_kind::s_union: {
snode const* d0 = deriv_proj(re->arg(0), ch);
snode const* d1 = deriv_proj(re->arg(1), ch);
return mk(expr_ref(mk_union(d0->get_expr(), d1->get_expr()), m));
}
case snode_kind::s_concat: {
// δ_a(R·S) = δ_a(R)·S ⊔ (nullable(R) ? δ_a(S) : ∅)
snode const* d0 = deriv_proj(re->arg(0), ch);
expr* head = mk_concat(d0->get_expr(), re->arg(1)->get_expr());
if (re_nullable(re->arg(0)) == l_true) {
snode const* d1 = deriv_proj(re->arg(1), ch);
head = mk_union(head, d1->get_expr());
}
return mk(expr_ref(head, m));
}
case snode_kind::s_star: {
// δ_a(R*) = δ_a(R)·R*
snode const* d = deriv_proj(re->arg(0), ch);
return mk(expr_ref(mk_concat(d->get_expr(), re_expr), m));
}
case snode_kind::s_plus: {
// δ_a(R+) = δ_a(R)·R*
snode const* d = deriv_proj(re->arg(0), ch);
expr_ref star(m_seq.re.mk_star(re->arg(0)->get_expr()), m);
return mk(expr_ref(mk_concat(d->get_expr(), star), m));
}
default:
// A projection nested under an operator we do not specially derive
// is not produced by our constructions. Fall back conservatively.
return mk(m_rewriter.mk_derivative(ch, re_expr));
}
return m_seq.re.get_info(re->get_expr()).nullable;
}
snode const* sgraph::brzozowski_deriv(snode const* re, snode const* elem) {
@ -891,24 +604,17 @@ namespace euf {
}
}
// If the regex contains a projection operator, the generic rewriter
// cannot derive it. Use the projection-aware structural derivative,
// which delegates projection-free subterms back to mk_derivative.
// Canonicalize the result with th_rewriter (which treats the re.proj
// skolem as an opaque leaf and only applies ACI / identity rules to the
// surrounding Boolean/concat structure). Without this, equivalent
// derivative states get distinct snode ids and BFS emptiness checks
// fail to deduplicate, exploring an exploded state space.
if (re->has_projection()) {
snode const* d = deriv_proj(re, elem_expr);
expr_ref e(d->get_expr(), m);
th_rewriter trw(m);
trw(e);
return mk(e);
}
const expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
expr_ref result(m_rewriter.mk_derivative(elem_expr, re_expr), m);
SASSERT(result);
// Canonicalize with th_rewriter so that derivative states share a snode
// id regardless of how they were reached (concrete vs. via a symbolic
// ite resolved by apply_regex_if_split, which also th_rewrites its leaf).
// Without this, semantically equal but syntactically different residuals
// — notably intersections like (A∩B) vs (B∩A) or (a|∅)·R vs a·R — get
// distinct ids, breaking partial-DFA Q-membership and view/guard lap
// detection (the multi-cycle / intersection divergence).
th_rewriter trw(m);
trw(result);
return mk(result);
}
@ -962,14 +668,6 @@ namespace euf {
if (m_seq.re.is_empty(e))
return;
// projection operator: collect predicates from the regex arguments only
// (the third argument is the integer snapshot index, not a regex).
if (re->is_projection()) {
collect_re_predicates(re->arg(0), preds);
collect_re_predicates(re->arg(1), preds);
return;
}
// Expected compound regex operators are handled by recursion below.
// If a leaf survives to this point, it is an unhandled regex form.
if (re->num_args() == 0) {

View file

@ -69,19 +69,6 @@ namespace euf {
class seq_plugin;
// Oracle queried by the projection-aware derivative of sgraph.
// The projection operator π_{Q,F}(state) (a re.proj skolem) has its set of
// explored states Q stored externally (the nielsen_graph partial DFA), keyed
// by a snapshot index nu. The sgraph consults this oracle to decide whether
// the current state lies in Q when deriving a projection.
class projection_oracle {
public:
virtual ~projection_oracle() = default;
// true iff `state` (a regex expression) belongs to the explored
// subautomaton snapshot identified by `nu`.
virtual bool projection_state_in_Q(expr* state, unsigned nu) = 0;
};
class sgraph {
struct stats {
@ -121,9 +108,6 @@ namespace euf {
unsigned_vector m_alias_trail; // expression ids
unsigned_vector m_alias_trail_lim; // scope boundaries
// Oracle answering "state ∈ Q_nu" for projection derivatives. Not owned.
projection_oracle* m_proj_oracle = nullptr;
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode const** args);
snode_kind classify(expr* e) const;
void compute_metadata(snode* n);
@ -173,28 +157,7 @@ namespace euf {
// for deriving symbolic variables.
snode const* brzozowski_deriv(snode const* re, snode const* elem);
// Register the oracle consulted when deriving projection operators.
// Passing nullptr unregisters. Not owned.
void set_projection_oracle(projection_oracle* o) { m_proj_oracle = o; }
// Projection operator support (π_{Q,F}(state) modeled as re.proj skolem).
// Recognize and destructure a re.proj skolem expression.
bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const;
// Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu.
expr_ref mk_re_proj(expr* state, expr* root, unsigned nu);
// True if e is a symbolic-character dispatch skeleton (ite / union of
// dispatch, bottoming out in ∅) rather than a concrete regex state.
bool is_char_dispatch(expr* e) const;
// Wrap a (possibly ite/union-structured) symbolic-derivative result in
// the projection operator, propagating π into every dispatch leaf (§4).
expr_ref wrap_proj(expr* e, expr* root, unsigned nu);
// Projection-aware Brzozowski derivative w.r.t. a character expr
// (concrete or symbolic).
snode const* deriv_proj(snode const* re, expr* ch);
// Projection-aware nullability: lifts re.get_info().nullable to regexes
// that may contain projection operators. Returns l_true / l_false
// (l_undef only if an underlying projection-free subterm is undef).
// Nullability of a regex snode (thin wrapper over re.get_info().nullable).
lbool re_nullable(snode const* re);
// Decode a character expression that may be represented as a const-char,

View file

@ -57,16 +57,9 @@ namespace euf {
s_range, // character range [lo,hi] (OP_RE_RANGE)
s_ite, // ite (OP_ITE)
s_to_re, // string to regex (OP_SEQ_TO_RE)
s_in_re, // regex membership (OP_SEQ_IN_RE)
s_projection // projection operator π_{Q,F}(state) (re.proj skolem)
s_in_re // regex membership (OP_SEQ_IN_RE)
};
// Skolem decl-name marking a regex projection operator π_{Q,F}(state).
// The skolem has signature re.proj(state : RegLan, root : RegLan, nu : Int)
// where F = {root} (singleton accepting state) and nu identifies the
// explored subautomaton snapshot Q (see seq_nielsen partial DFA).
inline symbol re_proj_name() { return symbol("re.proj"); }
class snode {
expr *m_expr = nullptr; // assumed to be non-null
snode_kind m_kind = snode_kind::s_var;
@ -77,7 +70,6 @@ namespace euf {
bool m_ground = true; // no uninterpreted string variables
bool m_regex_free = true; // no regex constructs
bool m_is_classical = true; // classical regular expression
bool m_has_projection = false; // contains a projection operator (re.proj)
unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons)
unsigned m_length = 0; // token count, number of leaf tokens in the tree
@ -193,10 +185,6 @@ namespace euf {
bool is_classical() const {
return m_is_classical;
}
// true iff this regex (sub)tree contains a projection operator (re.proj).
bool has_projection() const {
return m_has_projection;
}
unsigned level() const {
return m_level;
}
@ -296,10 +284,6 @@ namespace euf {
}
return true;
}
bool is_projection() const {
return m_kind == snode_kind::s_projection;
}
// is this a leaf token (analogous to ZIPT's StrToken as opposed to Str)
bool is_token() const {
switch (m_kind) {

View file

@ -39,8 +39,10 @@ NSB review:
#include <algorithm>
#include <complex>
#include <cstdlib>
#include <set>
#include <stack>
#include <unordered_map>
#include <vector>
namespace seq {
@ -261,19 +263,33 @@ namespace seq {
// -----------------------------------------------
bool str_mem::is_primitive() const {
return m_str && m_str->length() == 1 && m_str->is_var() && m_regex->is_ground();
// A still-unresolved symbolic-derivative residual (ite) is not a settled
// primitive — apply_regex_if_split must resolve it first.
return m_str && m_str->length() == 1 && m_str->is_var() && m_regex->is_ground()
&& m_regex->kind() != euf::snode_kind::s_ite;
}
bool str_mem::is_trivial(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
if (!(m_str && m_regex))
return false;
// Projection-aware nullability (handles re.proj operators in m_regex).
if (m_kind == mem_kind::no_loop)
// guard: discharged ⇒ Σ* (accepts all); ε has no non-empty lap-prefix.
return m_discharged || m_str->is_empty();
if (!m_str->is_empty())
return false;
if (m_kind == mem_kind::stab_view)
// ε ∈ stab(root,Q) iff current state ≡ root (i.e. root ∈ F={root}).
return m_regex == m_root;
return n->graph().sg().re_nullable(m_regex) == l_true;
}
bool str_mem::is_contradiction(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
return false;
if (m_kind == mem_kind::no_loop)
return false; // guard acceptance is always true on the empty word
if (m_kind == mem_kind::stab_view)
return m_regex != m_root; // ε ∉ stab(root,Q) when state ≢ root
return n->graph().sg().re_nullable(m_regex) == l_false;
}
@ -538,13 +554,9 @@ namespace seq {
: m(sg.get_manager()), a(sg.get_manager()), m_seq(sg.get_seq_util()), m_sg(sg), m_rw(m), m_sk(m, m_rw),
m_length_solver(solver), m_context_solver(ctx_solver), m_partial_dfa_pin(sg.get_manager()),
m_parikh(alloc(seq_parikh, sg)), m_seq_regex(alloc(seq::seq_regex, sg)) {
// Answer projection-state membership queries during projection-aware
// derivatives (the sgraph cannot reach the partial DFA otherwise).
m_sg.set_projection_oracle(this);
}
nielsen_graph::~nielsen_graph() {
m_sg.set_projection_oracle(nullptr);
dealloc(m_parikh);
dealloc(m_seq_regex);
reset();
@ -692,13 +704,6 @@ namespace seq {
n->add_constraint(constraint(le, dep, m));
}
euf::snode const* nielsen_graph::mk_projection_term(euf::snode const* root_re, unsigned nu) {
SASSERT(root_re && root_re->get_expr());
// π_{Q_nu, {root}}(root): current state == accepting state == root.
expr_ref proj = m_sg.mk_re_proj(root_re->get_expr(), root_re->get_expr(), nu);
return m_sg.mk(proj);
}
// -----------------------------------------------------------------------
// nielsen_node: simplify_and_init
// -----------------------------------------------------------------------
@ -1023,16 +1028,6 @@ namespace seq {
return;
if (src_re->is_fail() || dst_re->is_fail())
return;
// The partial DFA must track ONLY the concrete Brzozowski automaton of
// the original ground regexes. Projection operators (re.proj) are
// synthetic stabilizers minted by cycle decomposition; every fresh
// snapshot index ν is a new expression, so recording projection-derived
// states as DFA nodes makes the SCC grow without bound (a newly-marked
// edge on every extraction) and re-triggers cycle decomposition forever
// (e.g. a cycle variable x'∈π(R) being decomposed again and again
// against its own / a sibling regex's cycle). Reject such edges.
if (src_re->has_projection() || dst_re->has_projection())
return;
//euf::snode const* label_re = to_partial_label_regex(label);
//SASSERT(label_re);
@ -1201,34 +1196,6 @@ namespace seq {
return newly_marked;
}
bool nielsen_graph::try_extract_partial_projection(euf::snode const* root_re, euf::snode const*& projection_re) {
SASSERT(root_re && root_re->get_expr());
projection_re = nullptr;
if (!root_re->is_ground())
return false;
uint_set scc;
if (!collect_scc_for_projection(root_re, scc))
return false;
// Novelty = the SCC's edge set grew (some edge was previously unmarked).
// mark_scc_projection_edges advances the snapshot index only in that
// case, so a re-visit of an already-fully-marked SCC (from any of its
// states) marks nothing new and does not re-trigger a decomposition.
const unsigned newly_marked = mark_scc_projection_edges(scc);
if (newly_marked == 0)
return false;
// Keep the stabilizer symbolic as a projection operator over the
// (just-marked) explored subautomaton snapshot. Its language is
// refined lazily through projection-aware derivatives.
projection_re = mk_projection_term(root_re, m_projection_extract_idx);
if (!projection_re)
return false;
return true;
}
euf::snode const* nielsen_graph::get_slice(euf::snode const* v, expr* left, expr* right) {
SASSERT(v && v->get_expr() && left && right);
SASSERT(v->is_var());
@ -1669,7 +1636,7 @@ namespace seq {
// in both directions (left-to-right, then right-to-left), mirroring ZIPT.
for (str_mem& mem : m_str_mem) {
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (mem.is_primitive() || !mem.is_plain())
continue;
for (unsigned od = 0; od < 2; ++od) {
bool fwd = od == 0;
@ -1715,7 +1682,7 @@ namespace seq {
// consume symbolic characters via uniform derivatives
for (str_mem& mem : m_str_mem) {
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (mem.is_primitive() || !mem.is_plain())
continue;
while (mem.m_str && !mem.m_str->is_empty()) {
@ -1727,13 +1694,7 @@ namespace seq {
euf::snode const* src_re = mem.m_regex;
euf::snode const* next = nullptr;
if (src_re->has_projection()) {
// The generic symbolic derivative cannot see the projection
// operator; route through the projection-aware derivative,
// which yields the ite-residual with π propagated to leaves.
next = sg.brzozowski_deriv(src_re, tok);
}
else {
{
seq_rewriter rw(m);
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
@ -1774,6 +1735,18 @@ namespace seq {
}
}
// consume leading characters of view / guard memberships (Section 3.3).
// m_regex is the current (plain) derivative state; we gate on whether it
// lies in Q_ν (projection_state_in_Q) and step with the ordinary
// derivative, keeping the view/guard annotation.
for (str_mem& mem : m_str_mem) {
SASSERT(mem.well_formed());
if (mem.is_plain())
continue;
if (consume_view_guard(mem))
return simplify_result::conflict;
}
// check for regex memberships that are immediately infeasible
for (str_mem& mem : m_str_mem) {
if (mem.is_contradiction(this)) {
@ -1784,29 +1757,6 @@ namespace seq {
}
}
// Empty-language check for *primitive* memberships whose regex contains a
// projection operator. The regex widening pass below skips primitives,
// and is_contradiction only fires once the string side is empty. But a
// cycle decomposition constrains the remainder x'' by ~((π(r)∩~ε)·Σ*),
// and deriving this through the cycle can collapse it to the empty
// language: e.g. ~(π(r)·Σ*) ≡ ∅ because π(r) is nullable (r ∈ F), so
// π(r)·Σ* ≡ Σ*. Such a constraint is unsatisfiable, but without this
// eager check the variable would be unwound depth-deep before the
// conflict surfaces — the source of the multi-cycle-SCC blow-up. The
// check is cheap: is_empty_bfs on these projection regexes settles in a
// couple of states (a nullable projection short-circuits to non-empty).
SASSERT(m_graph.m_seq_regex);
for (str_mem const& mem : m_str_mem) {
if (!mem.is_primitive() || !mem.m_regex->has_projection())
continue;
if (m_graph.m_seq_regex->is_empty_bfs(mem.m_regex) == l_true) {
TRACE(seq, tout << "empty primitive projection regex " << mem_pp(mem, m) << "\n");
set_general_conflict();
set_conflict(backtrack_reason::regex, mem.m_dep);
return simplify_result::conflict;
}
}
// remove trivial membership constraints once again
unsigned wj = 0;
for (unsigned j = 0; j < m_str_mem.size(); ++j) {
@ -1845,6 +1795,75 @@ namespace seq {
return simplify_result::proceed;
}
bool nielsen_node::consume_view_guard(str_mem& mem) {
SASSERT(!mem.is_plain());
euf::sgraph& sg = m_graph.sg();
ast_manager& m = sg.get_manager();
seq_util& seq = m_graph.seq();
auto set_regex_conflict = [&]() {
set_general_conflict();
set_conflict(backtrack_reason::regex, mem.m_dep);
};
while (mem.m_str && !mem.m_str->is_empty()) {
euf::snode const* tok = mem.m_str->first();
if (!tok || !tok->is_char_or_unit())
break; // leading token is a variable/power — nothing to consume yet
euf::snode const* c = mem.m_regex;
// The gate tests the CURRENT (plain) state c against Q_ν. An ite
// state means a previous symbolic step has not been resolved yet;
// leave it for apply_regex_if_split.
if (!c->is_ground() || c->kind() == euf::snode_kind::s_ite)
break;
const bool in_Q = m_graph.projection_state_in_Q(c->get_expr(), mem.m_nu);
if (!in_Q) {
if (mem.is_guard()) {
// The run left Q: no lap from the start can complete within Q
// anymore, so the guard is discharged (accepts every suffix).
mem.m_discharged = true;
return false;
}
// view: a^{-1} L_{Q,F}(c) = ∅ when c ∉ Q.
set_regex_conflict();
return true;
}
// Step with brzozowski_deriv for BOTH concrete and symbolic tokens.
// This is essential: the partial-DFA states (and m_root) are produced
// by brzozowski_deriv, so its canonicalization must be used here too —
// otherwise the guard's resolved state never equals m_root by snode
// identity and laps never close. For a symbolic unit it yields a
// canonical ite residual that apply_regex_if_split later resolves.
euf::snode const* next = sg.brzozowski_deriv(c, tok);
if (!next)
break;
mem.m_str = sg.drop_left(mem.m_str, 1);
mem.m_regex = next;
if (next->is_fail()) {
// view: derivative collapsed to ∅ — unsatisfiable.
// guard: the lap can never close through ∅; treat as discharged.
if (mem.is_guard()) { mem.m_discharged = true; return false; }
set_regex_conflict();
return true;
}
if (next->is_ground() && next->kind() != euf::snode_kind::s_ite) {
// concrete next state resolved immediately
if (mem.is_guard() && next == mem.m_root) {
// a non-empty prefix completed a lap r→…→r within Q.
set_regex_conflict();
return true;
}
}
else {
// symbolic ite residual: defer to apply_regex_if_split, which
// resolves the character and (for guards) detects a lap landing
// back on the root.
break;
}
}
return false;
}
bool nielsen_node::is_satisfied() const {
if (!m_str_deq.empty() || !m_str_eq.empty())
return false;
@ -3389,128 +3408,81 @@ namespace seq {
}
// -----------------------------------------------------------------------
// Helper: record_dfa_edges_from_ite
// Walk an ite-structured symbolic derivative and record a concrete DFA edge
// for each non-fail branch. The ite has the form:
// ite(in_re(char_var, minterm_re), branch_re, rest_ite)
// Each (minterm_re, branch_re) pair gives one DFA edge src_re→branch_re.
// Called from simplify_and_init so that cycle_decomp can detect SCCs lazily
// as symbolic chars are consumed (mirroring the old concrete-char approach).
// -----------------------------------------------------------------------
void nielsen_graph::record_dfa_edges_from_ite(euf::snode const* src_re, expr* ite_deriv) {
if (!src_re || !ite_deriv)
return;
expr *c, *th, *el;
if (!m.is_ite(ite_deriv, c, th, el))
return;
expr *char_ex, *minterm_re;
if (m_seq.str.is_in_re(c, char_ex, minterm_re)) {
if (!m_seq.re.is_empty(th)) {
euf::snode const* dst = m_sg.mk(th);
if (dst && !dst->is_fail() && dst->is_ground()) {
euf::snode const* label = m_sg.mk(minterm_re);
record_partial_derivative_edge(src_re, label, dst);
}
}
}
record_dfa_edges_from_ite(src_re, el);
}
// -----------------------------------------------------------------------
// Helper: get_current_stabilizer
// Returns the current partial DFA stabilizer s* for root_re without the
// novelty guard from try_extract_partial_projection. Returns nullptr if
// no SCC exists yet or no edges have been marked.
// -----------------------------------------------------------------------
euf::snode const* nielsen_graph::get_current_stabilizer(euf::snode const* root_re) {
if (!root_re || !root_re->is_ground() || m_projection_extract_idx == 0)
return nullptr;
uint_set scc;
if (!collect_scc_for_projection(root_re, scc))
return nullptr;
// Symbolic projection over the edges marked by earlier extractions
// (index ≤ current snapshot). No new marking here, mirroring the old
// behaviour of reusing the current extraction index.
return mk_projection_term(root_re, m_projection_extract_idx);
}
// -----------------------------------------------------------------------
// Modifier: apply_cycle_subsumption
// For str_mem x·rest ∈ r where L(∩ Reg_x) ⊆ L(stabilizer(r)), remove x:
// replace x·rest ∈ r with rest ∈ r.
// Mirrors ZIPT StrMem.TrySubsume / paper Section 3.2.3 (Cycle Subsumption).
// Modifier: apply_cycle_subsumption (paper Section "Cycle Subsumption")
// For a membership x·u ∈ R (u≠ε) with L(⊓Reg_x) ⊆ stab(R,Q_ν), drop the
// leading x: replace x·u ∈ R by u ∈ R. The inclusion is decided as the
// product-emptiness test L(⊓Reg_x) ∩ ~stab(R,Q_ν) = ∅ (Section 3.3),
// adding one co-view component for ~stab.
// -----------------------------------------------------------------------
bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) {
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (!mem.is_plain() || mem.is_primitive())
continue;
euf::snode const* first = mem.m_str->first();
SASSERT(first);
if (!first->is_var())
continue;
euf::snode const* R = mem.m_regex;
// Get the current stabilizer for this regex (no novelty guard).
euf::snode const* stabilizer = get_current_stabilizer(mem.m_regex);
if (!stabilizer || m_seq.re.is_epsilon(stabilizer->get_expr()))
// R must lie on a detected cycle with a marked SCC snapshot.
uint_set scc;
if (!collect_scc_for_projection(R, scc))
continue;
const unsigned nu = m_projection_extract_idx;
if (nu == 0)
continue;
// Collect primitive regex constraints on `first`.
// Decide L(⊓Reg_x) ⊆ stab(R,Q_ν) as ⊓Reg_x ∩ ~stab = ∅.
vector<prod_comp> comps;
dep_tracker x_dep = nullptr;
euf::snode const* x_regex = m_seq_regex->collect_primitive_regex_intersection(
first, *node, m_dep_mgr, x_dep);
if (!x_regex)
collect_var_components(first, *node, comps, x_dep);
comps.push_back(prod_comp::mk_view(R, R, nu, /*complemented*/ true));
if (check_product_emptiness(comps, 5000) != l_true)
continue;
// Check L(x_regex) ⊆ L(stabilizer).
if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true)
continue;
// Subsume: replace x·rest ∈ r with rest ∈ r.
// Subsume: replace x·u ∈ R with u ∈ R.
euf::snode const* tail = m_sg.drop_first(mem.m_str);
SASSERT(tail);
nielsen_node* child = mk_child(node);
mk_edge(node, child, "cycle subs", true);
auto& child_mems = child->str_mems();
for (unsigned k = 0; k < child_mems.size(); ++k) {
if (child_mems[k] == mem) {
child_mems[k] = child_mems.back();
child_mems.pop_back();
for (auto& cm : child->str_mems()) {
if (cm == mem) {
cm.m_str = tail;
cm.m_dep = m_dep_mgr.mk_join(cm.m_dep, x_dep);
break;
}
}
const dep_tracker combined_dep = m_dep_mgr.mk_join(mem.m_dep, x_dep);
child->add_str_mem(str_mem(tail, mem.m_regex, combined_dep));
TRACE(seq, tout << "cycle_subsumption: dropped x=" << mk_pp(first->get_expr(), m)
<< " from " << mk_pp(mem.m_str->get_expr(), m)
<< "" << mk_pp(mem.m_regex->get_expr(), m) << "\n");
<< "" << mk_pp(R->get_expr(), m) << " nu=" << nu << "\n");
return true;
}
return false;
}
// -----------------------------------------------------------------------
// Modifier: apply_cycle_decomposition
// Cycle decomposition: for a str_mem x·s ∈ R where a partial DFA
// cycle is detected, project SCC onto stabilizer constraint b.
// Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*).
// Modifier: apply_cycle_decomposition (paper Section "Cycle Decomposition")
//
// Here stabilizer_re = π_{Q_SCC,{R}}(R) is the projection operator denoting
// the language of all paths from R back to R that stay within the explored
// subautomaton Q_SCC (including ε), i.e. s* for the non-empty cycle language
// s. It is kept symbolic; its derivative/nullability are evaluated lazily by
// the projection-aware sgraph (paper §3.3) rather than materialized.
// For a membership x·u ∈ R whose leading variable x sits on a detected cycle
// (R lies in an SCC of the partial DFA) and that does not already carry a
// matching cycle guard for the current SCC snapshot, split
// x → x'·x''
// and attach the two *view*/*guard* primitive constraints (Section 3.3):
// x' ∈ stab(R, Q_ν) -- stabilizer view (F = {R})
// noloop(x'', R, Q_ν) -- cycle guard (two-mode monitor)
// The leading x' is immediately subsumed (its only constraint is the view,
// and stab ⊆ stab trivially), so it is dropped from the primary constraint.
//
// The constraint on x'' prevents divergence: x'' may not begin with any
// non-empty word from L(stabilizer_re), so it cannot re-enter the cycle.
// Unlike the old projection-operator encoding, the view and guard are kept
// as *constraint metadata* over the plain state R and the ν-indexed explored
// subautomaton Q_ν — nothing is materialized as a regex, which keeps the
// reachable state space finite (termination).
// -----------------------------------------------------------------------
bool nielsen_graph::apply_cycle_decomposition(nielsen_node* node) {
@ -3518,7 +3490,7 @@ namespace seq {
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
SASSERT(mem.well_formed());
if (mem.is_primitive())
if (!mem.is_plain() || mem.is_primitive())
continue;
euf::snode const* first = mem.m_str->first();
SASSERT(first);
@ -3526,29 +3498,47 @@ namespace seq {
continue;
euf::snode const* x = first;
euf::snode const* stabilizer_re = nullptr;
euf::snode const* R = mem.m_regex;
// Eagerly precompute partial DFA edges from this regex so that
// collect_scc_for_projection can detect cycles without waiting
// for apply_regex_var_split to create per-minterm children.
precompute_partial_dfa(mem.m_regex, 2);
precompute_partial_dfa(R, 64);
if (!try_extract_partial_projection(mem.m_regex, stabilizer_re))
// R must sit on a cycle (an SCC of the partial DFA).
uint_set scc;
if (!collect_scc_for_projection(R, scc))
continue;
// Mark the SCC edges; this gives a ν identifying the current Q_SCC.
// (We trigger on absence of a matching guard, NOT on novelty.)
mark_scc_projection_edges(scc);
const unsigned nu = m_projection_extract_idx;
if (nu == 0)
continue;
fprintf(stderr, "DEC R=%u nu=%u sccsz=%u x=%u nedges=%u\n", R->id(), nu, scc.num_elems(), x->id(), (unsigned)m_partial_dfa_edges.size()); fflush(stderr);
// Trigger condition: x must not already carry a cycle guard for the
// current SCC snapshot ν. All states of one SCC share a single ν, so
// the guard is keyed on ν rather than on the (changing) head R: as the
// derivation walks the SCC the head moves, but the lineage is already
// guarded against re-traversing the cycle. An already-decomposed
// variable whose guard refers to a strictly smaller, stale ν is
// re-decomposed to adopt the enlarged SCC.
bool already_guarded = false;
for (str_mem const& g : node->str_mems()) {
if (g.is_guard() && g.m_nu >= nu
&& g.m_str && g.m_str->first() == x) {
already_guarded = true;
break;
}
}
if (already_guarded)
continue;
SASSERT(stabilizer_re && stabilizer_re->get_expr());
// stabilizer_re is epsilon if the SCC has no non-trivial cycles — skip.
if (m_seq.re.is_epsilon(stabilizer_re->get_expr()))
continue;
// Get sorts needed to build the xpp regex.
sort* re_sort = stabilizer_re->get_expr()->get_sort();
sort* seq_sort = nullptr;
VERIFY(m_seq.is_re(stabilizer_re->get_expr(), seq_sort));
sort* seq_sort = x->get_expr()->get_sort();
// Construct the replacement x = x' x''
euf::snode const* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort));
euf::snode const* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), R->get_expr(), seq_sort));
euf::snode const* xpp = get_tail(x, compute_length_expr(xp).get());
euf::snode const* xp_xpp = m_sg.mk_concat(xp, xpp);
@ -3563,32 +3553,17 @@ namespace seq {
SASSERT(child->m_str_mem[mi].m_str->first() == xp);
child->m_str_mem[mi].m_str = dir_drop(m_sg, child->m_str_mem[mi].m_str, 1, true);
// x' ∈ stabilizer_re (= s*, all repetitions of the detected cycle)
child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep));
// x' ∈ stab(R, Q_ν) (stabilizer view, F = {R}, current state = R)
child->add_str_mem(str_mem::mk_view(xp, R, R, nu, mem.m_dep));
// x'' ∈ complement((stabilizer_re ∩ complement(ε)) · Σ*)
//
// stabilizer_re ∩ complement(ε) = non-empty words in the cycle language
// (s_ne) · Σ* = all words whose prefix is a full non-empty cycle
// complement(...) = words that do NOT start with a full non-empty cycle
//
// This ensures x'' cannot begin another complete cycle from the same
// SCC entry point, which is what prevents infinite unfolding.
const expr_ref eps_re(m_seq.re.mk_epsilon(seq_sort), m);
const expr_ref compl_eps(m_seq.re.mk_complement(eps_re), m);
const expr_ref s_ne(m_seq.re.mk_inter(stabilizer_re->get_expr(), compl_eps), m);
const expr_ref sigma_star(m_seq.re.mk_full_seq(re_sort), m);
const expr_ref s_ne_sigma_star(m_seq.re.mk_concat(s_ne, sigma_star), m);
const expr_ref xpp_re(m_seq.re.mk_complement(s_ne_sigma_star), m);
euf::snode const* xpp_snode = m_sg.mk(xpp_re);
child->add_str_mem(str_mem(xpp, xpp_snode, mem.m_dep));
// noloop(x'', R, Q_ν) (cycle guard, two-mode monitor, state = R)
child->add_str_mem(str_mem::mk_guard(xpp, R, R, nu, mem.m_dep));
TRACE(seq, tout << "cycle_decomp: x=" << mk_pp(x->get_expr(), m)
<< " stabilizer=" << mk_pp(stabilizer_re->get_expr(), m)
<< " xpp_re=" << xpp_re << "\n");
<< " R=" << mk_pp(R->get_expr(), m) << " nu=" << nu << "\n");
#ifdef Z3DEBUG
std::string dot = partial_dfa_to_dot(mem.m_regex, false);
std::string dot = partial_dfa_to_dot(R, false);
#endif
return true;
}
@ -3686,11 +3661,9 @@ namespace seq {
if (mem.m_str->is_empty() || mem.is_primitive())
continue;
// The split engine works on plain regex AST and does not understand the
// projection operator (re.proj) — it would give up on it anyway.
// Projection-constrained memberships are handled by the
// cycle-decomposition path, so skip them here.
if (mem.m_regex->has_projection())
// View / guard memberships (Section 3.3) are handled by the
// cycle machinery and the synchronous product, not by factorization.
if (!mem.is_plain())
continue;
split_set pairs;
@ -4083,8 +4056,12 @@ namespace seq {
expr_ref c2(m), th2(m), el2(m);
if (!bool_rewriter(m).decompose_ite(r, c2, th2, el2)) {
// No ite remaining: leaf → create child node with regex updated to r
euf::snode const* new_regex_snode = m_sg.mk(r);
// No ite remaining: leaf → create child node with regex updated to r.
// Canonicalize with th_rewriter so that the resolved leaf shares
// its snode id with the corresponding partial-DFA state (which is
// built by brzozowski_deriv); otherwise un-simplified residuals
// like (a|∅)·R≠a·R break view/guard Q-membership and lap checks.
euf::snode const* new_regex_snode = mk_rewrite(r);
nielsen_node *child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, "regex if", true);
for (const auto f : cs) {
@ -4093,6 +4070,12 @@ namespace seq {
for (str_mem &cm : child->str_mems()) {
if (cm == mem) {
cm.m_regex = new_regex_snode;
// A guard whose symbolic step lands back on the cycle
// head closed a lap within Q → this branch is dead.
if (cm.is_guard() && new_regex_snode == cm.m_root) {
child->set_general_conflict();
child->set_conflict(backtrack_reason::regex, cm.m_dep);
}
break;
}
}
@ -4938,28 +4921,184 @@ namespace seq {
// Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329)
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// Synchronous product over plain / view / guard / co-view components.
// -----------------------------------------------------------------------
lbool nielsen_graph::comp_accepting(prod_comp const& c) const {
if (c.m_dead)
return l_false;
switch (c.m_kind) {
case mem_kind::plain:
return m_sg.re_nullable(c.m_state);
case mem_kind::stab_view:
if (c.m_complemented)
return (c.m_sink || c.m_state != c.m_root) ? l_true : l_false;
return (c.m_state == c.m_root) ? l_true : l_false;
case mem_kind::no_loop:
return l_true; // guard accepts on every prefix it has not failed on
}
return l_undef;
}
nielsen_graph::prod_comp nielsen_graph::comp_step(prod_comp const& c, euf::snode const* mt) {
prod_comp r = c;
if (c.m_dead)
return r;
switch (c.m_kind) {
case mem_kind::plain: {
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) r.m_dead = true; else r.m_state = d;
return r;
}
case mem_kind::stab_view: {
if (c.m_complemented) {
if (c.m_sink) return r; // Σ*
if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_sink = true; return r; }
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) { r.m_sink = true; return r; } // ~∅ = Σ*
r.m_state = d;
return r;
}
if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_dead = true; return r; }
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) { r.m_dead = true; return r; }
r.m_state = d;
return r;
}
case mem_kind::no_loop: {
if (c.m_sink) return r; // discharged: Σ*
if (!projection_state_in_Q(c.m_state->get_expr(), c.m_nu)) { r.m_sink = true; return r; }
euf::snode const* d = m_sg.brzozowski_deriv(c.m_state, mt);
if (!d || d->is_fail()) { r.m_sink = true; return r; } // lap cannot close through ∅
if (d == c.m_root) { r.m_dead = true; return r; } // lap completed → forbidden
r.m_state = d;
return r;
}
}
return r;
}
lbool nielsen_graph::check_product_emptiness(vector<prod_comp> const& comps0, unsigned max_states) {
if (comps0.empty())
return l_false; // empty intersection = Σ* (non-empty)
auto encode = [](vector<prod_comp> const& cs) {
std::vector<unsigned> key;
key.reserve(cs.size() * 5);
for (auto const& c : cs) {
key.push_back(static_cast<unsigned>(c.m_kind));
key.push_back((c.m_complemented ? 1u : 0u) | (c.m_sink ? 2u : 0u) | (c.m_dead ? 4u : 0u));
key.push_back(c.m_state ? c.m_state->id() : UINT_MAX);
}
return key;
};
std::set<std::vector<unsigned>> visited;
vector<vector<prod_comp>> work;
work.push_back(comps0);
visited.insert(encode(comps0));
unsigned explored = 0;
while (!work.empty()) {
if (!m.inc())
return l_undef;
if (explored >= max_states)
return l_undef;
vector<prod_comp> cur = work.back();
work.pop_back();
++explored;
bool any_dead = false;
for (auto const& c : cur) if (c.m_dead) { any_dead = true; break; }
if (any_dead)
continue;
// simultaneously accepting?
bool all_acc = true, any_undef = false;
for (auto const& c : cur) {
const lbool a = comp_accepting(c);
if (a == l_false) { all_acc = false; break; }
if (a == l_undef) any_undef = true;
}
if (all_acc && !any_undef)
return l_false; // found a common word
// joint first-character partition = minterms of the intersection of
// all still-discriminating component states.
expr* combined = nullptr;
for (auto const& c : cur) {
if (c.m_sink || c.m_dead) continue;
combined = combined ? m_seq.re.mk_inter(combined, c.m_state->get_expr())
: c.m_state->get_expr();
}
if (!combined)
continue; // no discriminating state and not accepting: dead end
euf::snode_vector mts;
m_sg.compute_minterms(m_sg.mk(combined), mts);
for (euf::snode const* mt : mts) {
vector<prod_comp> nxt;
bool dead = false;
for (auto const& c : cur) {
prod_comp d = comp_step(c, mt);
if (d.m_dead) { dead = true; break; }
nxt.push_back(d);
}
if (dead)
continue;
if (visited.insert(encode(nxt)).second)
work.push_back(nxt);
}
}
return l_true; // exhausted with no accepting tuple → empty
}
bool nielsen_graph::collect_var_components(euf::snode const* var, nielsen_node const& node,
vector<prod_comp>& out, dep_tracker& dep) {
bool found = false;
for (auto const& mem : node.str_mems()) {
if (!mem.is_primitive())
continue;
if (mem.m_str->first() != var)
continue;
switch (mem.m_kind) {
case mem_kind::plain:
out.push_back(prod_comp::mk_plain(mem.m_regex));
break;
case mem_kind::stab_view:
out.push_back(prod_comp::mk_view(mem.m_regex, mem.m_root, mem.m_nu, false));
break;
case mem_kind::no_loop:
out.push_back(prod_comp::mk_guard(mem.m_regex, mem.m_root, mem.m_nu, mem.m_discharged));
break;
}
dep = m_dep_mgr.mk_join(dep, mem.m_dep);
found = true;
}
return found;
}
bool nielsen_graph::check_leaf_regex(nielsen_node const& node, dep_tracker& dep) {
SASSERT(m_seq_regex);
// Group str_mem constraints by variable (primitive constraints only)
u_map<std::pair<euf::snode_vector, dep_tracker>> var_regexes;
// distinct variables carrying a primitive constraint
uint_set seen;
for (auto const& mem : node.str_mems()) {
SASSERT(mem.is_primitive());
euf::snode const* const first = mem.m_str->first();
SASSERT(first && first->is_var());
auto &[fst, snd] = var_regexes.insert_if_not_there(first->id(), std::pair<euf::snode_vector, dep_tracker>());
fst.push_back(mem.m_regex);
snd = dep_mgr().mk_join(snd, mem.m_dep);
}
if (seen.contains(first->id()))
continue;
seen.insert(first->id());
// check intersection non-emptiness (also for single occurrences; it could be empty)
for (auto& [var_id, regexes] : var_regexes) {
const lbool result = m_seq_regex->check_intersection_emptiness(regexes.first, 5000);
vector<prod_comp> comps;
dep_tracker d = nullptr;
collect_var_components(first, node, comps, d);
const lbool result = check_product_emptiness(comps, 5000);
if (result == l_true) {
TRACE(seq, tout << "empty intersection\n");
// Intersection is empty — infeasible
dep = regexes.second;
dep = d;
return false;
}
}

View file

@ -275,18 +275,55 @@ namespace seq {
<< snode_label_html(p.deq.m_rhs, p.m, false);
}
// kind of a regex membership constraint (paper Section 3.3, "views"):
// - plain: ordinary u ∈ r
// - stab_view: stabilizer view u ∈_{Q,{root}} root (acceptance set F={root})
// - no_loop: cycle guard noloop(u, root, Q) (two-mode monitor)
// For view/guard, m_regex holds the *current derivative state* (a plain
// regex; starts at m_root) and Q is identified by the ν-index m_nu over
// the partial DFA (projection_state_in_Q). This replaces the old re.proj
// projection operator: m_regex is always a plain regex now.
enum class mem_kind : unsigned char { plain, stab_view, no_loop };
// regex membership constraint: str in regex
// mirrors ZIPT's StrMem
struct str_mem {
euf::snode const* m_str; // assumed to be non-null
euf::snode const* m_regex; // assumed to be non-null
euf::snode const* m_regex; // assumed to be non-null (plain regex = current run state)
dep_tracker m_dep;
// view / guard annotation (Section 3.3)
mem_kind m_kind = mem_kind::plain;
euf::snode const* m_root = nullptr; // cycle head r (view F={r}; guard lap head)
unsigned m_nu = 0; // ν: snapshot index identifying Q
bool m_discharged = false; // guard monitor: false=watch, true=discharged
str_mem(euf::snode const* str, euf::snode const* regex, dep_tracker const& dep):
m_str(str), m_regex(regex), m_dep(dep) {}
// factory for a stabilizer view str ∈_{Q_ν,{root}} root (m_regex=state)
static str_mem mk_view(euf::snode const* str, euf::snode const* state,
euf::snode const* root, unsigned nu, dep_tracker const& dep) {
str_mem r(str, state, dep);
r.m_kind = mem_kind::stab_view; r.m_root = root; r.m_nu = nu;
return r;
}
// factory for a cycle guard noloop(str, root, Q_ν) (m_regex=state)
static str_mem mk_guard(euf::snode const* str, euf::snode const* state,
euf::snode const* root, unsigned nu, dep_tracker const& dep) {
str_mem r(str, state, dep);
r.m_kind = mem_kind::no_loop; r.m_root = root; r.m_nu = nu;
return r;
}
bool is_plain() const { return m_kind == mem_kind::plain; }
bool is_view() const { return m_kind == mem_kind::stab_view; }
bool is_guard() const { return m_kind == mem_kind::no_loop; }
bool operator==(str_mem const& other) const {
return m_str == other.m_str && m_regex == other.m_regex;
return m_str == other.m_str && m_regex == other.m_regex
&& m_kind == other.m_kind && m_root == other.m_root
&& m_nu == other.m_nu && m_discharged == other.m_discharged;
}
// check if the constraint has the form x in R with x a single variable
@ -617,6 +654,12 @@ namespace seq {
// Returns proceed, conflict, satisfied, or restart.
simplify_result simplify_and_init(ptr_vector<nielsen_edge> const& cur_path);
// Consume leading concrete/symbolic characters of a view/guard membership
// (Section 3.3): gate on Q_ν, step with the ordinary derivative, keeping
// the annotation. Returns true if the constraint died (view left Q, or
// guard completed a lap) — the caller reports a regex conflict.
bool consume_view_guard(str_mem& mem);
// true if all str_eqs are trivial and there are no str_mems
bool is_satisfied() const;
@ -678,7 +721,7 @@ namespace seq {
// the overall Nielsen transformation graph
// mirrors ZIPT's NielsenGraph
class nielsen_graph : public euf::projection_oracle {
class nielsen_graph {
friend class nielsen_node;
friend class nielsen_edge;
@ -792,7 +835,7 @@ namespace seq {
// Construct with a caller-supplied solver. Ownership is NOT transferred;
// the caller is responsible for keeping the solver alive.
nielsen_graph(euf::sgraph& sg, sub_solver_i& solver, context_solver_i& ctx);
~nielsen_graph() override;
~nielsen_graph();
ast_manager& get_manager() const {
@ -803,10 +846,10 @@ namespace seq {
seq_util& seq() { return m_seq; }
seq_util const& seq() const { return m_seq; }
// euf::projection_oracle: true iff regex `state` is part of the
// explored subautomaton snapshot `nu` (a partial-DFA edge incident to
// `state` was marked with an index in [1, nu]).
bool projection_state_in_Q(expr* state, unsigned nu) override;
// true iff regex `state` is part of the explored subautomaton snapshot
// `nu` — i.e. state ∈ Q_ν (a partial-DFA edge incident to `state` was
// marked with an index in [1, nu]). Used to gate view/guard derivation.
bool projection_state_in_Q(expr* state, unsigned nu);
// node management
nielsen_node* mk_node();
@ -982,6 +1025,45 @@ namespace seq {
// Mirrors ZIPT NielsenNode.CheckRegex)
bool check_leaf_regex(nielsen_node const& node, dep_tracker& dep);
// -------------------------------------------------------------------
// Synchronous product over plain / view / guard / co-view components
// (paper Section 3.3, "Deciding the intersection check"). Each
// component carries its own acceptance + (gated) one-character law;
// the product is empty iff no reachable tuple is simultaneously
// accepting. m_regex always being a plain regex, the derivative is
// the ordinary Brzozowski one and only acceptance / the Q-gate differ.
// -------------------------------------------------------------------
struct prod_comp {
mem_kind m_kind = mem_kind::plain;
bool m_complemented = false; // ~stab co-view (kind==stab_view)
euf::snode const* m_state = nullptr; // current plain regex state
euf::snode const* m_root = nullptr; // view/guard cycle head
unsigned m_nu = 0; // ν (Q snapshot)
bool m_sink = false; // co-view became Σ* / guard discharged
bool m_dead = false; // language collapsed to ∅
static prod_comp mk_plain(euf::snode const* s) { prod_comp c; c.m_state = s; return c; }
static prod_comp mk_view(euf::snode const* s, euf::snode const* root, unsigned nu, bool compl_) {
prod_comp c; c.m_kind = mem_kind::stab_view; c.m_state = s; c.m_root = root; c.m_nu = nu; c.m_complemented = compl_; return c;
}
static prod_comp mk_guard(euf::snode const* s, euf::snode const* root, unsigned nu, bool discharged) {
prod_comp c; c.m_kind = mem_kind::no_loop; c.m_state = s; c.m_root = root; c.m_nu = nu; c.m_sink = discharged; return c;
}
};
// l_true = empty, l_false = non-empty (a simultaneously accepting tuple
// was reached), l_undef = budget exhausted / inconclusive.
lbool check_product_emptiness(vector<prod_comp> const& comps, unsigned max_states);
// acceptance / single-character step of one product component.
lbool comp_accepting(prod_comp const& c) const;
prod_comp comp_step(prod_comp const& c, euf::snode const* mt);
// Build the product components for variable `var` from the node's
// primitive memberships (plain / view / guard). Joins their deps.
bool collect_var_components(euf::snode const* var, nielsen_node const& node,
vector<prod_comp>& out, dep_tracker& dep);
// -------------------------------------------------------------------
// Partial DFA projection helpers
// -------------------------------------------------------------------
@ -1003,17 +1085,6 @@ namespace seq {
// currently covered edge count for this extraction.
unsigned mark_scc_projection_edges(uint_set const& scc);
// Build the projection operator π_{Q,{root_re}}(root_re) as a re.proj
// skolem snode, where Q is the explored subautomaton identified by the
// snapshot index nu. This is the stabilizer of root_re kept symbolically
// (the projection's derivative/nullability are evaluated lazily by the
// sgraph consulting projection_state_in_Q).
euf::snode const* mk_projection_term(euf::snode const* root_re, unsigned nu);
// Try to extract a stronger projection for root_re. Returns true and
// stores it in projection_re iff SCC coverage has grown.
bool try_extract_partial_projection(euf::snode const* root_re, euf::snode const*& projection_re);
euf::snode const* get_slice(euf::snode const* v, expr* left, expr* right);
euf::snode const* get_tail(euf::snode const* v, expr* cnt, bool fwd = true);
@ -1105,22 +1176,12 @@ namespace seq {
// Fires without the novelty guard, using the current partial DFA state.
bool apply_cycle_subsumption(nielsen_node* node);
// Return the current stabilizer s* for root_re from the partial DFA
// (bypasses the novelty guard used by try_extract_partial_projection).
euf::snode const* get_current_stabilizer(euf::snode const* root_re);
// BFS of Brzozowski derivatives from root_re up to `depth` steps,
// eagerly recording concrete minterm edges in the partial DFA so that
// collect_scc_for_projection can find cycles without first waiting for
// concrete children to record them one level at a time.
void precompute_partial_dfa(euf::snode const* root_re, unsigned depth);
// Walk an ite-structured symbolic derivative expression and record
// concrete DFA edges for each non-fail branch.
// Called from simplify_and_init when a symbolic character is consumed,
// so that cycle_decomp can detect SCCs lazily (as with concrete chars).
void record_dfa_edges_from_ite(euf::snode const* src_re, expr* ite_deriv);
// generalized power introduction: for an equation where one head is
// a variable v and the other side has ground prefix + a variable x
// forming a cycle back to v, introduce v = base^n · suffix.

View file

@ -399,14 +399,6 @@ namespace seq {
if (re->is_fail() || re->is_full_char() || re->is_full_seq())
return;
// projection operator: only the regex arguments carry character
// structure; the third argument is the integer snapshot index.
if (re->is_projection()) {
collect_char_boundaries(re->arg(0), bounds);
collect_char_boundaries(re->arg(1), bounds);
return;
}
// If we reached a leaf and none of the expected leaf forms matched,
// this is a regex constructor we did not account for in boundary
// extraction and should fail loudly in debug builds.
@ -622,8 +614,11 @@ namespace seq {
euf::snode const* result = nullptr;
for (auto const& mem : node.str_mems()) {
// Primitive constraint: str is a single variable
if (!mem.is_primitive())
// Primitive constraint: str is a single variable. View/guard
// memberships do not denote a plain regex on `var` (their m_regex
// is a derivative *state*), so skip them — yielding a coarser but
// sound over-approximation for the caller (regex widening).
if (!mem.is_primitive() || !mem.is_plain())
continue;
euf::snode const* first = mem.m_str->first();
// NSB review: why is this "first" and not mem.m_str?

View file

@ -466,7 +466,7 @@ namespace smt {
// but also some plain Boolean-closure / large length-intersected
// shapes. Fall back to a derivative-automaton BFS that builds an
// accepting word of the requested length directly.
wr = projection_witness(m_sg.mk(re_expr), witness);
wr = derivative_witness(m_sg.mk(re_expr), witness);
}
if (wr == l_true) {
SASSERT(witness);
@ -517,15 +517,15 @@ namespace smt {
return m_seq.str.mk_empty(srt);
}
lbool seq_model::projection_witness(euf::snode const* re0, expr_ref& witness) const {
lbool seq_model::derivative_witness(euf::snode const* re0, expr_ref& witness) const {
SASSERT(re0 && re0->get_expr());
sort* seq_sort = nullptr;
if (!m_seq.is_re(re0->get_expr(), seq_sort))
return l_undef;
// Shortest-path BFS over the projection-aware derivative automaton.
// Shortest-path BFS over the derivative automaton.
// Each frontier item is (state, accepting-word-so-far). A state is
// accepting when re_nullable (projection-aware) reports nullable.
// accepting when re_nullable reports nullable.
// The regex carries the length intersection (∩ Σ^n), so an accepting
// run has exactly the requested length and the search is finite.
vector<std::pair<euf::snode const*, zstring>> work;
@ -582,6 +582,11 @@ namespace smt {
if (mem.is_trivial(sat_node))
continue; // empty string in nullable regex: already satisfied, no variable to constrain
VERIFY(mem.is_primitive()); // everything else should have been eliminated already
// TODO(view/guard witness): a stabilizer view / cycle guard does not
// denote a plain regex on the variable; for now skip it during model
// construction (handled by the dedicated view/guard witness below).
if (!mem.is_plain())
continue;
unsigned id = var_key(mem.m_str);
euf::snode const* existing = nullptr;
if (m_var_regex.find(id, existing) && existing) {

View file

@ -119,11 +119,11 @@ namespace smt {
// Witness extraction for regexes that contain a projection operator
// (re.proj), which the standard seq_rewriter::some_seq_in_re cannot
// handle. Runs a shortest-path BFS over the projection-aware
// handle. Runs a shortest-path BFS over the
// derivative automaton (m_sg) for a nullable (accepting) state,
// building the accepting word. Returns l_true and sets `witness`
// on success.
lbool projection_witness(euf::snode const* re, expr_ref& witness) const;
lbool derivative_witness(euf::snode const* re, expr_ref& witness) const;
// collect per-variable regex constraints from the state.
// For each positive str_mem, records the regex (or intersects

View file

@ -1844,24 +1844,22 @@ namespace smt {
unsigned_vector const &mem_indices = var_to_mems[var_id];
euf::snode_vector regexes;
bool has_projection = false;
bool has_view_or_guard = false;
for (auto i : mem_indices) {
SASSERT(mems[i].well_formed());
regexes.push_back(mems[i].m_regex);
if (mems[i].m_regex->has_projection())
has_projection = true;
// Synthetic cycle variables carry a stabilizer view / cycle guard
// (Section 3.3) rather than a real regex; skip length coherence.
if (!mems[i].is_plain())
has_view_or_guard = true;
}
// Skip length coherence for synthetic variables constrained by a
// projection operator (the cycle variable x'∈π(R) and remainder
// x''∈~((π(R)∩~ε)Σ*) introduced by cycle decomposition). The
// Σ^l ∩ projection gradient does not converge for them — the
// benchmark has no real length constraints, so the integer solver
// is free to pick ever-larger lengths for these *synthetic* vars
// and the coherence loop rejects each one (len≠l) forever. Their
// length consistency is already guaranteed by the soundness of the
// decomposition, so leaving the integer assignment alone is safe.
if (has_projection)
// Skip length coherence for synthetic cycle variables constrained by a
// stabilizer view / cycle guard (x'∈stab(R), noloop(x'',R)) introduced
// by cycle decomposition: the Σ^l ∩ view gradient does not converge,
// the benchmark has no real length constraints, and their length
// consistency is guaranteed by the soundness of the decomposition.
if (has_view_or_guard)
continue;
SASSERT(!regexes.empty());

View file

@ -73,7 +73,7 @@ static void test_nseq_param_validation() {
static void test_nseq_param_validation_rejects_invalid() {
std::cout << "test_nseq_param_validation_rejects_invalid\n";
const smt_params p;
static const char* invalid_variants[] = { "nseq2", "NSEQ", "nseqq", "nse", "Nseq", "nseq ", "" };
static const char* invalid_variants[] = { "nseq3", "NSEQ", "nseqq", "nse", "Nseq", "nseq ", "" };
for (const auto s : invalid_variants) {
bool threw = false;
try {