3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00

Potentially fixed termination problem with projection operators

This commit is contained in:
CEisenhofer 2026-06-02 17:04:31 +02:00
parent 5b41c6eb9f
commit 3908016651
4 changed files with 127 additions and 11 deletions

View file

@ -597,18 +597,40 @@ namespace euf {
return expr_ref(m_seq.mk_skolem(re_proj_name(), 3, args, re_sort), m);
}
// A symbolic-character derivative is a linear form: a *dispatch* over the
// character built from `ite` (predicate selection) and `union` (the linear
// factors α_i·r_i), bottoming out in ordinary derivative *states* r_i. We
// recognize this dispatch skeleton so that π can be pushed all the way to
// the concrete state leaves (where it is meaningful) rather than wrapped
// around a non-atomic union/ite (which is not a DFA state and breaks
// projection_state_in_Q / nullability). A plain regex state (even one that
// is itself a union, e.g. δ of a union regex) is NOT dispatch.
bool sgraph::is_char_dispatch(expr* e) const {
if (m.is_ite(e))
return true;
if (m_seq.re.is_empty(e))
return true;
expr* a = nullptr, *b = nullptr;
if (m_seq.re.is_union(e, a, b))
return is_char_dispatch(a) && is_char_dispatch(b);
return false;
}
expr_ref sgraph::wrap_proj(expr* e, expr* root, unsigned nu) {
// The symbolic derivative of a regex is an ite-term dispatching on
// character predicates; its leaves are ordinary derivative states.
// π_{Q,F} is propagated to all such leaves (paper §4). Partial-DFA
// states are produced from concrete-character derivatives and are thus
// ite-free, so every ite encountered here is dispatch structure.
// Push π_{Q,F} through the dispatch skeleton (ite / dispatch-union) to
// the concrete state leaves (paper §4).
expr* c = nullptr, *th = nullptr, *el = nullptr;
if (m.is_ite(e, c, th, el)) {
expr_ref t = wrap_proj(th, root, nu);
expr_ref f = wrap_proj(el, root, nu);
return expr_ref(m.mk_ite(c, t, f), m);
}
expr* a = nullptr, *b = nullptr;
if (m_seq.re.is_union(e, a, b) && is_char_dispatch(e)) {
expr_ref wa = wrap_proj(a, root, nu);
expr_ref wb = wrap_proj(b, root, nu);
return expr_ref(m_seq.re.mk_union(wa, wb), m);
}
// π(∅) ≡ ∅: a dead leaf stays dead.
if (m_seq.re.is_empty(e))
return expr_ref(e, m);
@ -691,10 +713,23 @@ namespace euf {
// top-level linear form, exactly as the standard mk_derivative does.
// These combinators also fold the trivial regex identities so the
// projection skolem and its inner state id are preserved verbatim.
// The symbolic-character derivative is a dispatch over the char built
// from `ite` (predicate selection) AND `union` (the linear factors).
// The combinators below distribute over BOTH so the dispatch skeleton
// stays at the top with concrete (π-wrapped) state leaves — otherwise a
// surrounding operator buries the dispatch and apply_regex_if_split can
// no longer resolve the symbolic char (the multi-cycle-SCC divergence).
// We only distribute over a union that is itself a char-dispatch (so a
// semantic state-union — e.g. δ of a union regex — is left intact and
// not needlessly expanded). All distributions are language-preserving
// (∩ and · distribute over ⊔; ~(A⊔B) = ~A ∩ ~B by De Morgan).
auto is_empty = [&](expr* r) { return m_seq.re.is_empty(r); };
auto is_full = [&](expr* r) { return m_seq.re.is_full_seq(r); };
auto is_eps = [&](expr* r) { return m_seq.re.is_epsilon(r); };
auto is_ite = [&](expr* r, expr*& c, expr*& t, expr*& e) { return m.is_ite(r, c, t, e); };
auto is_disp_union = [&](expr* r, expr*& a, expr*& b) {
return m_seq.re.is_union(r, a, b) && is_char_dispatch(r);
};
std::function<expr*(expr*, expr*)> mk_union = [&](expr* x, expr* y) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr;
@ -707,9 +742,11 @@ namespace euf {
return m_seq.re.mk_union(x, y);
};
std::function<expr*(expr*, expr*)> mk_inter = [&](expr* x, expr* y) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr;
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_inter(t, y), mk_inter(e, y));
if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_inter(x, t), mk_inter(x, e));
if (is_disp_union(x, a, b)) return mk_union(mk_inter(a, y), mk_inter(b, y));
if (is_disp_union(y, a, b)) return mk_union(mk_inter(x, a), mk_inter(x, b));
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
if (is_full(x)) return y;
if (is_full(y)) return x;
@ -717,17 +754,20 @@ namespace euf {
return m_seq.re.mk_inter(x, y);
};
std::function<expr*(expr*, expr*)> mk_concat = [&](expr* x, expr* y) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr;
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_concat(t, y), mk_concat(e, y));
if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_concat(x, t), mk_concat(x, e));
if (is_disp_union(x, a, b)) return mk_union(mk_concat(a, y), mk_concat(b, y));
if (is_disp_union(y, a, b)) return mk_union(mk_concat(x, a), mk_concat(x, b));
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
if (is_eps(x)) return y;
if (is_eps(y)) return x;
return m_seq.re.mk_concat(x, y);
};
std::function<expr*(expr*)> mk_compl = [&](expr* x) -> expr* {
expr *c = nullptr, *t = nullptr, *e = nullptr;
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_compl(t), mk_compl(e));
if (is_disp_union(x, a, b)) return mk_inter(mk_compl(a), mk_compl(b)); // De Morgan
if (is_empty(x)) return m_seq.re.mk_full_seq(re_sort);
if (is_full(x)) return m_seq.re.mk_empty(re_sort);
expr* inner = nullptr;

View file

@ -182,8 +182,11 @@ namespace euf {
bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const;
// Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu.
expr_ref mk_re_proj(expr* state, expr* root, unsigned nu);
// Wrap a (possibly ite-structured) symbolic-derivative result in the
// projection operator, propagating π into every ite leaf (paper §4).
// True if e is a symbolic-character dispatch skeleton (ite / union of
// dispatch, bottoming out in ∅) rather than a concrete regex state.
bool is_char_dispatch(expr* e) const;
// Wrap a (possibly ite/union-structured) symbolic-derivative result in
// the projection operator, propagating π into every dispatch leaf (§4).
expr_ref wrap_proj(expr* e, expr* root, unsigned nu);
// Projection-aware Brzozowski derivative w.r.t. a character expr
// (concrete or symbolic).

View file

@ -460,13 +460,19 @@ namespace smt {
expr_ref witness(m);
// We checked non-emptiness during Nielsen already
lbool wr = m_rewriter.some_seq_in_re(re_expr, witness);
if (wr != l_true && re->has_projection()) {
// some_seq_in_re cannot extract a witness from a projection
// operator (re.proj). Fall back to a projection-aware BFS over
// the (length-intersected) regex using the sgraph derivative.
wr = projection_witness(m_sg.mk(re_expr), witness);
}
if (wr == l_true) {
SASSERT(witness);
m_trail.push_back(witness);
m_factory->register_value(witness);
return witness;
}
IF_VERBOSE(1, verbose_stream() << "witness extraction failed for " << mk_pp(var->get_expr(), m)
IF_VERBOSE(1, verbose_stream() << "witness extraction failed for " << mk_pp(var->get_expr(), m)
<< " : " << wr << " with len " << (has_len ? len_val.to_string() : "unknown") << "\n" << mk_pp(re_expr, m) << "\n");
UNREACHABLE();
}
@ -507,6 +513,65 @@ namespace smt {
return m_seq.str.mk_empty(srt);
}
lbool seq_model::projection_witness(euf::snode* re0, expr_ref& witness) {
if (!re0 || !re0->get_expr())
return l_undef;
sort* seq_sort = nullptr;
if (!m_seq.is_re(re0->get_expr(), seq_sort))
return l_undef;
// Shortest-path BFS over the projection-aware derivative automaton.
// Each frontier item is (state, accepting-word-so-far). A state is
// accepting when re_nullable (projection-aware) reports nullable.
// The regex carries the length intersection (∩ Σ^n), so an accepting
// run has exactly the requested length and the search is finite.
vector<std::pair<euf::snode*, zstring>> work;
work.push_back({re0, zstring()});
uint_set visited;
visited.insert(re0->id());
unsigned head = 0;
const unsigned MAX_STATES = 100000;
while (head < work.size() && head < MAX_STATES) {
euf::snode* st = work[head].first;
zstring w = work[head].second;
++head;
if (m_sg.re_nullable(st) == l_true) {
witness = m_seq.str.mk_string(w);
return l_true;
}
if (st->is_fail())
continue;
euf::snode_vector mts;
m_sg.compute_minterms(st, mts);
for (euf::snode* mt : mts) {
euf::snode* d = m_sg.brzozowski_deriv(st, mt);
if (!d || d->is_fail())
continue;
if (visited.contains(d->id()))
continue;
visited.insert(d->id());
// Representative character of the minterm (must lie in mt so
// that δ_ch(st) = d). Minterms over the cycle alphabet are
// ranges / to_re singletons / full_char.
unsigned ch = 0;
expr* me = mt->get_expr();
expr* lo = nullptr, *hi = nullptr, *s = nullptr;
if (m_seq.re.is_range(me, lo, hi))
m_sg.decode_re_char(lo, ch);
else if (m_seq.re.is_to_re(me, s))
m_sg.decode_re_char(s, ch);
// re.full_char and anything else: ch = 0 (a valid representative)
work.push_back({d, w + zstring(ch)});
}
}
return l_undef;
}
void seq_model::collect_var_regex_constraints(seq::nielsen_node const* sat_node) {
SASSERT(sat_node);
for (auto const& mem : sat_node->str_mems()) {

View file

@ -117,6 +117,14 @@ namespace smt {
// to a plain fresh value from the factory.
expr* mk_fresh_value(euf::snode* var);
// Witness extraction for regexes that contain a projection operator
// (re.proj), which the standard seq_rewriter::some_seq_in_re cannot
// handle. Runs a shortest-path BFS over the projection-aware
// derivative automaton (m_sg) for a nullable (accepting) state,
// building the accepting word. Returns l_true and sets `witness`
// on success.
lbool projection_witness(euf::snode* re, expr_ref& witness);
// collect per-variable regex constraints from the state.
// For each positive str_mem, records the regex (or intersects
// with existing) into m_var_regex keyed by the string snode id.