mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 15:16:29 +00:00
Potentially fixed termination problem with projection operators
This commit is contained in:
parent
5b41c6eb9f
commit
3908016651
4 changed files with 127 additions and 11 deletions
|
|
@ -597,18 +597,40 @@ namespace euf {
|
|||
return expr_ref(m_seq.mk_skolem(re_proj_name(), 3, args, re_sort), m);
|
||||
}
|
||||
|
||||
// A symbolic-character derivative is a linear form: a *dispatch* over the
|
||||
// character built from `ite` (predicate selection) and `union` (the linear
|
||||
// factors α_i·r_i), bottoming out in ordinary derivative *states* r_i. We
|
||||
// recognize this dispatch skeleton so that π can be pushed all the way to
|
||||
// the concrete state leaves (where it is meaningful) rather than wrapped
|
||||
// around a non-atomic union/ite (which is not a DFA state and breaks
|
||||
// projection_state_in_Q / nullability). A plain regex state (even one that
|
||||
// is itself a union, e.g. δ of a union regex) is NOT dispatch.
|
||||
bool sgraph::is_char_dispatch(expr* e) const {
|
||||
if (m.is_ite(e))
|
||||
return true;
|
||||
if (m_seq.re.is_empty(e))
|
||||
return true;
|
||||
expr* a = nullptr, *b = nullptr;
|
||||
if (m_seq.re.is_union(e, a, b))
|
||||
return is_char_dispatch(a) && is_char_dispatch(b);
|
||||
return false;
|
||||
}
|
||||
|
||||
expr_ref sgraph::wrap_proj(expr* e, expr* root, unsigned nu) {
|
||||
// The symbolic derivative of a regex is an ite-term dispatching on
|
||||
// character predicates; its leaves are ordinary derivative states.
|
||||
// π_{Q,F} is propagated to all such leaves (paper §4). Partial-DFA
|
||||
// states are produced from concrete-character derivatives and are thus
|
||||
// ite-free, so every ite encountered here is dispatch structure.
|
||||
// Push π_{Q,F} through the dispatch skeleton (ite / dispatch-union) to
|
||||
// the concrete state leaves (paper §4).
|
||||
expr* c = nullptr, *th = nullptr, *el = nullptr;
|
||||
if (m.is_ite(e, c, th, el)) {
|
||||
expr_ref t = wrap_proj(th, root, nu);
|
||||
expr_ref f = wrap_proj(el, root, nu);
|
||||
return expr_ref(m.mk_ite(c, t, f), m);
|
||||
}
|
||||
expr* a = nullptr, *b = nullptr;
|
||||
if (m_seq.re.is_union(e, a, b) && is_char_dispatch(e)) {
|
||||
expr_ref wa = wrap_proj(a, root, nu);
|
||||
expr_ref wb = wrap_proj(b, root, nu);
|
||||
return expr_ref(m_seq.re.mk_union(wa, wb), m);
|
||||
}
|
||||
// π(∅) ≡ ∅: a dead leaf stays dead.
|
||||
if (m_seq.re.is_empty(e))
|
||||
return expr_ref(e, m);
|
||||
|
|
@ -691,10 +713,23 @@ namespace euf {
|
|||
// top-level linear form, exactly as the standard mk_derivative does.
|
||||
// These combinators also fold the trivial regex identities so the
|
||||
// projection skolem and its inner state id are preserved verbatim.
|
||||
// The symbolic-character derivative is a dispatch over the char built
|
||||
// from `ite` (predicate selection) AND `union` (the linear factors).
|
||||
// The combinators below distribute over BOTH so the dispatch skeleton
|
||||
// stays at the top with concrete (π-wrapped) state leaves — otherwise a
|
||||
// surrounding operator buries the dispatch and apply_regex_if_split can
|
||||
// no longer resolve the symbolic char (the multi-cycle-SCC divergence).
|
||||
// We only distribute over a union that is itself a char-dispatch (so a
|
||||
// semantic state-union — e.g. δ of a union regex — is left intact and
|
||||
// not needlessly expanded). All distributions are language-preserving
|
||||
// (∩ and · distribute over ⊔; ~(A⊔B) = ~A ∩ ~B by De Morgan).
|
||||
auto is_empty = [&](expr* r) { return m_seq.re.is_empty(r); };
|
||||
auto is_full = [&](expr* r) { return m_seq.re.is_full_seq(r); };
|
||||
auto is_eps = [&](expr* r) { return m_seq.re.is_epsilon(r); };
|
||||
auto is_ite = [&](expr* r, expr*& c, expr*& t, expr*& e) { return m.is_ite(r, c, t, e); };
|
||||
auto is_disp_union = [&](expr* r, expr*& a, expr*& b) {
|
||||
return m_seq.re.is_union(r, a, b) && is_char_dispatch(r);
|
||||
};
|
||||
|
||||
std::function<expr*(expr*, expr*)> mk_union = [&](expr* x, expr* y) -> expr* {
|
||||
expr *c = nullptr, *t = nullptr, *e = nullptr;
|
||||
|
|
@ -707,9 +742,11 @@ namespace euf {
|
|||
return m_seq.re.mk_union(x, y);
|
||||
};
|
||||
std::function<expr*(expr*, expr*)> mk_inter = [&](expr* x, expr* y) -> expr* {
|
||||
expr *c = nullptr, *t = nullptr, *e = nullptr;
|
||||
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
|
||||
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_inter(t, y), mk_inter(e, y));
|
||||
if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_inter(x, t), mk_inter(x, e));
|
||||
if (is_disp_union(x, a, b)) return mk_union(mk_inter(a, y), mk_inter(b, y));
|
||||
if (is_disp_union(y, a, b)) return mk_union(mk_inter(x, a), mk_inter(x, b));
|
||||
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
|
||||
if (is_full(x)) return y;
|
||||
if (is_full(y)) return x;
|
||||
|
|
@ -717,17 +754,20 @@ namespace euf {
|
|||
return m_seq.re.mk_inter(x, y);
|
||||
};
|
||||
std::function<expr*(expr*, expr*)> mk_concat = [&](expr* x, expr* y) -> expr* {
|
||||
expr *c = nullptr, *t = nullptr, *e = nullptr;
|
||||
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
|
||||
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_concat(t, y), mk_concat(e, y));
|
||||
if (is_ite(y, c, t, e)) return m.mk_ite(c, mk_concat(x, t), mk_concat(x, e));
|
||||
if (is_disp_union(x, a, b)) return mk_union(mk_concat(a, y), mk_concat(b, y));
|
||||
if (is_disp_union(y, a, b)) return mk_union(mk_concat(x, a), mk_concat(x, b));
|
||||
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
|
||||
if (is_eps(x)) return y;
|
||||
if (is_eps(y)) return x;
|
||||
return m_seq.re.mk_concat(x, y);
|
||||
};
|
||||
std::function<expr*(expr*)> mk_compl = [&](expr* x) -> expr* {
|
||||
expr *c = nullptr, *t = nullptr, *e = nullptr;
|
||||
expr *c = nullptr, *t = nullptr, *e = nullptr, *a = nullptr, *b = nullptr;
|
||||
if (is_ite(x, c, t, e)) return m.mk_ite(c, mk_compl(t), mk_compl(e));
|
||||
if (is_disp_union(x, a, b)) return mk_inter(mk_compl(a), mk_compl(b)); // De Morgan
|
||||
if (is_empty(x)) return m_seq.re.mk_full_seq(re_sort);
|
||||
if (is_full(x)) return m_seq.re.mk_empty(re_sort);
|
||||
expr* inner = nullptr;
|
||||
|
|
|
|||
|
|
@ -182,8 +182,11 @@ namespace euf {
|
|||
bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const;
|
||||
// Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu.
|
||||
expr_ref mk_re_proj(expr* state, expr* root, unsigned nu);
|
||||
// Wrap a (possibly ite-structured) symbolic-derivative result in the
|
||||
// projection operator, propagating π into every ite leaf (paper §4).
|
||||
// True if e is a symbolic-character dispatch skeleton (ite / union of
|
||||
// dispatch, bottoming out in ∅) rather than a concrete regex state.
|
||||
bool is_char_dispatch(expr* e) const;
|
||||
// Wrap a (possibly ite/union-structured) symbolic-derivative result in
|
||||
// the projection operator, propagating π into every dispatch leaf (§4).
|
||||
expr_ref wrap_proj(expr* e, expr* root, unsigned nu);
|
||||
// Projection-aware Brzozowski derivative w.r.t. a character expr
|
||||
// (concrete or symbolic).
|
||||
|
|
|
|||
|
|
@ -460,13 +460,19 @@ namespace smt {
|
|||
expr_ref witness(m);
|
||||
// We checked non-emptiness during Nielsen already
|
||||
lbool wr = m_rewriter.some_seq_in_re(re_expr, witness);
|
||||
if (wr != l_true && re->has_projection()) {
|
||||
// some_seq_in_re cannot extract a witness from a projection
|
||||
// operator (re.proj). Fall back to a projection-aware BFS over
|
||||
// the (length-intersected) regex using the sgraph derivative.
|
||||
wr = projection_witness(m_sg.mk(re_expr), witness);
|
||||
}
|
||||
if (wr == l_true) {
|
||||
SASSERT(witness);
|
||||
m_trail.push_back(witness);
|
||||
m_factory->register_value(witness);
|
||||
return witness;
|
||||
}
|
||||
IF_VERBOSE(1, verbose_stream() << "witness extraction failed for " << mk_pp(var->get_expr(), m)
|
||||
IF_VERBOSE(1, verbose_stream() << "witness extraction failed for " << mk_pp(var->get_expr(), m)
|
||||
<< " : " << wr << " with len " << (has_len ? len_val.to_string() : "unknown") << "\n" << mk_pp(re_expr, m) << "\n");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
|
@ -507,6 +513,65 @@ namespace smt {
|
|||
return m_seq.str.mk_empty(srt);
|
||||
}
|
||||
|
||||
lbool seq_model::projection_witness(euf::snode* re0, expr_ref& witness) {
|
||||
if (!re0 || !re0->get_expr())
|
||||
return l_undef;
|
||||
sort* seq_sort = nullptr;
|
||||
if (!m_seq.is_re(re0->get_expr(), seq_sort))
|
||||
return l_undef;
|
||||
|
||||
// Shortest-path BFS over the projection-aware derivative automaton.
|
||||
// Each frontier item is (state, accepting-word-so-far). A state is
|
||||
// accepting when re_nullable (projection-aware) reports nullable.
|
||||
// The regex carries the length intersection (∩ Σ^n), so an accepting
|
||||
// run has exactly the requested length and the search is finite.
|
||||
vector<std::pair<euf::snode*, zstring>> work;
|
||||
work.push_back({re0, zstring()});
|
||||
uint_set visited;
|
||||
visited.insert(re0->id());
|
||||
|
||||
unsigned head = 0;
|
||||
const unsigned MAX_STATES = 100000;
|
||||
while (head < work.size() && head < MAX_STATES) {
|
||||
euf::snode* st = work[head].first;
|
||||
zstring w = work[head].second;
|
||||
++head;
|
||||
|
||||
if (m_sg.re_nullable(st) == l_true) {
|
||||
witness = m_seq.str.mk_string(w);
|
||||
return l_true;
|
||||
}
|
||||
if (st->is_fail())
|
||||
continue;
|
||||
|
||||
euf::snode_vector mts;
|
||||
m_sg.compute_minterms(st, mts);
|
||||
for (euf::snode* mt : mts) {
|
||||
euf::snode* d = m_sg.brzozowski_deriv(st, mt);
|
||||
if (!d || d->is_fail())
|
||||
continue;
|
||||
if (visited.contains(d->id()))
|
||||
continue;
|
||||
visited.insert(d->id());
|
||||
|
||||
// Representative character of the minterm (must lie in mt so
|
||||
// that δ_ch(st) = d). Minterms over the cycle alphabet are
|
||||
// ranges / to_re singletons / full_char.
|
||||
unsigned ch = 0;
|
||||
expr* me = mt->get_expr();
|
||||
expr* lo = nullptr, *hi = nullptr, *s = nullptr;
|
||||
if (m_seq.re.is_range(me, lo, hi))
|
||||
m_sg.decode_re_char(lo, ch);
|
||||
else if (m_seq.re.is_to_re(me, s))
|
||||
m_sg.decode_re_char(s, ch);
|
||||
// re.full_char and anything else: ch = 0 (a valid representative)
|
||||
|
||||
work.push_back({d, w + zstring(ch)});
|
||||
}
|
||||
}
|
||||
return l_undef;
|
||||
}
|
||||
|
||||
void seq_model::collect_var_regex_constraints(seq::nielsen_node const* sat_node) {
|
||||
SASSERT(sat_node);
|
||||
for (auto const& mem : sat_node->str_mems()) {
|
||||
|
|
|
|||
|
|
@ -117,6 +117,14 @@ namespace smt {
|
|||
// to a plain fresh value from the factory.
|
||||
expr* mk_fresh_value(euf::snode* var);
|
||||
|
||||
// Witness extraction for regexes that contain a projection operator
|
||||
// (re.proj), which the standard seq_rewriter::some_seq_in_re cannot
|
||||
// handle. Runs a shortest-path BFS over the projection-aware
|
||||
// derivative automaton (m_sg) for a nullable (accepting) state,
|
||||
// building the accepting word. Returns l_true and sets `witness`
|
||||
// on success.
|
||||
lbool projection_witness(euf::snode* re, expr_ref& witness);
|
||||
|
||||
// collect per-variable regex constraints from the state.
|
||||
// For each positive str_mem, records the regex (or intersects
|
||||
// with existing) into m_var_regex keyed by the string snode id.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue