3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-27 19:08:49 +00:00

Added real projection operator

This commit is contained in:
CEisenhofer 2026-05-29 15:51:35 +02:00
parent ff99cb442a
commit 70031b674c
7 changed files with 437 additions and 176 deletions

View file

@ -178,15 +178,14 @@ namespace seq {
bool str_mem::is_trivial(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
return false;
const auto& info = n->graph().seq().re.get_info(m_regex->get_expr());
return info.nullable == l_true;
// Projection-aware nullability (handles re.proj operators in m_regex).
return n->graph().sg().re_nullable(m_regex) == l_true;
}
bool str_mem::is_contradiction(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
return false;
const auto& info = n->graph().seq().re.get_info(m_regex->get_expr());
return info.nullable == l_false;
return n->graph().sg().re_nullable(m_regex) == l_false;
}
bool str_mem::contains_var(euf::snode* var) const {
@ -457,14 +456,49 @@ namespace seq {
m_context_solver(ctx_solver),
m_partial_dfa_pin(sg.get_manager()),
m_parikh(alloc(seq_parikh, sg)),
m_seq_regex(alloc(seq::seq_regex, sg)) {}
m_seq_regex(alloc(seq::seq_regex, sg)) {
// Answer projection-state membership queries during projection-aware
// derivatives (the sgraph cannot reach the partial DFA otherwise).
m_sg.set_projection_oracle(this);
}
nielsen_graph::~nielsen_graph() {
m_sg.set_projection_oracle(nullptr);
dealloc(m_parikh);
dealloc(m_seq_regex);
reset();
}
bool nielsen_graph::projection_state_in_Q(expr* state, unsigned nu) {
if (!state || nu == 0)
return false;
const unsigned sid = state->get_id();
// r ∈ Q_nu iff r is incident to a partial-DFA edge whose extraction
// index lies in [1, nu] (the "edges marked ≤ ν" subautomaton of the
// implementation-aspects section of the paper).
auto incident = [&](std::unordered_map<unsigned, unsigned_vector> const& adj) {
auto it = adj.find(sid);
if (it == adj.end())
return false;
for (const unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
const unsigned pidx = m_partial_dfa_edges[edge_idx].m_projection_idx;
if (pidx != 0 && pidx <= nu)
return true;
}
return false;
};
return incident(m_partial_dfa_out) || incident(m_partial_dfa_in);
}
euf::snode* nielsen_graph::mk_projection_term(euf::snode* root_re, unsigned nu) {
SASSERT(root_re && root_re->get_expr());
// π_{Q_nu, {root}}(root): current state == accepting state == root.
expr_ref proj = m_sg.mk_re_proj(root_re->get_expr(), root_re->get_expr(), nu);
return m_sg.mk(proj);
}
nielsen_node* nielsen_graph::mk_node() {
const unsigned id = m_nodes.size();
nielsen_node* n = alloc(nielsen_node, *this, id);
@ -1057,137 +1091,6 @@ namespace seq {
return covered;
}
euf::snode* nielsen_graph::build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx) {
SASSERT(root_re && root_re->get_expr());
sort* seq_sort = nullptr;
VERIFY(m_seq.is_re(root_re->get_expr(), seq_sort));
SASSERT(seq_sort);
sort* re_sort = root_re->get_expr()->get_sort();
// Floyd-Warshall is done over expressions, not snodes: every label
// edge that feeds this matrix comes from m_partial_dfa_edges whose
// labels (and src/dst) may correspond to snodes that no longer exist
// at the current sgraph scope. We work entirely with pinned exprs
// here and materialize a single snode for the simplified result at
// the very end. All intermediate exprs are kept alive by an
// expr_ref_vector that lives for the duration of this call.
expr_ref_vector pin(m);
expr* eps_expr = m_seq.re.mk_epsilon(seq_sort);
expr* empty_expr = m_seq.re.mk_empty(re_sort);
pin.push_back(eps_expr);
pin.push_back(empty_expr);
auto is_empty = [&](expr* re) -> bool {
return !re || m_seq.re.is_empty(re);
};
auto is_eps = [&](expr* re) -> bool {
return re && m_seq.re.is_epsilon(re);
};
auto mk_union = [&](expr* a, expr* b) -> expr* {
if (is_empty(a)) return b;
if (is_empty(b)) return a;
if (a == b) return a;
expr* r = m_seq.re.mk_union(a, b);
pin.push_back(r);
return r;
};
auto mk_concat = [&](expr* a, expr* b) -> expr* {
if (is_empty(a) || is_empty(b)) return empty_expr;
if (is_eps(a)) return b;
if (is_eps(b)) return a;
expr* r = m_seq.re.mk_concat(a, b);
pin.push_back(r);
return r;
};
auto mk_star = [&](expr* r) -> expr* {
if (is_empty(r) || is_eps(r)) return eps_expr;
if (r && m_seq.re.is_star(r)) return r;
expr* s = m_seq.re.mk_star(r);
pin.push_back(s);
return s;
};
unsigned_vector states;
states.reserve(scc.num_elems());
std::unordered_map<unsigned, unsigned> pos;
for (unsigned s : scc) {
pos.emplace(s, states.size());
states.push_back(s);
}
// scc / pos are keyed by expr id (see collect_scc_for_projection).
auto it_root = pos.find(root_re->get_expr()->get_id());
if (it_root == pos.end())
return nullptr;
unsigned n = states.size();
std::vector<std::vector<expr*>> R(n, std::vector<expr*>(n, nullptr));
for (unsigned src_id : states) {
auto it = m_partial_dfa_out.find(src_id);
if (it == m_partial_dfa_out.end())
continue;
for (unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (!e.m_src || !e.m_dst || !e.m_label)
continue;
if (!scc.contains(e.m_dst->get_id()))
continue;
if (e.m_projection_idx == 0 || e.m_projection_idx > extract_idx)
continue;
auto it_src = pos.find(e.m_src->get_id());
auto it_dst = pos.find(e.m_dst->get_id());
if (it_src == pos.end() || it_dst == pos.end())
continue;
unsigned i = it_src->second;
unsigned j = it_dst->second;
R[i][j] = mk_union(R[i][j], e.m_label);
}
}
for (unsigned i = 0; i < n; ++i) {
R[i][i] = mk_union(R[i][i], eps_expr);
}
for (unsigned k = 0; k < n; ++k) {
std::vector<expr*> col_k(n, nullptr), row_k(n, nullptr);
for (unsigned i = 0; i < n; ++i)
col_k[i] = R[i][k];
for (unsigned j = 0; j < n; ++j)
row_k[j] = R[k][j];
expr* loop_k = R[k][k];
expr* loop_star = mk_star(loop_k);
for (unsigned i = 0; i < n; ++i) {
if (is_empty(col_k[i]))
continue;
for (unsigned j = 0; j < n; ++j) {
if (is_empty(row_k[j]))
continue;
expr* via_k = mk_concat(mk_concat(col_k[i], loop_star), row_k[j]);
R[i][j] = mk_union(R[i][j], via_k);
}
}
}
expr* result = R[it_root->second][it_root->second];
if (!result)
return m_sg.mk(eps_expr);
expr_ref simplified(result, m);
th_rewriter trw(m);
trw(simplified);
return m_sg.mk(simplified);
}
bool nielsen_graph::try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re) {
SASSERT(root_re && root_re->get_expr());
projection_re = nullptr;
@ -1207,7 +1110,10 @@ namespace seq {
if (covered_edges <= prev_covered)
return false;
projection_re = build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx);
// Keep the stabilizer symbolic as a projection operator over the
// (just-marked) explored subautomaton snapshot. Its language is
// refined lazily through projection-aware derivatives.
projection_re = mk_projection_term(root_re, m_projection_extract_idx);
if (!projection_re)
return false;
@ -1222,7 +1128,6 @@ namespace seq {
expr_ref new_arg(v->get_expr(), m);
expr_ref new_l(left, m), new_r(right, m);
expr* arg, *l, *r;
std::cout << "Creating slice for " << mk_pp(new_arg, m) << "; " << mk_pp(new_l, m) << "; " << mk_pp(new_r, m) << std::endl;
if (m_sk.is_slice(new_arg, arg, l, r)) {
new_l = a.mk_add(left, l);
@ -1232,7 +1137,6 @@ namespace seq {
new_arg = arg;
}
expr_ref slice = m_sk.mk_slice(new_arg, new_l, new_r);
std::cout << "Got " << mk_pp(slice, m) << std::endl;
return m_sg.mk(slice);
}
@ -1713,35 +1617,45 @@ namespace seq {
break;
euf::snode* src_re = mem.m_regex;
seq_rewriter rw(m);
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
// Extract the inner char expression from seq.unit(?inner)
expr *inner_char = tok->arg(0)->get_expr();
// substitute the inner char for the derivative variable in d
var_subst vs(m);
d = vs(d, inner_char);
th_rewriter thrw(m);
thrw(d);
// Record concrete minterm edges for src_re so cycle_decomp can
// detect SCCs lazily (mirrors the incremental DFA building from
// concrete char consumption in the loop above).
if (src_re->is_ground()) {
euf::snode_vector mts;
sg.compute_minterms(src_re, mts);
for (euf::snode* mt : mts) {
euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt);
if (mt_deriv && !mt_deriv->is_fail())
m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv);
}
euf::snode* next = nullptr;
if (src_re->has_projection()) {
// The generic symbolic derivative cannot see the projection
// operator; route through the projection-aware derivative,
// which yields the ite-residual with π propagated to leaves.
next = sg.brzozowski_deriv(src_re, tok);
}
else {
seq_rewriter rw(m);
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
auto next = sg.mk(d);
// Extract the inner char expression from seq.unit(?inner)
expr *inner_char = tok->arg(0)->get_expr();
// substitute the inner char for the derivative variable in d
var_subst vs(m);
d = vs(d, inner_char);
th_rewriter thrw(m);
thrw(d);
// Record concrete minterm edges for src_re so cycle_decomp can
// detect SCCs lazily (mirrors the incremental DFA building from
// concrete char consumption in the loop above).
if (src_re->is_ground()) {
euf::snode_vector mts;
sg.compute_minterms(src_re, mts);
for (euf::snode* mt : mts) {
euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt);
if (mt_deriv && !mt_deriv->is_fail())
m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv);
}
}
next = sg.mk(d);
}
if (next->is_fail()) {
TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << " d: " << d << "\n");
TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << "\n");
set_general_conflict();
set_conflict(backtrack_reason::regex, mem.m_dep);
return simplify_result::conflict;
@ -3384,7 +3298,10 @@ namespace seq {
uint_set scc;
if (!collect_scc_for_projection(root_re, scc))
return nullptr;
return build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx);
// Symbolic projection over the edges marked by earlier extractions
// (index ≤ current snapshot). No new marking here, mirroring the old
// behaviour of reusing the current extraction index.
return mk_projection_term(root_re, m_projection_extract_idx);
}
// -----------------------------------------------------------------------
@ -3395,6 +3312,10 @@ namespace seq {
// -----------------------------------------------------------------------
bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) {
TRACE(seq, tout << "cycle_subsumption ENTER node " << node->id() << " with mems:\n";
for (auto const& m2 : node->str_mems())
tout << " [prim=" << m2.is_primitive() << "] "
<< mk_pp(m2.m_str->get_expr(), m) << "" << mk_pp(m2.m_regex->get_expr(), m) << "\n");
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
SASSERT(mem.well_formed());
@ -3418,7 +3339,12 @@ namespace seq {
continue;
// Check L(x_regex) ⊆ L(stabilizer).
if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true)
lbool subset = m_seq_regex->is_language_subset(x_regex, stabilizer);
TRACE(seq, tout << "cycle_subsumption check: first=" << mk_pp(first->get_expr(), m)
<< "\n x_regex=" << mk_pp(x_regex->get_expr(), m)
<< "\n stabilizer=" << mk_pp(stabilizer->get_expr(), m)
<< "\n subset=" << subset << "\n");
if (subset != l_true)
continue;
// Subsume: replace x·rest ∈ r with rest ∈ r.
@ -3454,10 +3380,11 @@ namespace seq {
// cycle is detected, project SCC onto stabilizer constraint b.
// Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*).
//
// Here b = Proj{R}{E_SCC}{R} is the language of all paths from R back to R
// in the known SCC edges (including ε), which equals s* for the non-empty
// cycle language s. Equivalently, stabilizer_re from build_projection_regex_from_scc
// is the Kleene closure computed by Floyd-Warshall over the SCC subgraph.
// Here stabilizer_re = π_{Q_SCC,{R}}(R) is the projection operator denoting
// the language of all paths from R back to R that stay within the explored
// subautomaton Q_SCC (including ε), i.e. s* for the non-empty cycle language
// s. It is kept symbolic; its derivative/nullability are evaluated lazily by
// the projection-aware sgraph (paper §3.3) rather than materialized.
//
// The constraint on x'' prevents divergence: x'' may not begin with any
// non-empty word from L(stabilizer_re), so it cannot re-enter the cycle.
@ -3501,13 +3428,22 @@ namespace seq {
euf::snode* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort));
euf::snode* xpp = get_tail(x, compute_length_expr(xp).get());
euf::snode* xp_xpp = m_sg.mk_concat(xp, xpp);
TRACE(seq, tout << "cycle_decomp build: xp=" << mk_pp(xp->get_expr(), m)
<< "\n xpp=" << mk_pp(xpp->get_expr(), m)
<< "\n xp_xpp=" << mk_pp(xp_xpp->get_expr(), m)
<< "\n xp_xpp.first=" << mk_pp(xp_xpp->first()->get_expr(), m) << "\n");
nielsen_node* child = mk_child(node);
SASSERT(child->m_str_mem[mi] == mem);
nielsen_edge* e = mk_edge(node, child, false);
const nielsen_subst s(x, xp_xpp, nullptr, mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
// remove from mi^th element of the child the leading token, as it is immediately subsumed
SASSERT(child->m_str_mem[mi].m_str->first() == xp);
child->m_str_mem[mi].m_str = dir_drop(m_sg, child->m_str_mem[mi].m_str, 1, true);
// x' ∈ stabilizer_re (= s*, all repetitions of the detected cycle)
child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep));