3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-07-04 22:36:10 +00:00

Prevent expressions in partial dfa being freed to early

This commit is contained in:
CEisenhofer 2026-05-26 13:07:38 +02:00
parent c18aa647e1
commit 4cd908345a
5 changed files with 125 additions and 62 deletions

View file

@ -28,7 +28,7 @@ namespace euf {
m_rewriter(m), m_rewriter(m),
m_egraph(eg), m_egraph(eg),
m_str_sort(m_seq.str.mk_string_sort(), m), m_str_sort(m_seq.str.mk_string_sort(), m),
m_add_plugin(add_plugin) { m_pin(m) {
// create seq_plugin and register it with the egraph // create seq_plugin and register it with the egraph
if (add_plugin) if (add_plugin)
m_egraph.add_plugin(alloc(seq_plugin, m_egraph, this)); m_egraph.add_plugin(alloc(seq_plugin, m_egraph, this));
@ -341,7 +341,12 @@ namespace euf {
unsigned eid = e->get_id(); unsigned eid = e->get_id();
m_expr2snode.reserve(eid + 1, nullptr); m_expr2snode.reserve(eid + 1, nullptr);
m_expr2snode[eid] = n; m_expr2snode[eid] = n;
// pin expression via egraph (the egraph has an expr trail) // Pin the expression for the lifetime of the sgraph: the egraph trail
// would otherwise release it on pop, but the underlying snode lives in
// m_region (never freed) and may still be referenced by clients past
// that pop. See the comment on m_pin in euf_sgraph.h.
m_pin.push_back(e);
// also keep the enode pinning behaviour so congruence closure sees e
mk_enode(e); mk_enode(e);
++m_stats.m_num_nodes; ++m_stats.m_num_nodes;
return n; return n;

View file

@ -89,7 +89,16 @@ namespace euf {
unsigned_vector m_scopes; unsigned_vector m_scopes;
unsigned m_num_scopes = 0; unsigned m_num_scopes = 0;
stats m_stats; stats m_stats;
bool m_add_plugin; // whether sgraph created the seq_plugin
// Pins every expression that any (live or popped) snode references via
// m_expr. snodes are allocated in m_region — which is never freed —
// but their m_expr field is owned by the egraph trail. Without this
// pin the egraph would release expressions on pop while clients still
// hold the matching snode* (e.g. inside nielsen_node str_mems, edge
// substitutions, or the partial-DFA cache), turning every later
// get_expr() into a use-after-free. The pin grows monotonically; it
// is dropped only when sgraph itself is destroyed.
expr_ref_vector m_pin;
// maps expression id to snode // maps expression id to snode
ptr_vector<snode> m_expr2snode; ptr_vector<snode> m_expr2snode;

View file

@ -430,6 +430,7 @@ namespace seq {
m_sk(m, m_rw), m_sk(m, m_rw),
m_length_solver(solver), m_length_solver(solver),
m_context_solver(ctx_solver), m_context_solver(ctx_solver),
m_partial_dfa_pin(sg.get_manager()),
m_parikh(alloc(seq_parikh, sg)), m_parikh(alloc(seq_parikh, sg)),
m_seq_regex(alloc(seq::seq_regex, sg)) {} m_seq_regex(alloc(seq::seq_regex, sg)) {}
@ -526,6 +527,7 @@ namespace seq {
m_partial_dfa_out.clear(); m_partial_dfa_out.clear();
m_partial_dfa_in.clear(); m_partial_dfa_in.clear();
m_partial_dfa_edge_index.clear(); m_partial_dfa_edge_index.clear();
m_partial_dfa_pin.reset();
m_projection_extract_idx = 0; m_projection_extract_idx = 0;
m_projection_cover_size.reset(); m_projection_cover_size.reset();
//m_length_trail.reset(); //m_length_trail.reset();
@ -880,29 +882,44 @@ namespace seq {
if (!m_seq.is_re(label_re->get_expr()) || !label_re->is_ground()) if (!m_seq.is_re(label_re->get_expr()) || !label_re->is_ground())
return; return;
partial_dfa_edge_key key{ src_re->id(), label_re->id(), dst_re->id() }; expr* src_e = src_re->get_expr();
expr* label_e = label_re->get_expr();
expr* dst_e = dst_re->get_expr();
partial_dfa_edge_key key{ src_e->get_id(), label_e->get_id(), dst_e->get_id() };
if (m_partial_dfa_edge_index.contains(key)) if (m_partial_dfa_edge_index.contains(key))
return; return;
// Pin each expression so the egraph cannot release it on pop while
// we still reference it from the cache. Bumping the ref count of an
// already-pinned expression is harmless; the wasted slot is bounded
// by the unique-edge count.
m_partial_dfa_pin.push_back(src_e);
m_partial_dfa_pin.push_back(label_e);
m_partial_dfa_pin.push_back(dst_e);
unsigned edge_idx = m_partial_dfa_edges.size(); unsigned edge_idx = m_partial_dfa_edges.size();
m_partial_dfa_edge_index.emplace(key, edge_idx); m_partial_dfa_edge_index.emplace(key, edge_idx);
partial_dfa_edge e; partial_dfa_edge e;
e.m_src = src_re; e.m_src = src_e;
e.m_label = label_re; e.m_label = label_e;
e.m_dst = dst_re; e.m_dst = dst_e;
m_partial_dfa_edges.push_back(e); m_partial_dfa_edges.push_back(e);
m_partial_dfa_out[src_re->id()].push_back(edge_idx); m_partial_dfa_out[src_e->get_id()].push_back(edge_idx);
m_partial_dfa_in[dst_re->id()].push_back(edge_idx); m_partial_dfa_in[dst_e->get_id()].push_back(edge_idx);
} }
bool nielsen_graph::collect_scc_for_projection(euf::snode* root_re, uint_set& scc) const { bool nielsen_graph::collect_scc_for_projection(euf::snode* root_re, uint_set& scc) const {
scc.reset(); scc.reset();
if (!root_re) if (!root_re || !root_re->get_expr())
return false; return false;
const unsigned root_id = root_re->id(); // scc, fwd, bwd contain expression ids (matching the keys in
// m_partial_dfa_out / m_partial_dfa_in). Expression ids are stable
// across sgraph pops because we pin them in m_partial_dfa_pin.
const unsigned root_id = root_re->get_expr()->get_id();
uint_set fwd, bwd; uint_set fwd, bwd;
unsigned_vector stack; unsigned_vector stack;
@ -921,7 +938,7 @@ namespace seq {
continue; continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx]; partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (e.m_dst) if (e.m_dst)
stack.push_back(e.m_dst->id()); stack.push_back(e.m_dst->get_id());
} }
} }
@ -940,7 +957,7 @@ namespace seq {
continue; continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx]; partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (e.m_src) if (e.m_src)
stack.push_back(e.m_src->id()); stack.push_back(e.m_src->get_id());
} }
} }
@ -962,13 +979,14 @@ namespace seq {
if (edge_idx >= m_partial_dfa_edges.size()) if (edge_idx >= m_partial_dfa_edges.size())
continue; continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx]; partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (e.m_dst && e.m_dst->id() == root_id) if (e.m_dst && e.m_dst->get_id() == root_id)
return true; return true;
} }
return false; return false;
} }
unsigned nielsen_graph::mark_scc_projection_edges(uint_set const& scc) { unsigned nielsen_graph::mark_scc_projection_edges(uint_set const& scc) {
// scc contains expr ids (see collect_scc_for_projection).
++m_projection_extract_idx; ++m_projection_extract_idx;
const unsigned extract_idx = m_projection_extract_idx; const unsigned extract_idx = m_projection_extract_idx;
unsigned covered = 0; unsigned covered = 0;
@ -981,7 +999,7 @@ namespace seq {
if (edge_idx >= m_partial_dfa_edges.size()) if (edge_idx >= m_partial_dfa_edges.size())
continue; continue;
partial_dfa_edge& e = m_partial_dfa_edges[edge_idx]; partial_dfa_edge& e = m_partial_dfa_edges[edge_idx];
if (!e.m_dst || !scc.contains(e.m_dst->id())) if (!e.m_dst || !scc.contains(e.m_dst->get_id()))
continue; continue;
if (e.m_projection_idx == 0) if (e.m_projection_idx == 0)
e.m_projection_idx = extract_idx; e.m_projection_idx = extract_idx;
@ -1000,39 +1018,52 @@ namespace seq {
SASSERT(seq_sort); SASSERT(seq_sort);
sort* re_sort = root_re->get_expr()->get_sort(); sort* re_sort = root_re->get_expr()->get_sort();
euf::snode* eps = m_sg.mk(m_seq.re.mk_epsilon(seq_sort));
euf::snode* empty = m_sg.mk(m_seq.re.mk_empty(re_sort));
auto is_empty = [&](const euf::snode* re) -> bool { // Floyd-Warshall is done over expressions, not snodes: every label
return !re || re->is_fail() || !re->get_expr() || m_seq.re.is_empty(re->get_expr()); // edge that feeds this matrix comes from m_partial_dfa_edges whose
// labels (and src/dst) may correspond to snodes that no longer exist
// at the current sgraph scope. We work entirely with pinned exprs
// here and materialize a single snode for the simplified result at
// the very end. All intermediate exprs are kept alive by an
// expr_ref_vector that lives for the duration of this call.
expr_ref_vector pin(m);
expr* eps_expr = m_seq.re.mk_epsilon(seq_sort);
expr* empty_expr = m_seq.re.mk_empty(re_sort);
pin.push_back(eps_expr);
pin.push_back(empty_expr);
auto is_empty = [&](expr* re) -> bool {
return !re || m_seq.re.is_empty(re);
}; };
auto mk_union = [&](euf::snode* a, euf::snode* b) -> euf::snode* { auto is_eps = [&](expr* re) -> bool {
if (is_empty(a)) return re && m_seq.re.is_epsilon(re);
return b;
if (is_empty(b))
return a;
if (a == b || a->get_expr() == b->get_expr())
return a;
return m_sg.mk(m_seq.re.mk_union(a->get_expr(), b->get_expr()));
}; };
auto mk_concat = [&](euf::snode* a, euf::snode* b) -> euf::snode* { auto mk_union = [&](expr* a, expr* b) -> expr* {
if (is_empty(a) || is_empty(b)) if (is_empty(a)) return b;
return empty; if (is_empty(b)) return a;
if (a == eps || (a->get_expr() && m_seq.re.is_epsilon(a->get_expr()))) if (a == b) return a;
return b; expr* r = m_seq.re.mk_union(a, b);
if (b == eps || (b->get_expr() && m_seq.re.is_epsilon(b->get_expr()))) pin.push_back(r);
return a; return r;
return m_sg.mk(m_seq.re.mk_concat(a->get_expr(), b->get_expr()));
}; };
auto mk_star = [&](euf::snode* r) -> euf::snode* { auto mk_concat = [&](expr* a, expr* b) -> expr* {
if (is_empty(r) || r == eps || (r->get_expr() && m_seq.re.is_epsilon(r->get_expr()))) if (is_empty(a) || is_empty(b)) return empty_expr;
return eps; if (is_eps(a)) return b;
if (r->get_expr() && m_seq.re.is_star(r->get_expr())) if (is_eps(b)) return a;
return r; expr* r = m_seq.re.mk_concat(a, b);
return m_sg.mk(m_seq.re.mk_star(r->get_expr())); pin.push_back(r);
return r;
};
auto mk_star = [&](expr* r) -> expr* {
if (is_empty(r) || is_eps(r)) return eps_expr;
if (r && m_seq.re.is_star(r)) return r;
expr* s = m_seq.re.mk_star(r);
pin.push_back(s);
return s;
}; };
unsigned_vector states; unsigned_vector states;
@ -1043,12 +1074,13 @@ namespace seq {
states.push_back(s); states.push_back(s);
} }
auto it_root = pos.find(root_re->id()); // scc / pos are keyed by expr id (see collect_scc_for_projection).
auto it_root = pos.find(root_re->get_expr()->get_id());
if (it_root == pos.end()) if (it_root == pos.end())
return nullptr; return nullptr;
unsigned n = states.size(); unsigned n = states.size();
std::vector<std::vector<euf::snode*>> R(n, std::vector<euf::snode*>(n, nullptr)); std::vector<std::vector<expr*>> R(n, std::vector<expr*>(n, nullptr));
for (unsigned src_id : states) { for (unsigned src_id : states) {
auto it = m_partial_dfa_out.find(src_id); auto it = m_partial_dfa_out.find(src_id);
@ -1060,12 +1092,12 @@ namespace seq {
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx]; partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (!e.m_src || !e.m_dst || !e.m_label) if (!e.m_src || !e.m_dst || !e.m_label)
continue; continue;
if (!scc.contains(e.m_dst->id())) if (!scc.contains(e.m_dst->get_id()))
continue; continue;
if (e.m_projection_idx == 0 || e.m_projection_idx > extract_idx) if (e.m_projection_idx == 0 || e.m_projection_idx > extract_idx)
continue; continue;
auto it_src = pos.find(e.m_src->id()); auto it_src = pos.find(e.m_src->get_id());
auto it_dst = pos.find(e.m_dst->id()); auto it_dst = pos.find(e.m_dst->get_id());
if (it_src == pos.end() || it_dst == pos.end()) if (it_src == pos.end() || it_dst == pos.end())
continue; continue;
unsigned i = it_src->second; unsigned i = it_src->second;
@ -1075,17 +1107,17 @@ namespace seq {
} }
for (unsigned i = 0; i < n; ++i) { for (unsigned i = 0; i < n; ++i) {
R[i][i] = mk_union(R[i][i], eps); R[i][i] = mk_union(R[i][i], eps_expr);
} }
for (unsigned k = 0; k < n; ++k) { for (unsigned k = 0; k < n; ++k) {
std::vector<euf::snode*> col_k(n, nullptr), row_k(n, nullptr); std::vector<expr*> col_k(n, nullptr), row_k(n, nullptr);
for (unsigned i = 0; i < n; ++i) for (unsigned i = 0; i < n; ++i)
col_k[i] = R[i][k]; col_k[i] = R[i][k];
for (unsigned j = 0; j < n; ++j) for (unsigned j = 0; j < n; ++j)
row_k[j] = R[k][j]; row_k[j] = R[k][j];
euf::snode* loop_k = R[k][k]; expr* loop_k = R[k][k];
euf::snode* loop_star = mk_star(loop_k); expr* loop_star = mk_star(loop_k);
for (unsigned i = 0; i < n; ++i) { for (unsigned i = 0; i < n; ++i) {
if (is_empty(col_k[i])) if (is_empty(col_k[i]))
@ -1093,17 +1125,17 @@ namespace seq {
for (unsigned j = 0; j < n; ++j) { for (unsigned j = 0; j < n; ++j) {
if (is_empty(row_k[j])) if (is_empty(row_k[j]))
continue; continue;
euf::snode* via_k = mk_concat(mk_concat(col_k[i], loop_star), row_k[j]); expr* via_k = mk_concat(mk_concat(col_k[i], loop_star), row_k[j]);
R[i][j] = mk_union(R[i][j], via_k); R[i][j] = mk_union(R[i][j], via_k);
} }
} }
} }
euf::snode* result = R[it_root->second][it_root->second]; expr* result = R[it_root->second][it_root->second];
if (!result) if (!result)
return eps; return m_sg.mk(eps_expr);
expr_ref simplified(result->get_expr(), m); expr_ref simplified(result, m);
th_rewriter trw(m); th_rewriter trw(m);
trw(simplified); trw(simplified);
return m_sg.mk(simplified); return m_sg.mk(simplified);
@ -1119,9 +1151,12 @@ namespace seq {
if (!collect_scc_for_projection(root_re, scc)) if (!collect_scc_for_projection(root_re, scc))
return false; return false;
// Key m_projection_cover_size by expr id (stable across pops), not
// snode id (reused on pop).
const unsigned root_expr_id = root_re->get_expr()->get_id();
const unsigned covered_edges = mark_scc_projection_edges(scc); const unsigned covered_edges = mark_scc_projection_edges(scc);
unsigned prev_covered = 0; unsigned prev_covered = 0;
m_projection_cover_size.find(root_re->id(), prev_covered); m_projection_cover_size.find(root_expr_id, prev_covered);
if (covered_edges <= prev_covered) if (covered_edges <= prev_covered)
return false; return false;
@ -1129,7 +1164,7 @@ namespace seq {
if (!projection_re) if (!projection_re)
return false; return false;
m_projection_cover_size.insert(root_re->id(), covered_edges); m_projection_cover_size.insert(root_expr_id, covered_edges);
return true; return true;
} }
@ -4466,7 +4501,6 @@ namespace seq {
// std::endl; // std::endl;
if (lit != sat::null_literal) { if (lit != sat::null_literal) {
node->set_external_conflict(lit, c.dep); node->set_external_conflict(lit, c.dep);
std::cout << "External conflict: " << mk_pp(c.fml, m) << " with literal " << lit << std::endl;
return; return;
} }
assert_to_subsolver(c); assert_to_subsolver(c);

View file

@ -806,10 +806,16 @@ namespace seq {
friend class nielsen_node; friend class nielsen_node;
friend class nielsen_edge; friend class nielsen_edge;
// Edge endpoints are stored as expr* (not snode*) because the cache
// must survive sgraph pops. snodes are allocated in a region that is
// never freed, but their m_expr field is owned by the egraph trail and
// becomes dangling on pop. We pin the referenced expressions via
// m_partial_dfa_pin so their ids stay stable, and we recover an snode
// at the current scope via m_sg.mk(expr) only when we actually need one.
struct partial_dfa_edge { struct partial_dfa_edge {
euf::snode* m_src = nullptr; expr* m_src = nullptr;
euf::snode* m_label = nullptr; // one-character regex label (char/minterm) expr* m_label = nullptr; // one-character regex label (char/minterm)
euf::snode* m_dst = nullptr; expr* m_dst = nullptr;
unsigned m_projection_idx = 0; // first extraction index that included this edge unsigned m_projection_idx = 0; // first extraction index that included this edge
}; };
@ -885,15 +891,23 @@ namespace seq {
// (e.g., explain_conflict) can call mk_join / linearize. // (e.g., explain_conflict) can call mk_join / linearize.
mutable dep_manager m_dep_mgr; mutable dep_manager m_dep_mgr;
// Global partial derivative DFA (monotone across DFS/backtracking). // Global partial derivative DFA (monotone across DFS/backtracking and
// States are regex snodes; edges are discovered derivatives labeled by // across sgraph push/pop). States are regex expressions (pinned in
// m_partial_dfa_pin); edges are discovered derivatives labeled by
// one-character regexes (concrete chars or minterms). // one-character regexes (concrete chars or minterms).
// All maps below are keyed by expr->get_id(): stable for as long as
// the expression is pinned, unlike snode->id() which is reused on pop.
vector<partial_dfa_edge> m_partial_dfa_edges; vector<partial_dfa_edge> m_partial_dfa_edges;
std::unordered_map<unsigned, unsigned_vector> m_partial_dfa_out; std::unordered_map<unsigned, unsigned_vector> m_partial_dfa_out;
std::unordered_map<unsigned, unsigned_vector> m_partial_dfa_in; std::unordered_map<unsigned, unsigned_vector> m_partial_dfa_in;
std::unordered_map<partial_dfa_edge_key, unsigned, partial_dfa_edge_key_hash> m_partial_dfa_edge_index; std::unordered_map<partial_dfa_edge_key, unsigned, partial_dfa_edge_key_hash> m_partial_dfa_edge_index;
// Pins every expression referenced by m_partial_dfa_edges so the
// egraph cannot release them on pop. We never shrink this — the
// cache is meant to be monotone.
expr_ref_vector m_partial_dfa_pin;
unsigned m_projection_extract_idx = 0; unsigned m_projection_extract_idx = 0;
// Per regex-state: size of SCC-edge coverage at last successful projection. // Per regex-state: size of SCC-edge coverage at last successful projection.
// Keyed by the regex expression's id (NOT the snode id).
u_map<unsigned> m_projection_cover_size; u_map<unsigned> m_projection_cover_size;

View file

@ -468,6 +468,7 @@ namespace seq {
} }
} else if (tok->is_unit()) { } else if (tok->is_unit()) {
// seq.unit with non-literal character: show the character expression // seq.unit with non-literal character: show the character expression
std::cout << mk_pp(e, m) << std::endl;
expr* ch = to_app(e)->get_arg(0); expr* ch = to_app(e)->get_arg(0);
if (is_app(ch) && to_app(ch)->get_num_args() == 0) if (is_app(ch) && to_app(ch)->get_num_args() == 0)
result += "[" + dot_html_escape(to_app(ch)->get_decl()->get_name().str()) + "]"; result += "[" + dot_html_escape(to_app(ch)->get_decl()->get_name().str()) + "]";