3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00

Avoid unnecessary regex cycle splits

This commit is contained in:
CEisenhofer 2026-05-29 19:14:08 +02:00
parent 70031b674c
commit cebe57dffa
2 changed files with 50 additions and 33 deletions

View file

@ -610,7 +610,6 @@ namespace seq {
m_partial_dfa_edge_index.clear();
m_partial_dfa_pin.reset();
m_projection_extract_idx = 0;
m_projection_cover_size.reset();
//m_length_trail.reset();
//m_length_info.reset();
m_dep_mgr.reset();
@ -967,7 +966,16 @@ namespace seq {
expr* label_e = label_re->get_expr();
expr* dst_e = dst_re->get_expr();
partial_dfa_edge_key key{ src_e->get_id(), label_e->get_id(), dst_e->get_id() };
// Deduplicate transitions by (src, dst) only — NOT by label. The
// projection operator is parameterized by the explored state set Q
// (the Brzozowski automaton is deterministic, so the only a-transition
// out of a state is to δ_a(state)); edge labels are never consulted by
// projection_state_in_Q / collect_scc_for_projection / the projection
// derivative. Keying by label would record the SAME transition twice
// when it is discovered once as a concrete char (to_re "a") and once as
// a minterm range ([a-a]), spuriously inflating the SCC edge count and
// re-triggering cycle decomposition as the derivation walks the cycle.
partial_dfa_edge_key key{ src_e->get_id(), 0, dst_e->get_id() };
if (m_partial_dfa_edge_index.contains(key))
return;
@ -1068,10 +1076,37 @@ namespace seq {
unsigned nielsen_graph::mark_scc_projection_edges(uint_set const& scc) {
// scc contains expr ids (see collect_scc_for_projection).
//
// Returns the number of edges *newly* added to the projection coverage
// by this call (edges that were previously unmarked). Crucially, the
// monotone extraction index is bumped ONLY when there is something new
// to mark. This makes "the explored SCC has grown" a *global* property
// of the SCC's edge set rather than a per-entry-state one: re-visiting
// an already-fully-marked SCC from a different state (e.g. a derivative
// state br of r, which shares the SCC {r, br}) marks nothing new and is
// therefore not treated as a fresh cycle to decompose. Without this,
// each state of the cycle would trigger its own redundant decomposition
// as the derivation walks around the SCC.
unsigned newly_marked = 0;
for (unsigned src_id : scc) {
auto it = m_partial_dfa_out.find(src_id);
if (it == m_partial_dfa_out.end())
continue;
for (const unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (!e.m_dst || !scc.contains(e.m_dst->get_id()))
continue;
if (e.m_projection_idx == 0)
++newly_marked;
}
}
if (newly_marked == 0)
return 0;
++m_projection_extract_idx;
const unsigned extract_idx = m_projection_extract_idx;
unsigned covered = 0;
for (unsigned src_id : scc) {
auto it = m_partial_dfa_out.find(src_id);
if (it == m_partial_dfa_out.end())
@ -1084,11 +1119,9 @@ namespace seq {
continue;
if (e.m_projection_idx == 0)
e.m_projection_idx = extract_idx;
if (e.m_projection_idx <= extract_idx)
++covered;
}
}
return covered;
return newly_marked;
}
bool nielsen_graph::try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re) {
@ -1101,13 +1134,12 @@ namespace seq {
if (!collect_scc_for_projection(root_re, scc))
return false;
// Key m_projection_cover_size by expr id (stable across pops), not
// snode id (reused on pop).
const unsigned root_expr_id = root_re->get_expr()->get_id();
const unsigned covered_edges = mark_scc_projection_edges(scc);
unsigned prev_covered = 0;
m_projection_cover_size.find(root_expr_id, prev_covered);
if (covered_edges <= prev_covered)
// Novelty = the SCC's edge set grew (some edge was previously unmarked).
// mark_scc_projection_edges advances the snapshot index only in that
// case, so a re-visit of an already-fully-marked SCC (from any of its
// states) marks nothing new and does not re-trigger a decomposition.
const unsigned newly_marked = mark_scc_projection_edges(scc);
if (newly_marked == 0)
return false;
// Keep the stabilizer symbolic as a projection operator over the
@ -1117,7 +1149,6 @@ namespace seq {
if (!projection_re)
return false;
m_projection_cover_size.insert(root_expr_id, covered_edges);
return true;
}
@ -3312,10 +3343,6 @@ namespace seq {
// -----------------------------------------------------------------------
bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) {
TRACE(seq, tout << "cycle_subsumption ENTER node " << node->id() << " with mems:\n";
for (auto const& m2 : node->str_mems())
tout << " [prim=" << m2.is_primitive() << "] "
<< mk_pp(m2.m_str->get_expr(), m) << "" << mk_pp(m2.m_regex->get_expr(), m) << "\n");
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
SASSERT(mem.well_formed());
@ -3339,12 +3366,7 @@ namespace seq {
continue;
// Check L(x_regex) ⊆ L(stabilizer).
lbool subset = m_seq_regex->is_language_subset(x_regex, stabilizer);
TRACE(seq, tout << "cycle_subsumption check: first=" << mk_pp(first->get_expr(), m)
<< "\n x_regex=" << mk_pp(x_regex->get_expr(), m)
<< "\n stabilizer=" << mk_pp(stabilizer->get_expr(), m)
<< "\n subset=" << subset << "\n");
if (subset != l_true)
if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true)
continue;
// Subsume: replace x·rest ∈ r with rest ∈ r.
@ -3428,10 +3450,6 @@ namespace seq {
euf::snode* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort));
euf::snode* xpp = get_tail(x, compute_length_expr(xp).get());
euf::snode* xp_xpp = m_sg.mk_concat(xp, xpp);
TRACE(seq, tout << "cycle_decomp build: xp=" << mk_pp(xp->get_expr(), m)
<< "\n xpp=" << mk_pp(xpp->get_expr(), m)
<< "\n xp_xpp=" << mk_pp(xp_xpp->get_expr(), m)
<< "\n xp_xpp.first=" << mk_pp(xp_xpp->first()->get_expr(), m) << "\n");
nielsen_node* child = mk_child(node);
SASSERT(child->m_str_mem[mi] == mem);
@ -4159,7 +4177,6 @@ namespace seq {
//
euf::snode* fresh_char = m_sg.mk(get_or_create_char_var(first));
euf::snode* tail = get_tail(first, 1, true);
euf::snode* replacement = m_sg.mk_concat(fresh_char, tail);
nielsen_node* child = mk_child(node);

View file

@ -958,10 +958,10 @@ namespace seq {
// egraph cannot release them on pop. We never shrink this — the
// cache is meant to be monotone.
expr_ref_vector m_partial_dfa_pin;
// Monotone snapshot index ν. Bumped by mark_scc_projection_edges only
// when the explored SCC's edge set actually grows; identifies which
// partial-DFA edges (m_projection_idx ≤ ν) belong to a projection's Q.
unsigned m_projection_extract_idx = 0;
// Per regex-state: size of SCC-edge coverage at last successful projection.
// Keyed by the regex expression's id (NOT the snode id).
u_map<unsigned> m_projection_cover_size;
public: