3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-05-16 23:25:36 +00:00

Some steps towards partial automatons

This commit is contained in:
CEisenhofer 2026-05-04 18:31:38 +02:00
parent b199b0782a
commit adb9ca4305
2 changed files with 416 additions and 15 deletions

View file

@ -41,6 +41,7 @@ NSB review:
#include <cstdlib>
#include <stack>
#include <unordered_map>
#include <vector>
namespace seq {
@ -507,6 +508,12 @@ namespace seq {
m_fresh_cnt = 0;
m_root_constraints_asserted = false;
m_mod_cnt.reset();
m_partial_dfa_edges.reset();
m_partial_dfa_out.clear();
m_partial_dfa_in.clear();
m_partial_dfa_edge_index.clear();
m_projection_extract_idx = 0;
m_projection_cover_size.reset();
m_length_trail.reset();
m_length_info.reset();
m_dep_mgr.reset();
@ -831,6 +838,289 @@ namespace seq {
return {expr_ref(last_stable_sum, m), last_stable_idx};
}
euf::snode* nielsen_graph::to_partial_label_regex(euf::snode* label) {
if (!label || !label->get_expr())
return nullptr;
expr* e = label->get_expr();
if (m_seq.is_re(e))
return label;
if (m_seq.is_seq(e))
return m_sg.mk(m_seq.re.mk_to_re(e));
if (m_seq.is_char(e)) {
expr_ref u(m_seq.str.mk_unit(e), m);
return m_sg.mk(m_seq.re.mk_to_re(u));
}
return nullptr;
}
void nielsen_graph::record_partial_derivative_edge(euf::snode* src_re, euf::snode* label, euf::snode* dst_re) {
if (!src_re || !dst_re || !src_re->get_expr() || !dst_re->get_expr())
return;
if (!src_re->is_ground() || !dst_re->is_ground())
return;
if (src_re->is_fail() || dst_re->is_fail())
return;
euf::snode* label_re = to_partial_label_regex(label);
if (!label_re || !label_re->get_expr())
return;
if (!m_seq.is_re(label_re->get_expr()) || !label_re->is_ground())
return;
partial_dfa_edge_key key{ src_re->id(), label_re->id(), dst_re->id() };
if (m_partial_dfa_edge_index.find(key) != m_partial_dfa_edge_index.end())
return;
unsigned edge_idx = m_partial_dfa_edges.size();
m_partial_dfa_edge_index.emplace(key, edge_idx);
partial_dfa_edge e;
e.m_src = src_re;
e.m_label = label_re;
e.m_dst = dst_re;
m_partial_dfa_edges.push_back(e);
m_partial_dfa_out[src_re->id()].push_back(edge_idx);
m_partial_dfa_in[dst_re->id()].push_back(edge_idx);
}
bool nielsen_graph::collect_scc_for_projection(euf::snode* root_re, uint_set& scc) const {
scc.reset();
if (!root_re)
return false;
unsigned root_id = root_re->id();
uint_set fwd, bwd;
unsigned_vector stack;
stack.push_back(root_id);
while (!stack.empty()) {
unsigned s = stack.back();
stack.pop_back();
if (fwd.contains(s))
continue;
fwd.insert(s);
auto it = m_partial_dfa_out.find(s);
if (it == m_partial_dfa_out.end())
continue;
for (unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (e.m_dst)
stack.push_back(e.m_dst->id());
}
}
stack.push_back(root_id);
while (!stack.empty()) {
unsigned s = stack.back();
stack.pop_back();
if (bwd.contains(s))
continue;
bwd.insert(s);
auto it = m_partial_dfa_in.find(s);
if (it == m_partial_dfa_in.end())
continue;
for (unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (e.m_src)
stack.push_back(e.m_src->id());
}
}
for (unsigned s : fwd) {
if (bwd.contains(s))
scc.insert(s);
}
if (!scc.contains(root_id))
return false;
if (scc.num_elems() > 1)
return true;
auto it = m_partial_dfa_out.find(root_id);
if (it == m_partial_dfa_out.end())
return false;
for (unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (e.m_dst && e.m_dst->id() == root_id)
return true;
}
return false;
}
unsigned nielsen_graph::mark_scc_projection_edges(uint_set const& scc) {
++m_projection_extract_idx;
unsigned extract_idx = m_projection_extract_idx;
unsigned covered = 0;
for (unsigned src_id : scc) {
auto it = m_partial_dfa_out.find(src_id);
if (it == m_partial_dfa_out.end())
continue;
for (unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge& e = m_partial_dfa_edges[edge_idx];
if (!e.m_dst || !scc.contains(e.m_dst->id()))
continue;
if (e.m_projection_idx == 0)
e.m_projection_idx = extract_idx;
if (e.m_projection_idx <= extract_idx)
++covered;
}
}
return covered;
}
euf::snode* nielsen_graph::build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx) {
if (!root_re || !root_re->get_expr())
return nullptr;
sort* seq_sort = nullptr;
if (!m_seq.is_re(root_re->get_expr(), seq_sort) || !seq_sort)
return nullptr;
sort* re_sort = root_re->get_expr()->get_sort();
euf::snode* eps = m_sg.mk(m_seq.re.mk_epsilon(seq_sort));
euf::snode* empty = m_sg.mk(m_seq.re.mk_empty(re_sort));
auto is_empty = [&](euf::snode* re) {
return !re || re->is_fail() || !re->get_expr() || m_seq.re.is_empty(re->get_expr());
};
auto mk_union = [&](euf::snode* a, euf::snode* b) -> euf::snode* {
if (is_empty(a))
return b;
if (is_empty(b))
return a;
if (a == b || a->get_expr() == b->get_expr())
return a;
return m_sg.mk(m_seq.re.mk_union(a->get_expr(), b->get_expr()));
};
auto mk_concat = [&](euf::snode* a, euf::snode* b) -> euf::snode* {
if (is_empty(a) || is_empty(b))
return empty;
if (a == eps || (a->get_expr() && m_seq.re.is_epsilon(a->get_expr())))
return b;
if (b == eps || (b->get_expr() && m_seq.re.is_epsilon(b->get_expr())))
return a;
return m_sg.mk(m_seq.re.mk_concat(a->get_expr(), b->get_expr()));
};
auto mk_star = [&](euf::snode* r) -> euf::snode* {
if (is_empty(r) || r == eps || (r->get_expr() && m_seq.re.is_epsilon(r->get_expr())))
return eps;
if (r->get_expr() && m_seq.re.is_star(r->get_expr()))
return r;
return m_sg.mk(m_seq.re.mk_star(r->get_expr()));
};
unsigned_vector states;
states.reserve(scc.num_elems());
std::unordered_map<unsigned, unsigned> pos;
for (unsigned s : scc) {
pos.emplace(s, states.size());
states.push_back(s);
}
auto it_root = pos.find(root_re->id());
if (it_root == pos.end())
return nullptr;
unsigned n = states.size();
std::vector<std::vector<euf::snode*>> R(n, std::vector<euf::snode*>(n, nullptr));
for (unsigned src_id : states) {
auto it = m_partial_dfa_out.find(src_id);
if (it == m_partial_dfa_out.end())
continue;
for (unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (!e.m_src || !e.m_dst || !e.m_label)
continue;
if (!scc.contains(e.m_dst->id()))
continue;
if (e.m_projection_idx == 0 || e.m_projection_idx > extract_idx)
continue;
auto it_src = pos.find(e.m_src->id());
auto it_dst = pos.find(e.m_dst->id());
if (it_src == pos.end() || it_dst == pos.end())
continue;
unsigned i = it_src->second;
unsigned j = it_dst->second;
R[i][j] = mk_union(R[i][j], e.m_label);
}
}
for (unsigned i = 0; i < n; ++i)
R[i][i] = mk_union(R[i][i], eps);
for (unsigned k = 0; k < n; ++k) {
std::vector<euf::snode*> col_k(n, nullptr), row_k(n, nullptr);
for (unsigned i = 0; i < n; ++i)
col_k[i] = R[i][k];
for (unsigned j = 0; j < n; ++j)
row_k[j] = R[k][j];
euf::snode* loop_k = R[k][k];
euf::snode* loop_star = mk_star(loop_k);
for (unsigned i = 0; i < n; ++i) {
if (is_empty(col_k[i]))
continue;
for (unsigned j = 0; j < n; ++j) {
if (is_empty(row_k[j]))
continue;
euf::snode* via_k = mk_concat(mk_concat(col_k[i], loop_star), row_k[j]);
R[i][j] = mk_union(R[i][j], via_k);
}
}
}
euf::snode* result = R[it_root->second][it_root->second];
if (!result)
return eps;
expr_ref simplified(result->get_expr(), m);
th_rewriter trw(m);
trw(simplified);
return m_sg.mk(simplified);
}
bool nielsen_graph::try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re) {
projection_re = nullptr;
if (!root_re || !root_re->get_expr() || !root_re->is_ground())
return false;
uint_set scc;
if (!collect_scc_for_projection(root_re, scc))
return false;
unsigned covered_edges = mark_scc_projection_edges(scc);
unsigned prev_covered = 0;
m_projection_cover_size.find(root_re->id(), prev_covered);
if (covered_edges <= prev_covered)
return false;
projection_re = build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx);
if (!projection_re)
return false;
m_projection_cover_size.insert(root_re->id(), covered_edges);
return true;
}
simplify_result nielsen_node::simplify_and_init(ptr_vector<nielsen_edge> const& cur_path) {
if (m_is_extended)
return simplify_result::proceed;
@ -1132,6 +1422,7 @@ namespace seq {
euf::snode* tok = dir_token(mem.m_str, fwd);
if (!tok || !tok->is_char_or_unit())
break;
euf::snode* src_re = mem.m_regex;
euf::snode* deriv = fwd
? sg.brzozowski_deriv(mem.m_regex, tok)
: reverse_brzozowski_deriv(sg, mem.m_regex, tok);
@ -1143,6 +1434,8 @@ namespace seq {
set_conflict(backtrack_reason::regex, mem.m_dep);
return simplify_result::conflict;
}
if (fwd)
m_graph.record_partial_derivative_edge(src_re, tok, deriv);
mem.m_str = dir_drop(sg, mem.m_str, 1, fwd);
mem.m_regex = deriv;
}
@ -2329,9 +2622,9 @@ namespace seq {
if (apply_eq_split(node))
return ++m_stats.m_mod_eq_split, true;
// Priority 6: StarIntr - stabilizer introduction for regex cycles
// if (apply_star_intr(node))
// return ++m_stats.m_mod_star_intr, true;
// Priority 6: CycleDecomp - stabilizer introduction for regex cycles using partial DFA projection
if (apply_cycle_decomposition(node))
return ++m_stats.m_mod_star_intr, true;
// Priority 7: GPowerIntr - ground power introduction
if (apply_gpower_intr(node))
@ -2743,16 +3036,56 @@ namespace seq {
}
// -----------------------------------------------------------------------
// Modifier: apply_star_intr
// Star introduction: for a str_mem x·s ∈ R where a cycle is detected
// (backedge exists), introduce a stabilizer constraint x ∈ base*.
// Creates a child that splits x into pr·po, adds pr ∈ base* and
// po·s ∈ R, with a blocking constraint on po.
// Enhanced to use strengthened_stabilizer and register via add_stabilizer.
// mirrors ZIPT's StarIntrModifier (StarIntrModifier.cs:22-85)
// Modifier: apply_cycle_decomposition
// Cycle decomposition: for a str_mem x·s ∈ R where a partial DFA
// cycle is detected, project SCC onto stabilizer constraint b.
// Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*).
// -----------------------------------------------------------------------
bool nielsen_graph::apply_star_intr(nielsen_node* node) {
bool nielsen_graph::apply_cycle_decomposition(nielsen_node* node) {
return false; // For now, disable
if (!node->backedge())
return false;
// Look for a str_mem with a variable-headed string
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
if (!mem.m_str || !mem.m_regex)
continue;
if (mem.is_primitive())
continue;
euf::snode* first = mem.m_str->first();
if (!first || !first->is_var())
continue;
euf::snode* x = first;
euf::snode* stabilizer_re = nullptr;
if (!try_extract_partial_projection(mem.m_regex, stabilizer_re))
continue;
SASSERT(stabilizer_re);
// Construct the replacement x = x' * x''
expr_ref xp_expr(m_seq.str.mk_string(zstring()), m); // temp dummy init
expr_ref xpp_expr(m_seq.str.mk_string(zstring()), m);
euf::snode* xp = m_sg.mk(xp_expr);
euf::snode* xpp = m_sg.mk(xpp_expr);
euf::snode* xp_xpp = m_sg.mk_concat(xp, xpp);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(x, xp_xpp, nullptr, mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
// Add constraints: x' \in stabilizer_re
child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep));
// x'' \in complement((b ∩ complement(eps)) · Sigma*)
// Placeholder: for now we rely on subsequent bounds/splits
return true;
}
return false;
}
@ -3448,6 +3781,7 @@ namespace seq {
SASSERT(deriv);
if (deriv->is_fail())
continue;
record_partial_derivative_edge(mem.m_regex, mt, deriv);
// std::cout << "Result: " << spp(deriv, m) << std::endl;
SASSERT(m_seq_regex);

View file

@ -797,6 +797,33 @@ namespace seq {
class nielsen_graph {
friend class nielsen_node;
friend class nielsen_edge;
struct partial_dfa_edge {
euf::snode* m_src = nullptr;
euf::snode* m_label = nullptr; // one-character regex label (char/minterm)
euf::snode* m_dst = nullptr;
unsigned m_projection_idx = 0; // first extraction index that included this edge
};
struct partial_dfa_edge_key {
unsigned m_src = 0;
unsigned m_label = 0;
unsigned m_dst = 0;
bool operator==(partial_dfa_edge_key const& o) const {
return m_src == o.m_src && m_label == o.m_label && m_dst == o.m_dst;
}
};
struct partial_dfa_edge_key_hash {
size_t operator()(partial_dfa_edge_key const& k) const {
size_t h = static_cast<size_t>(k.m_src);
h = (h * 1315423911u) ^ static_cast<size_t>(k.m_label + 0x9e3779b9u);
h = (h * 2654435761u) ^ static_cast<size_t>(k.m_dst + 0x85ebca6bu);
return h;
}
};
ast_manager& m;
arith_util a;
seq_util& m_seq;
@ -851,6 +878,17 @@ namespace seq {
// (e.g., explain_conflict) can call mk_join / linearize.
mutable dep_manager m_dep_mgr;
// Global partial derivative DFA (monotone across DFS/backtracking).
// States are regex snodes; edges are discovered derivatives labeled by
// one-character regexes (concrete chars or minterms).
vector<partial_dfa_edge> m_partial_dfa_edges;
std::unordered_map<unsigned, unsigned_vector> m_partial_dfa_out;
std::unordered_map<unsigned, unsigned_vector> m_partial_dfa_in;
std::unordered_map<partial_dfa_edge_key, unsigned, partial_dfa_edge_key_hash> m_partial_dfa_edge_index;
unsigned m_projection_extract_idx = 0;
// Per regex-state: size of SCC-edge coverage at last successful projection.
u_map<unsigned> m_projection_cover_size;
public:
// Construct with a caller-supplied solver. Ownership is NOT transferred;
@ -1030,6 +1068,35 @@ namespace seq {
// Mirrors ZIPT NielsenNode.CheckRegex)
bool check_leaf_regex(nielsen_node const& node, dep_tracker& dep);
// -------------------------------------------------------------------
// Partial DFA projection helpers
// -------------------------------------------------------------------
// Record a discovered derivative edge in the global partial DFA.
// The `label` may be a concrete string token (converted to to_re)
// or an already-regular-expression minterm.
void record_partial_derivative_edge(euf::snode* src_re, euf::snode* label, euf::snode* dst_re);
// Convert a transition label (string token or regex minterm) into a
// one-character regex snode used by the partial DFA.
euf::snode* to_partial_label_regex(euf::snode* label);
// Collect the SCC containing root_re in the current partial DFA.
// Returns false if no cyclic SCC containing root_re exists.
bool collect_scc_for_projection(euf::snode* root_re, uint_set& scc) const;
// Mark SCC edges with a monotone extraction index and return the
// currently covered edge count for this extraction.
unsigned mark_scc_projection_edges(uint_set const& scc);
// Build regex equivalent to proj(root_re, E_scc, {root_re}) using the
// currently marked SCC edges (projection index <= extract_idx).
euf::snode* build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx);
// Try to extract a stronger projection for root_re. Returns true and
// stores it in projection_re iff SCC coverage has grown.
bool try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re);
// Apply the Parikh image filter to a node: generate modular length
// constraints from regex memberships and append them to the node's
// constraints. Also performs a lightweight feasibility pre-check;
@ -1109,10 +1176,10 @@ namespace seq {
// branch into s ∈ th under condition c, and s ∈ el under condition ¬c.
bool apply_regex_if_split(nielsen_node* node);
// star introduction: for a str_mem x·s ∈ R where a cycle is detected
// (backedge exists), introduce stabilizer: x ∈ base* with x split.
// mirrors ZIPT's StarIntrModifier
bool apply_star_intr(nielsen_node* node);
// cycle decomposition: for a str_mem x·s ∈ R where a partial DFA
// cycle is detected, project SCC onto stabilizer constraint b.
// Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*).
bool apply_cycle_decomposition(nielsen_node* node);
// generalized power introduction: for an equation where one head is
// a variable v and the other side has ground prefix + a variable x