3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-29 11:58:51 +00:00

wip(seq): clean_leaf cofactors, mk_union_core simplifications, re_is_empty antimirov emptiness check

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-26 00:36:14 +03:00
parent 3d42d997c6
commit 149549b946
7 changed files with 216 additions and 22 deletions

View file

@ -461,6 +461,24 @@ namespace smt {
if (re().is_empty(r))
//trivially true
return;
// When one side is re.none the equation is a pure emptiness check on
// the other regex (symmetric_diff already returned it as r). Decide
// it directly by antimirov NFA reachability instead of running the
// bisimulation/XOR closure, which would build large un-canonicalized
// product states for intersections of contains-patterns.
if ((re().is_empty(r1) || re().is_empty(r2)) && is_ground(r)) {
switch (re_is_empty(r)) {
case l_true:
STRACE(seq_regex_brief, tout << "empty:eq ";);
return; // languages equal (both empty): trivially true
case l_false:
STRACE(seq_regex_brief, tout << "empty:neq ";);
th.add_axiom(~th.mk_eq(r1, r2, false), false_literal);
return;
case l_undef:
break;
}
}
// Try the bisimulation procedure on ground regexes first. If it
// returns a definite answer, dispatch the corresponding axiom and
// bypass the symbolic emptiness/derivative closure.
@ -671,6 +689,67 @@ namespace smt {
return result;
}
/*
Decide emptiness of a ground regex r via antimirov-mode NFA
reachability.
The symbolic derivative engine runs in antimirov mode, so the
derivative of an intersection distributes into a *set* of individual
product states inter(A_i, B_j) (each a small, ground regex) rather
than one giant union-of-intersections term. get_derivative_targets
enumerates these NFA successor states.
We short-circuit to l_false (non-empty) as soon as a reachable state
is nullable (accepts the empty word) or classical (a regex built only
from to_re/all/union/concat/star/plus/opt/loop, hence non-empty). An
intersection itself is never classical, but once one operand reduces
to Σ* the intersection collapses (via the derivative's subset
simplification) to the other, classical, operand.
If the worklist is exhausted with no such state, r is empty (l_true).
Returns l_undef if a step bound is hit, so callers can fall back to
the general procedure.
*/
lbool seq_regex::re_is_empty(expr* r) {
if (re().is_empty(r))
return l_true;
expr_ref_vector pinned(m);
obj_hashtable<expr> visited;
ptr_vector<expr> work;
work.push_back(r);
visited.insert(r);
pinned.push_back(r);
unsigned const bound = 100000;
unsigned steps = 0;
while (!work.empty()) {
if (++steps > bound)
return l_undef;
expr* s = work.back();
work.pop_back();
auto info = re().get_info(s);
if (!info.is_known())
return l_undef;
// ε ∈ L(s) or s is a non-empty classical regex ⇒ L(r) non-empty.
if (info.nullable == l_true || info.classical)
return l_false;
// Dead state: prune (min_length == UINT_MAX means no word is
// accepted from here).
if (info.min_length == UINT_MAX)
continue;
expr_ref_vector targets(m);
get_derivative_targets(s, targets);
for (expr* t : targets) {
if (visited.contains(t))
continue;
visited.insert(t);
pinned.push_back(t);
work.push_back(t);
}
}
return l_true;
}
/*
Return a list of all target regexes in the derivative of a regex r,
ignoring the conditions along each path.

View file

@ -165,6 +165,12 @@ namespace smt {
expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
void get_derivative_targets(expr* r, expr_ref_vector& targets);
// Decide emptiness of a ground regex by antimirov-mode NFA
// reachability: explore derivative target states, short-circuiting to
// "non-empty" on the first reachable nullable or classical state.
// Returns l_true (empty), l_false (non-empty), l_undef (gave up).
lbool re_is_empty(expr* r);
/*
Pretty print the regex of the state id to the out stream,
seq_regex_ptr must be a pointer to seq_regex and the