From 2161f9cfac4d3bfa7f19078ceebc163d84c1a32f Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Fri, 26 Jun 2026 09:48:19 +0300 Subject: [PATCH] Merge same-condition ITEs and subsume nested union members in derivative The antimirov-mode symbolic derivative builds union(ite(c,t1,e1), ite(c,t2,e2)) when deriving a concat/loop, e.g. delta(a{0,k}.R.ab) yields a{0,k-1}.(R.ab) and a{0,k}.(R.ab) under the same character condition c. derive::mk_union_core only applied subsumption pairwise on its two direct operands, so the two prefixes -- sitting in separate ITE branches and nested behind an unrelated union member -- never met in a common union and the subset collapse a{0,k-1}.X subset a{0,k}.X never fired. Each derivative step then produced a fresh a{0,k}.X state, exploding the emptiness/membership worklist to O(N) states on loop-intersect-comp regexes (z3test 5731 sub#1/#3 timed out). mk_union_core now (1) flattens the left operand, (2) inserts a single disjunct into a union by comparing it against every member, and (3) merges same-condition ITE alternatives ite(c,t1,_) U ite(c,t2,_) -> ite(c, t1 U t2, _), bringing the bodies into one union where subsumption collapses them. 5731 all sub-checks now solve in 0.06s (were timeouts); 5728 stays at 0.16s; 92/92 unit tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ast/rewriter/seq_derive.cpp | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index f662ed4179..c942b7a8e5 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -753,8 +753,37 @@ namespace seq { if (re().is_full_seq(a) || re().is_full_seq(b)) return expr_ref(re().mk_full_seq(a->get_sort()), m); - // Prefix factoring: a·x ∪ a·y = a·(x ∪ y) expr *a1, *a2, *b1, *b2; + + // Flatten the left operand: insert each disjunct of `a` into `b` + // individually. After this `a` is a single (non-union) disjunct. + if (re().is_union(a, a1, a2)) + return mk_union(a1, mk_union(a2, b)); + + // Insert the single disjunct `a` into the (already reduced) union `b`, + // comparing it against *every* member. The pairwise subsumption below + // only inspects the two direct operands, so a term subsumed by a member + // nested inside `b` would survive — this is the root cause of the + // loop ∩ comp derivative accumulating one a{0,k}·R state per k. + if (re().is_union(b, b1, b2)) { + expr_ref m1 = mk_union(a, b1); + if (m1.get() == b1) return expr_ref(b, m); // a ⊆ b1 + if (!re().is_union(m1)) return mk_union(m1, b2); // a, b1 collapsed + return mk_union(b1, mk_union(a, b2)); // a independent of b1 + } + + // Same-condition ITE merge: ite(c,t1,e1) ∪ ite(c,t2,e2) → ite(c, t1∪t2, e1∪e2). + // The concat/loop derivative emits same-condition ITE alternatives such + // as a{0,k-1}·R and a{0,k}·R guarded by the same character condition. + // Without merging they sit in separate ITE branches and the subset rule + // below never sees the bodies together, so each derivative step yields a + // new a{0,k}·R state → O(N) states on loop ∩ comp regexes. Merging + // unites the bodies in one union where the subset collapse fires. + expr *c1, *t1, *e1, *c2, *t2, *e2; + if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1 == c2) + return mk_ite(c1, mk_union(t1, t2), mk_union(e1, e2)); + + // Prefix factoring: a·x ∪ a·y = a·(x ∪ y) if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) { expr_ref tail = mk_union(a2, b2); return mk_deriv_concat(a1, tail);