3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00

fix seq::derive::intersect_intervals dropping kept intervals on disjoint case

The intersect_intervals routine in seq_derive.cpp maintains a path-tracking
disjoint union of character intervals.  When intersecting the active suffix
with a new constraint [lo, hi], it iterated the suffix and, on encountering
the first interval disjoint from [lo, hi], reset the output cursor to the
end-marker and broke out of the loop.  This both threw away the intervals it
had already kept and skipped every remaining interval, so e.g.
[(0, 96), (98, max)] intersected with [98, 98] became empty instead of
[(98, 98)].

Inside derive that silently killed valid branches of symbolic derivatives.
For example D(a|b) collapsed to ite(c='a', eps, empty) -- the 'b' branch
vanished -- which made the bisimulation procedure conclude bogus equalities
such as a* == (a|b)*.  On the regex-equivalence corpus this single bug
accounted for ~510 false-unsat results vs master.

Fix: drop only the disjoint interval and continue scanning the rest of the
suffix.  Add a small assertion-based regression test that builds D(a|b),
checks both branches survive, and runs bisim on a* vs (a|b)*.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Margus Veanes 2026-06-15 01:38:55 -07:00
parent cf0e43ab38
commit fb6470a1a1
4 changed files with 81 additions and 10 deletions

View file

@ -1246,21 +1246,18 @@ namespace seq {
// Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi] // Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi]
void derive::intersect_intervals(unsigned lo, unsigned hi) { void derive::intersect_intervals(unsigned lo, unsigned hi) {
// Copy active suffix to end, update start, then filter // Copy active suffix to end, update start, then filter
unsigned old_start = m_intervals_start;
unsigned old_sz = m_intervals.size(); unsigned old_sz = m_intervals.size();
for (unsigned i = old_start; i < old_sz; ++i) for (unsigned i = m_intervals_start; i < old_sz; ++i)
m_intervals.push_back(m_intervals[i]); m_intervals.push_back(m_intervals[i]);
m_intervals_start = old_sz; m_intervals_start = old_sz;
// Filter in-place within new suffix // Filter in-place within new suffix: drop intervals disjoint from [lo,hi],
// keep the intersection for overlapping ones.
unsigned j = m_intervals_start; unsigned j = m_intervals_start;
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) { for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
auto [lo1, hi1] = m_intervals[i]; auto [lo1, hi1] = m_intervals[i];
if (hi < lo1 || lo > hi1) { if (hi < lo1 || lo > hi1)
j = old_sz; continue; // disjoint with this interval — drop it
break; m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
}
if (hi1 >= lo)
m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
} }
m_intervals.shrink(j); m_intervals.shrink(j);
} }

View file

@ -113,6 +113,7 @@ add_executable(test-z3
polynomial_factorization.cpp polynomial_factorization.cpp
polynorm.cpp polynorm.cpp
prime_generator.cpp prime_generator.cpp
seq_regex_bisim.cpp
proof_checker.cpp proof_checker.cpp
qe_arith.cpp qe_arith.cpp
mbp_qel.cpp mbp_qel.cpp

View file

@ -193,7 +193,8 @@
X(ho_matcher) \ X(ho_matcher) \
X(finite_set) \ X(finite_set) \
X(finite_set_rewriter) \ X(finite_set_rewriter) \
X(fpa) X(fpa) \
X(seq_regex_bisim)
#define FOR_EACH_TEST(X, X_ARGV) \ #define FOR_EACH_TEST(X, X_ARGV) \
FOR_EACH_ALL_TEST(X, X_ARGV) \ FOR_EACH_ALL_TEST(X, X_ARGV) \

View file

@ -0,0 +1,72 @@
// Regression test for the seq::derive::intersect_intervals bug.
//
// Background: derive uses a path-tracking interval set to compute symbolic
// derivatives. The intersect_intervals routine used to react to a single
// disjoint interval by dropping the entire kept suffix and skipping the rest
// of the list, which silently killed valid branches in derivatives such as
// D(a|b). That made the bisimulation procedure conclude bogus equalities
// like a* == (a|b)*.
//
// This test asserts the basic shape of the derivative and the bisim result
// for that minimal case, so the bug cannot silently regress.
#include "ast/ast.h"
#include "ast/ast_pp.h"
#include "ast/reg_decl_plugins.h"
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/rewriter/seq_regex_bisim.h"
#include <iostream>
static void test_a_star_neq_ab_star() {
ast_manager m;
reg_decl_plugins(m);
seq_util u(m);
seq_rewriter rw(m);
sort_ref str_sort(u.str.mk_string_sort(), m);
zstring sa("a"), sb("b");
expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
expr_ref re_b(u.re.mk_to_re(u.str.mk_string(sb)), m);
expr_ref a_star(u.re.mk_star(re_a), m);
expr_ref ab(u.re.mk_union(re_a, re_b), m);
expr_ref ab_star(u.re.mk_star(ab), m);
expr_ref d_ab = rw.mk_brz_derivative(ab);
std::cout << "D(a|b) = " << mk_pp(d_ab, m) << "\n";
// Both the 'a' branch and the 'b' branch of D(a|b) must reach epsilon.
// Collect the regex leaves of the symbolic derivative and require at
// least two distinct accepting leaves (one for 'a' and one for 'b').
expr_ref_vector leaves(m);
auto collect = [&](expr* e, auto&& self) -> void {
expr* c, *t, *f;
if (m.is_ite(e, c, t, f) || u.re.is_union(e, t, f) || u.re.is_antimirov_union(e, t, f)) {
self(t, self);
self(f, self);
return;
}
if (u.re.is_empty(e)) return;
leaves.push_back(e);
};
collect(d_ab, collect);
unsigned nullable_leaves = 0;
for (expr* l : leaves) {
expr_ref n = rw.is_nullable(l);
if (m.is_true(n)) ++nullable_leaves;
}
std::cout << "D(a|b) leaves=" << leaves.size()
<< " nullable=" << nullable_leaves << "\n";
ENSURE(nullable_leaves >= 2);
// Bisim must report the two languages are not equivalent.
seq::regex_bisim bisim(rw);
lbool eq = bisim.are_equivalent(a_star, ab_star);
std::cout << "bisim(a*, (a|b)*) = "
<< (eq == l_true ? "true" : eq == l_false ? "false" : "undef") << "\n";
ENSURE(eq == l_false);
}
void tst_seq_regex_bisim() {
test_a_star_neq_ab_star();
}