mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 15:16:29 +00:00
fix seq::derive::intersect_intervals dropping kept intervals on disjoint case
The intersect_intervals routine in seq_derive.cpp maintains a path-tracking disjoint union of character intervals. When intersecting the active suffix with a new constraint [lo, hi], it iterated the suffix and, on encountering the first interval disjoint from [lo, hi], reset the output cursor to the end-marker and broke out of the loop. This both threw away the intervals it had already kept and skipped every remaining interval, so e.g. [(0, 96), (98, max)] intersected with [98, 98] became empty instead of [(98, 98)]. Inside derive that silently killed valid branches of symbolic derivatives. For example D(a|b) collapsed to ite(c='a', eps, empty) -- the 'b' branch vanished -- which made the bisimulation procedure conclude bogus equalities such as a* == (a|b)*. On the regex-equivalence corpus this single bug accounted for ~510 false-unsat results vs master. Fix: drop only the disjoint interval and continue scanning the rest of the suffix. Add a small assertion-based regression test that builds D(a|b), checks both branches survive, and runs bisim on a* vs (a|b)*. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
cf0e43ab38
commit
fb6470a1a1
4 changed files with 81 additions and 10 deletions
|
|
@ -1246,21 +1246,18 @@ namespace seq {
|
||||||
// Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi]
|
// Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi]
|
||||||
void derive::intersect_intervals(unsigned lo, unsigned hi) {
|
void derive::intersect_intervals(unsigned lo, unsigned hi) {
|
||||||
// Copy active suffix to end, update start, then filter
|
// Copy active suffix to end, update start, then filter
|
||||||
unsigned old_start = m_intervals_start;
|
|
||||||
unsigned old_sz = m_intervals.size();
|
unsigned old_sz = m_intervals.size();
|
||||||
for (unsigned i = old_start; i < old_sz; ++i)
|
for (unsigned i = m_intervals_start; i < old_sz; ++i)
|
||||||
m_intervals.push_back(m_intervals[i]);
|
m_intervals.push_back(m_intervals[i]);
|
||||||
m_intervals_start = old_sz;
|
m_intervals_start = old_sz;
|
||||||
// Filter in-place within new suffix
|
// Filter in-place within new suffix: drop intervals disjoint from [lo,hi],
|
||||||
|
// keep the intersection for overlapping ones.
|
||||||
unsigned j = m_intervals_start;
|
unsigned j = m_intervals_start;
|
||||||
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
|
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
|
||||||
auto [lo1, hi1] = m_intervals[i];
|
auto [lo1, hi1] = m_intervals[i];
|
||||||
if (hi < lo1 || lo > hi1) {
|
if (hi < lo1 || lo > hi1)
|
||||||
j = old_sz;
|
continue; // disjoint with this interval — drop it
|
||||||
break;
|
m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
|
||||||
}
|
|
||||||
if (hi1 >= lo)
|
|
||||||
m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
|
|
||||||
}
|
}
|
||||||
m_intervals.shrink(j);
|
m_intervals.shrink(j);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -113,6 +113,7 @@ add_executable(test-z3
|
||||||
polynomial_factorization.cpp
|
polynomial_factorization.cpp
|
||||||
polynorm.cpp
|
polynorm.cpp
|
||||||
prime_generator.cpp
|
prime_generator.cpp
|
||||||
|
seq_regex_bisim.cpp
|
||||||
proof_checker.cpp
|
proof_checker.cpp
|
||||||
qe_arith.cpp
|
qe_arith.cpp
|
||||||
mbp_qel.cpp
|
mbp_qel.cpp
|
||||||
|
|
|
||||||
|
|
@ -193,7 +193,8 @@
|
||||||
X(ho_matcher) \
|
X(ho_matcher) \
|
||||||
X(finite_set) \
|
X(finite_set) \
|
||||||
X(finite_set_rewriter) \
|
X(finite_set_rewriter) \
|
||||||
X(fpa)
|
X(fpa) \
|
||||||
|
X(seq_regex_bisim)
|
||||||
|
|
||||||
#define FOR_EACH_TEST(X, X_ARGV) \
|
#define FOR_EACH_TEST(X, X_ARGV) \
|
||||||
FOR_EACH_ALL_TEST(X, X_ARGV) \
|
FOR_EACH_ALL_TEST(X, X_ARGV) \
|
||||||
|
|
|
||||||
72
src/test/seq_regex_bisim.cpp
Normal file
72
src/test/seq_regex_bisim.cpp
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
// Regression test for the seq::derive::intersect_intervals bug.
|
||||||
|
//
|
||||||
|
// Background: derive uses a path-tracking interval set to compute symbolic
|
||||||
|
// derivatives. The intersect_intervals routine used to react to a single
|
||||||
|
// disjoint interval by dropping the entire kept suffix and skipping the rest
|
||||||
|
// of the list, which silently killed valid branches in derivatives such as
|
||||||
|
// D(a|b). That made the bisimulation procedure conclude bogus equalities
|
||||||
|
// like a* == (a|b)*.
|
||||||
|
//
|
||||||
|
// This test asserts the basic shape of the derivative and the bisim result
|
||||||
|
// for that minimal case, so the bug cannot silently regress.
|
||||||
|
#include "ast/ast.h"
|
||||||
|
#include "ast/ast_pp.h"
|
||||||
|
#include "ast/reg_decl_plugins.h"
|
||||||
|
#include "ast/seq_decl_plugin.h"
|
||||||
|
#include "ast/rewriter/seq_rewriter.h"
|
||||||
|
#include "ast/rewriter/seq_regex_bisim.h"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
static void test_a_star_neq_ab_star() {
|
||||||
|
ast_manager m;
|
||||||
|
reg_decl_plugins(m);
|
||||||
|
seq_util u(m);
|
||||||
|
seq_rewriter rw(m);
|
||||||
|
|
||||||
|
sort_ref str_sort(u.str.mk_string_sort(), m);
|
||||||
|
|
||||||
|
zstring sa("a"), sb("b");
|
||||||
|
expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
|
||||||
|
expr_ref re_b(u.re.mk_to_re(u.str.mk_string(sb)), m);
|
||||||
|
expr_ref a_star(u.re.mk_star(re_a), m);
|
||||||
|
expr_ref ab(u.re.mk_union(re_a, re_b), m);
|
||||||
|
expr_ref ab_star(u.re.mk_star(ab), m);
|
||||||
|
|
||||||
|
expr_ref d_ab = rw.mk_brz_derivative(ab);
|
||||||
|
std::cout << "D(a|b) = " << mk_pp(d_ab, m) << "\n";
|
||||||
|
|
||||||
|
// Both the 'a' branch and the 'b' branch of D(a|b) must reach epsilon.
|
||||||
|
// Collect the regex leaves of the symbolic derivative and require at
|
||||||
|
// least two distinct accepting leaves (one for 'a' and one for 'b').
|
||||||
|
expr_ref_vector leaves(m);
|
||||||
|
auto collect = [&](expr* e, auto&& self) -> void {
|
||||||
|
expr* c, *t, *f;
|
||||||
|
if (m.is_ite(e, c, t, f) || u.re.is_union(e, t, f) || u.re.is_antimirov_union(e, t, f)) {
|
||||||
|
self(t, self);
|
||||||
|
self(f, self);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (u.re.is_empty(e)) return;
|
||||||
|
leaves.push_back(e);
|
||||||
|
};
|
||||||
|
collect(d_ab, collect);
|
||||||
|
unsigned nullable_leaves = 0;
|
||||||
|
for (expr* l : leaves) {
|
||||||
|
expr_ref n = rw.is_nullable(l);
|
||||||
|
if (m.is_true(n)) ++nullable_leaves;
|
||||||
|
}
|
||||||
|
std::cout << "D(a|b) leaves=" << leaves.size()
|
||||||
|
<< " nullable=" << nullable_leaves << "\n";
|
||||||
|
ENSURE(nullable_leaves >= 2);
|
||||||
|
|
||||||
|
// Bisim must report the two languages are not equivalent.
|
||||||
|
seq::regex_bisim bisim(rw);
|
||||||
|
lbool eq = bisim.are_equivalent(a_star, ab_star);
|
||||||
|
std::cout << "bisim(a*, (a|b)*) = "
|
||||||
|
<< (eq == l_true ? "true" : eq == l_false ? "false" : "undef") << "\n";
|
||||||
|
ENSURE(eq == l_false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_seq_regex_bisim() {
|
||||||
|
test_a_star_neq_ab_star();
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue