mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 15:16:29 +00:00
fix(seq::derive): symbolize top-level cache key to avoid concrete-ele poisoning
`seq::derive::operator()(ele, r)` looks up `m_top_cache` keyed only by the regex `r`, but on a miss it used to set `m_ele = ele` (a concrete char) before calling `derive_rec(r)`. The resulting ITE-tree contained constant-folded `(= ele c)` conditions, so the "symbolic" derivative stored in the cache was actually specialized to that one ele. Subsequent calls with the same `r` but a different ele hit the stale cached answer and the substitution at the bottom was a no-op (no `v0` left to replace). Simplest victim: (str.in_re "aP" (re.++ (re.* "a") "P")) returned `unsat`. The first call D_'a'(a*P) computed `a*P` and cached it under key `a*P`; the next call D_'P'(a*P) hit that cache entry and returned `a*P` instead of epsilon, so the membership check ended on a non-nullable state. Fix: set `m_ele = v` (the canonical fresh var) so the derivative is genuinely symbolic. Concrete-ele callers go through the existing substitution at the bottom of `operator()`. Adds a regression test in src/test/seq_regex_bisim.cpp checking that D_'a'(a*P) is not nullable while D_'P'(a*P) is. Note: this is independent of the mut_0013 bisim-level unsoundness; that case still fails and is being tracked separately. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
fb6470a1a1
commit
0b8bb98656
2 changed files with 63 additions and 3 deletions
|
|
@ -80,7 +80,12 @@ namespace seq {
|
|||
result = cached;
|
||||
}
|
||||
else {
|
||||
m_ele = ele;
|
||||
// Always compute the SYMBOLIC derivative wrt the canonical
|
||||
// variable v (so the cached result is reusable for any
|
||||
// concrete ele via substitution below). Using the concrete
|
||||
// `ele` here would bake it into the cached ITE-tree and
|
||||
// poison future lookups for the same r with a different ele.
|
||||
m_ele = v;
|
||||
m_depth = 0;
|
||||
// Initialize path state for inline pruning
|
||||
m_path.reset();
|
||||
|
|
|
|||
|
|
@ -7,14 +7,22 @@
|
|||
// D(a|b). That made the bisimulation procedure conclude bogus equalities
|
||||
// like a* == (a|b)*.
|
||||
//
|
||||
// This test asserts the basic shape of the derivative and the bisim result
|
||||
// for that minimal case, so the bug cannot silently regress.
|
||||
// This file also covers the seq::derive top-level-cache poisoning bug.
|
||||
// `m_top_cache` is keyed only by the regex; the routine used to populate it
|
||||
// while `m_ele` was set to a *concrete* character, baking that character
|
||||
// into the cached "symbolic" derivative. Subsequent calls with the same
|
||||
// regex but a different ele then returned a stale concrete answer instead
|
||||
// of the true symbolic derivative. The simplest victim is
|
||||
// (str.in_re "aP" (re.++ (re.* "a") "P"))
|
||||
// which used to return false because the derivative wrt 'a' was cached and
|
||||
// re-used as the derivative wrt 'P'.
|
||||
#include "ast/ast.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "ast/rewriter/seq_regex_bisim.h"
|
||||
#include "ast/rewriter/th_rewriter.h"
|
||||
#include <iostream>
|
||||
|
||||
static void test_a_star_neq_ab_star() {
|
||||
|
|
@ -67,6 +75,53 @@ static void test_a_star_neq_ab_star() {
|
|||
ENSURE(eq == l_false);
|
||||
}
|
||||
|
||||
// Regression for the derive top-level-cache poisoning bug.
|
||||
// Take r = (re.* "a") ++ "P" and check str.in_re "aP" r. Before the fix
|
||||
// the first per-char derivative call (wrt 'a') populated m_top_cache with
|
||||
// 'a' baked into the symbolic ITE-tree, so the next call (wrt 'P') returned
|
||||
// that stale cached value instead of computing D_P(r) = epsilon, making
|
||||
// str.in_re wrongly return false.
|
||||
static void test_derive_cache_per_ele() {
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
seq_util u(m);
|
||||
seq_rewriter rw(m);
|
||||
|
||||
sort_ref str_sort(u.str.mk_string_sort(), m);
|
||||
|
||||
zstring sa("a"), sP("P"), s_aP("aP");
|
||||
expr_ref re_a(u.re.mk_to_re(u.str.mk_string(sa)), m);
|
||||
expr_ref re_P(u.re.mk_to_re(u.str.mk_string(sP)), m);
|
||||
expr_ref a_star(u.re.mk_star(re_a), m);
|
||||
expr_ref r(u.re.mk_concat(a_star, re_P), m);
|
||||
expr_ref aP(u.str.mk_string(s_aP), m);
|
||||
|
||||
// Compute D_'a'(a*P) and D_'P'(a*P) directly via mk_derivative.
|
||||
// Before the fix, m_top_cache was populated while m_ele = ele (the
|
||||
// concrete char), so the second call hit the stale cached answer from
|
||||
// the first. After the fix the cache is keyed by a symbolic var, so
|
||||
// each concrete-ele substitution produces the right answer.
|
||||
expr_ref ch_a(u.mk_char('a'), m);
|
||||
expr_ref ch_P(u.mk_char('P'), m);
|
||||
expr_ref d_a = rw.mk_derivative(ch_a, r);
|
||||
expr_ref d_P = rw.mk_derivative(ch_P, r);
|
||||
std::cout << "D_a(a*P) = " << mk_pp(d_a, m) << "\n";
|
||||
std::cout << "D_P(a*P) = " << mk_pp(d_P, m) << "\n";
|
||||
|
||||
// D_P(a*P) must be nullable (it accepts the empty suffix), while
|
||||
// D_a(a*P) must not be (it still needs a trailing 'P').
|
||||
expr_ref n_a = rw.is_nullable(d_a);
|
||||
expr_ref n_P = rw.is_nullable(d_P);
|
||||
th_rewriter trw(m);
|
||||
trw(n_a);
|
||||
trw(n_P);
|
||||
std::cout << "nullable(D_a) = " << mk_pp(n_a, m) << "\n";
|
||||
std::cout << "nullable(D_P) = " << mk_pp(n_P, m) << "\n";
|
||||
ENSURE(m.is_false(n_a));
|
||||
ENSURE(m.is_true(n_P));
|
||||
}
|
||||
|
||||
void tst_seq_regex_bisim() {
|
||||
test_a_star_neq_ab_star();
|
||||
test_derive_cache_per_ele();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue