3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-04-02 09:58:59 +00:00

fix bug in non-emptiness witness extraction

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Nikolaj Bjorner 2026-03-24 12:34:40 -07:00
parent da9d8c8694
commit 5803c6f202
7 changed files with 198 additions and 61 deletions

View file

@ -1185,4 +1185,26 @@ void bool_rewriter::mk_ge2(expr* a, expr* b, expr* c, expr_ref& r) {
}
bool bool_rewriter::decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el) {
expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr;
if (m().is_ite(r, cond, r1, r2)) {
c = cond;
th = r1;
el = r2;
return true;
}
for (expr *e : subterms::ground(expr_ref(r, m()))) {
if (m().is_ite(e, cond, r1, r2)) {
c = cond;
th = r1;
el = r2;
return true;
}
}
return false;
}
template class rewriter_tpl<bool_rewriter_cfg>;

View file

@ -242,6 +242,9 @@ public:
void mk_nand(expr * arg1, expr * arg2, expr_ref & result);
void mk_nor(expr * arg1, expr * arg2, expr_ref & result);
void mk_ge2(expr* a, expr* b, expr* c, expr_ref& result);
bool decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el);
};
struct bool_rewriter_cfg : public default_rewriter_cfg {

View file

@ -25,6 +25,7 @@ Authors:
#include "ast/ast_pp.h"
#include "ast/ast_ll_pp.h"
#include "ast/ast_util.h"
#include "ast/for_each_expr.h"
#include "ast/well_sorted.h"
#include "ast/rewriter/var_subst.h"
#include "ast/rewriter/expr_safe_replace.h"
@ -6083,11 +6084,9 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
re_eval_pos current = todo.back();
todo.pop_back();
r = current.e;
// IF_VERBOSE(1, verbose_stream() << "derive " << mk_pp(r, m()) << "\n");
str.resize(current.str_len);
if (current.needs_derivation) {
SASSERT(current.exclude.empty());
// We are looking for the next character => generate derivation
if (visited.is_marked(r))
continue;
if (re().is_empty(r))
@ -6095,11 +6094,11 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
auto info = re().get_info(r);
if (info.nullable == l_true)
return l_true;
IF_VERBOSE(2, verbose_stream() << " derive str=" << str.size() << " " << mk_bounded_pp(r, m(), 2) << "\n");
visited.mark(r);
if (re().is_union(r)) {
for (expr* arg : *to_app(r)) {
for (expr* arg : *to_app(r))
todo.push_back({ expr_ref(arg, m()), str.size(), {}, true });
}
continue;
}
@ -6109,7 +6108,7 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
buffer<std::pair<unsigned, unsigned>> exclude = std::move(current.exclude);
expr* c, * th, * el;
if (re().is_empty(r))
continue;
if (re().is_union(r)) {
@ -6118,7 +6117,8 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
}
continue;
}
if (m().is_ite(r, c, th, el)) {
expr_ref c(m()), th(m()), el(m());
if (bool_rewriter(m()).decompose_ite(r, c, th, el)) {
unsigned low = 0, high = zstring::unicode_max_char();
bool has_bounds = get_bounds(c, low, high);
if (!re().is_empty(el)) {
@ -6129,8 +6129,16 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
exclude.pop_back();
}
if (has_bounds) {
// I want this case to be processed first => push it last
// reason: current string is only pruned
if (any_of(subterms::all(th), [&](expr *t) { return m().is_ite(t); })) {
if (low > 0)
exclude.push_back({0, low - 1});
if (high < zstring::unicode_max_char())
exclude.push_back({high + 1, zstring::unicode_max_char()});
todo.push_back({ expr_ref(th, m()), str.size(), exclude, false });
continue;
}
SASSERT(low <= high);
unsigned ch = low;
bool found = true;
@ -6153,12 +6161,14 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
}
if (found && ch <= high) {
str.push_back(ch);
todo.push_back({ expr_ref(th, m()), str.size(), {}, true });
todo.push_back({expr_ref(th, m()), str.size(), {}, true});
}
}
continue;
}
if (is_ground(r)) {
// ensure selected character is not in exclude
unsigned ch = 'a';

View file

@ -503,7 +503,8 @@ namespace seq {
// -----------------------------------------------------------------------
lbool seq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
SASSERT(re && re->get_expr());
if (!re)
return l_undef;
if (re->is_fail())
return l_true;
if (re->is_nullable())

View file

@ -673,55 +673,7 @@ namespace smt {
}
}
/*
Return a list of all (cond, leaf) pairs in a given derivative
expression r.
Note: this implementation is inefficient: it simply collects all expressions under an if and
iterates over all combinations.
This method is still used by:
propagate_is_empty
propagate_is_non_empty
*/
void seq_regex::get_cofactors(expr* r, expr_ref_pair_vector& result) {
obj_hashtable<expr> ifs;
expr* cond = nullptr, * r1 = nullptr, * r2 = nullptr;
for (expr* e : subterms::ground(expr_ref(r, m)))
if (m.is_ite(e, cond, r1, r2))
ifs.insert(cond);
expr_ref_vector rs(m);
vector<expr_ref_vector> conds;
conds.push_back(expr_ref_vector(m));
rs.push_back(r);
for (expr* c : ifs) {
unsigned sz = conds.size();
expr_safe_replace rep1(m);
expr_safe_replace rep2(m);
rep1.insert(c, m.mk_true());
rep2.insert(c, m.mk_false());
expr_ref r2(m);
for (unsigned i = 0; i < sz; ++i) {
expr_ref_vector cs = conds[i];
cs.push_back(mk_not(m, c));
conds.push_back(cs);
conds[i].push_back(c);
expr_ref r1(rs.get(i), m);
rep1(r1, r2);
rs[i] = r2;
rep2(r1, r2);
rs.push_back(r2);
}
}
for (unsigned i = 0; i < conds.size(); ++i) {
expr_ref conj = mk_and(conds[i]);
expr_ref r(rs.get(i), m);
ctx.get_rewriter()(r);
if (!m.is_false(conj) && !re().is_empty(r))
result.push_back(conj, r);
}
}
/*
is_empty(r, u) => ~is_nullable(r)
@ -877,4 +829,50 @@ namespace smt {
return std::string("id") + std::to_string(e->get_id());
}
/**
Return a list of all (cond, leaf) pairs in a given
expression r.
Note: this implementation is inefficient: it simply collects all expressions under an if and
iterates over all combinations.
*/
void seq_regex::get_cofactors(expr *r, expr_ref_pair_vector &result) {
obj_hashtable<expr> ifs;
expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr;
for (expr *e : subterms::ground(expr_ref(r, m)))
if (m.is_ite(e, cond, r1, r2))
ifs.insert(cond);
expr_ref_vector rs(m);
vector<expr_ref_vector> conds;
conds.push_back(expr_ref_vector(m));
rs.push_back(r);
for (expr *c : ifs) {
unsigned sz = conds.size();
expr_safe_replace rep1(m);
expr_safe_replace rep2(m);
rep1.insert(c, m.mk_true());
rep2.insert(c, m.mk_false());
expr_ref r2(m);
for (unsigned i = 0; i < sz; ++i) {
expr_ref_vector cs = conds[i];
cs.push_back(m.mk_not(c));
conds.push_back(cs);
conds[i].push_back(c);
expr_ref r1(rs.get(i), m);
rep1(r1, r2);
rs[i] = r2;
rep2(r1, r2);
rs.push_back(r2);
}
}
for (unsigned i = 0; i < conds.size(); ++i) {
expr_ref conj = mk_and(conds[i]);
expr_ref r(rs.get(i), m);
ctx.get_rewriter()(r);
if (!m.is_false(conj) && !re().is_empty(r))
result.push_back(conj, r);
}
}
}

View file

@ -164,7 +164,6 @@ namespace smt {
// returned by derivative_wrapper
expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
void get_derivative_targets(expr* r, expr_ref_vector& targets);
void get_cofactors(expr* r, expr_ref_pair_vector& result);
/*
Pretty print the regex of the state id to the out stream,
@ -184,6 +183,9 @@ namespace smt {
bool block_if_empty(expr* r, literal lit);
void get_cofactors(expr *r, expr_ref_pair_vector &result);
public:
seq_regex(theory_seq& th);

View file

@ -712,6 +712,105 @@ static void test_some_seq_in_re_excluded_low_regression() {
std::cout << " ok: witness=" << ws << " satisfies [A-Z] \\ {A}\n";
}
// Regression: some_seq_in_re returns l_false for re.inter of "odd number+\n" and "phone number+\n"
// The regex is non-empty, a valid witness is e.g. "1111111111\n".
// Root cause: derivative of re.inter produces nested ITEs, and the witness
// search incorrectly pushes inner ITE nodes with needs_derivation=true,
// causing ITE conditions from the first derivative to leak into the next.
static void test_some_seq_in_re_inter_loop_regression() {
std::cout << "test_some_seq_in_re_inter_loop_regression\n";
ast_manager m;
reg_decl_plugins(m);
seq_util su(m);
seq_rewriter rw(m);
th_rewriter tr(m);
// Helpers
auto mk_to_re = [&](const char* s) -> expr_ref {
return expr_ref(su.re.mk_to_re(su.str.mk_string(s)), m);
};
auto mk_range = [&](const char* lo, const char* hi) -> expr_ref {
expr_ref l(su.mk_char(lo[0]), m);
expr_ref h(su.mk_char(hi[0]), m);
return expr_ref(su.re.mk_range(su.str.mk_unit(l), su.str.mk_unit(h)), m);
};
auto cat = [&](expr* a, expr* b) -> expr_ref {
return expr_ref(su.re.mk_concat(a, b), m);
};
auto un = [&](expr* a, expr* b) -> expr_ref {
return expr_ref(su.re.mk_union(a, b), m);
};
// Build the regex from the crash output:
// a!1 = ([1-9][1-9]* | "")
expr_ref range19 = mk_range("1", "9");
expr_ref a1(su.re.mk_union(
su.re.mk_concat(range19, su.re.mk_star(range19)),
su.re.mk_to_re(su.str.mk_string(""))), m);
// a!2 = "3" | "5" | "7" | "9"
expr_ref a2 = un(mk_to_re("3"), un(mk_to_re("5"), un(mk_to_re("7"), mk_to_re("9"))));
// a!3 = (a!1 ++ ("1" | a!2)) ++ "\n"
expr_ref a3 = cat(cat(a1, un(mk_to_re("1"), a2)), mk_to_re("\x0a"));
// a!4 = "(" ++ loop(3,3,[0-9]) ++ ")"
expr_ref range09 = mk_range("0", "9");
expr_ref loop3(su.re.mk_loop(range09, 3, 3), m);
expr_ref a4 = cat(cat(mk_to_re("("), loop3), mk_to_re(")"));
// a!5 = a!4 ++ ("" | " ") ++ loop(3,3,[0-9])
expr_ref a5 = cat(cat(a4, un(mk_to_re(""), mk_to_re(" "))), loop3);
// a!6 = a!5 ++ ("" | " " | "-")
expr_ref sep3 = un(mk_to_re(""), un(mk_to_re(" "), mk_to_re("-")));
expr_ref a6 = cat(a5, sep3);
// a!7 = loop(3,3,[0-9]) ++ ("" | " " | "-")
expr_ref a7 = cat(loop3, sep3);
// a!8 = a!7 ++ loop(3,3,[0-9]) ++ ("" | " " | "-")
expr_ref a8 = cat(cat(a7, loop3), sep3);
// a!9 = a!8 ++ loop(4,4,[0-9]) ++ "\n"
expr_ref loop4(su.re.mk_loop(range09, 4, 4), m);
expr_ref a9 = cat(cat(a8, loop4), mk_to_re("\x0a"));
// a!10 = (a!6 ++ loop(4,4,[0-9])) | a!9
expr_ref a10 = un(cat(a6, loop4), a9);
// Final regex = re.inter(a!3, a!10)
expr_ref re_expr(su.re.mk_inter(a3, a10), m);
std::cout << " regex: " << mk_pp(re_expr, m) << "\n";
// The regex is non-empty: "1111111111\n" matches both a!3 and a!10
// some_seq_in_re must return l_true with a valid witness
expr_ref witness(m);
lbool wr = rw.some_seq_in_re(re_expr, witness);
std::cout << " some_seq_in_re returned: " << wr << "\n";
if (witness)
std::cout << " witness: " << mk_pp(witness, m) << "\n";
else
std::cout << " witness: null\n";
ENSURE(wr == l_true);
ENSURE(witness.get() != nullptr);
if (wr != l_true || !witness)
return;
// Verify witness satisfies the regex
expr_ref in_re(su.re.mk_in_re(witness, re_expr), m);
expr_ref in_re_simpl(m);
tr(in_re, in_re_simpl);
std::cout << " in_re simplified: " << mk_pp(in_re_simpl, m) << "\n";
SASSERT(m.is_true(in_re_simpl));
zstring ws;
VERIFY(su.str.is_string(witness, ws));
std::cout << " ok: witness=\"" << ws << "\" satisfies the intersection regex\n";
}
void tst_seq_regex() {
test_seq_regex_instantiation();
test_seq_regex_is_empty();
@ -731,8 +830,6 @@ void tst_seq_regex() {
test_bfs_empty_union_of_empties();
test_bfs_nonempty_range();
test_bfs_empty_complement_full();
test_bfs_null_safety();
test_bfs_bounded();
// New tests for regex membership completion
test_char_set_is_subset();
test_stabilizer_store_basic();
@ -743,5 +840,9 @@ void tst_seq_regex() {
test_is_language_subset_false();
test_is_language_subset_trivial();
test_some_seq_in_re_excluded_low_regression();
test_some_seq_in_re_inter_loop_regression();
// test_bfs_null_safety has a pre-existing failure, run it last
test_bfs_null_safety();
test_bfs_bounded();
std::cout << "seq_regex: all tests passed\n";
}