3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00

Address PR feedback on derive, nullability, and requested reverts

This commit is contained in:
copilot-swe-agent[bot] 2026-06-10 18:18:46 +00:00 committed by GitHub
parent 77ac58484f
commit 00fcd3a36d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 143 additions and 267 deletions

View file

@ -252,7 +252,7 @@ namespace seq {
// δ(reverse(r1)) - normalize by pushing reverse inward, then derive
if (re().is_reverse(r, r1)) {
expr_ref norm = normalize_reverse(r1);
expr_ref norm = mk_regex_reverse(r1);
if (norm != r)
return derive_rec(norm);
return expr_ref(re().mk_derivative(m_ele, r), m);
@ -423,96 +423,54 @@ namespace seq {
}
// -------------------------------------------------------
// Normalize reverse by pushing it inward
// Normalize reverse
// -------------------------------------------------------
expr_ref derive::normalize_reverse(expr* r) {
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * p = nullptr;
expr_ref derive::mk_regex_reverse(expr* r) {
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
unsigned lo = 0, hi = 0;
zstring zs;
// reverse(reverse(r1)) = r1
if (re().is_reverse(r, r1))
return expr_ref(r1, m);
// reverse(r1 · r2) = reverse(r2) · reverse(r1)
if (re().is_concat(r, r1, r2)) {
expr_ref a(re().mk_reverse(r2), m);
expr_ref b(re().mk_reverse(r1), m);
return expr_ref(re().mk_concat(a, b), m);
expr_ref result(m);
if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) ||
re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r))
result = r;
else if (re().is_to_re(r))
result = re().mk_reverse(r);
else if (re().is_reverse(r, r1))
result = r1;
else if (re().is_concat(r, r1, r2))
result = mk_concat(mk_regex_reverse(r2), mk_regex_reverse(r1));
else if (m.is_ite(r, c, r1, r2))
result = m.mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2));
else if (re().is_union(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_union(a1, b1);
}
// reverse(r1 r2) = reverse(r1) reverse(r2)
if (re().is_union(r, r1, r2)) {
expr_ref a(re().mk_reverse(r1), m);
expr_ref b(re().mk_reverse(r2), m);
return expr_ref(re().mk_union(a, b), m);
else if (re().is_intersection(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_inter(a1, b1);
}
// reverse(r1 ∩ r2) = reverse(r1) ∩ reverse(r2)
if (re().is_intersection(r, r1, r2)) {
expr_ref a(re().mk_reverse(r1), m);
expr_ref b(re().mk_reverse(r2), m);
return m_re.mk_inter(a, b);
else if (re().is_diff(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_diff(a1, b1);
}
// reverse(r1 \ r2) = reverse(r1) \ reverse(r2)
if (re().is_diff(r, r1, r2)) {
expr_ref a(re().mk_reverse(r1), m);
expr_ref b(re().mk_reverse(r2), m);
return expr_ref(re().mk_diff(a, b), m);
}
// reverse(ite(c, r1, r2)) = ite(c, reverse(r1), reverse(r2))
if (m.is_ite(r, p, r1, r2))
return expr_ref(m.mk_ite(p, re().mk_reverse(r1), re().mk_reverse(r2)), m);
// reverse(r1?) = reverse(r1)?
if (re().is_opt(r, r1))
return expr_ref(re().mk_opt(re().mk_reverse(r1)), m);
// reverse(~r1) = ~reverse(r1)
if (re().is_complement(r, r1))
return expr_ref(re().mk_complement(re().mk_reverse(r1)), m);
// reverse(r1*) = reverse(r1)*
if (re().is_star(r, r1))
return expr_ref(re().mk_star(re().mk_reverse(r1)), m);
// reverse(r1+) = reverse(r1)+
if (re().is_plus(r, r1))
return expr_ref(re().mk_plus(re().mk_reverse(r1)), m);
// reverse(r1{lo,}) = reverse(r1){lo,}
if (re().is_loop(r, r1, lo))
return expr_ref(re().mk_loop(re().mk_reverse(r1), lo), m);
// reverse(r1{lo,hi}) = reverse(r1){lo,hi}
if (re().is_loop(r, r1, lo, hi))
return expr_ref(re().mk_loop_proper(re().mk_reverse(r1), lo, hi), m);
// Symmetric: full_seq, empty, range, full_char, of_pred
if (re().is_full_seq(r) || re().is_empty(r) || re().is_range(r) ||
re().is_full_char(r) || re().is_of_pred(r))
return expr_ref(r, m);
// reverse(to_re(s)) where s is a string literal
if (re().is_to_re(r, s) && u().str.is_string(s, zs))
return expr_ref(re().mk_to_re(u().str.mk_string(zs.reverse())), m);
// reverse(to_re(unit)) = to_re(unit)
if (re().is_to_re(r, s) && u().str.is_unit(s))
return expr_ref(r, m);
// reverse(to_re(s1 ++ s2)) = reverse(to_re(s2)) · reverse(to_re(s1))
if (re().is_to_re(r, s) && u().str.is_concat(s, r1, r2)) {
expr_ref a(re().mk_reverse(re().mk_to_re(r2)), m);
expr_ref b(re().mk_reverse(re().mk_to_re(r1)), m);
return expr_ref(re().mk_concat(a, b), m);
}
// Stuck — cannot normalize further
return expr_ref(re().mk_reverse(r), m);
else if (re().is_star(r, r1))
result = re().mk_star(mk_regex_reverse(r1));
else if (re().is_plus(r, r1))
result = re().mk_plus(mk_regex_reverse(r1));
else if (re().is_loop(r, r1, lo))
result = re().mk_loop(mk_regex_reverse(r1), lo);
else if (re().is_loop(r, r1, lo, hi))
result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi);
else if (re().is_opt(r, r1))
result = re().mk_opt(mk_regex_reverse(r1));
else if (re().is_complement(r, r1))
result = re().mk_complement(mk_regex_reverse(r1));
else
result = re().mk_reverse(r);
return result;
}
// -------------------------------------------------------
@ -520,16 +478,92 @@ namespace seq {
// -------------------------------------------------------
expr_ref derive::is_nullable(expr* r) {
// First, try the static info which handles ground/interpreted regex
lbool nb = re().get_info(r).nullable;
if (nb == l_true)
return expr_ref(m.mk_true(), m);
if (nb == l_false)
return expr_ref(m.mk_false(), m);
// For symbolic regexes, return a membership predicate
sort* s = nullptr;
VERIFY(m_util.is_re(r, s));
return expr_ref(re().mk_in_re(u().str.mk_empty(s), r), m);
SASSERT(m_util.is_re(r) || m_util.is_seq(r));
expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr;
sort* seq_sort = nullptr;
unsigned lo = 0, hi = 0;
zstring s1;
expr_ref result(m);
if (re().is_concat(r, r1, r2) ||
re().is_intersection(r, r1, r2)) {
m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
}
else if (re().is_union(r, r1, r2) || re().is_antimirov_union(r, r1, r2)) {
m_br.mk_or(is_nullable(r1), is_nullable(r2), result);
}
else if (re().is_diff(r, r1, r2)) {
m_br.mk_not(is_nullable(r2), result);
m_br.mk_and(result, is_nullable(r1), result);
}
else if (re().is_star(r) ||
re().is_opt(r) ||
re().is_full_seq(r) ||
re().is_epsilon(r) ||
(re().is_loop(r, r1, lo) && lo == 0) ||
(re().is_loop(r, r1, lo, hi) && lo == 0)) {
result = m.mk_true();
}
else if (re().is_full_char(r) ||
re().is_empty(r) ||
re().is_of_pred(r) ||
re().is_range(r)) {
result = m.mk_false();
}
else if (re().is_plus(r, r1) ||
(re().is_loop(r, r1, lo) && lo > 0) ||
(re().is_loop(r, r1, lo, hi) && lo > 0) ||
(re().is_reverse(r, r1))) {
result = is_nullable(r1);
}
else if (re().is_complement(r, r1)) {
m_br.mk_not(is_nullable(r1), result);
}
else if (re().is_to_re(r, r1)) {
result = is_nullable(r1);
}
else if (m.is_ite(r, cond, r1, r2)) {
m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result);
}
else if (m_util.is_re(r, seq_sort)) {
result = is_nullable_symbolic_regex(r, seq_sort);
}
else if (u().str.is_concat(r, r1, r2)) {
m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
}
else if (u().str.is_empty(r)) {
result = m.mk_true();
}
else if (u().str.is_unit(r)) {
result = m.mk_false();
}
else if (u().str.is_string(r, s1)) {
result = m.mk_bool_val(s1.length() == 0);
}
else {
SASSERT(m_util.is_seq(r));
result = m.mk_eq(u().str.mk_empty(r->get_sort()), r);
}
return result;
}
expr_ref derive::is_nullable_symbolic_regex(expr* r, sort* seq_sort) {
SASSERT(m_util.is_re(r));
expr* elem = nullptr, * r1 = r, * r2 = nullptr, * s = nullptr;
expr_ref elems(u().str.mk_empty(seq_sort), m);
expr_ref result(m);
while (re().is_derivative(r1, elem, r2)) {
if (u().str.is_empty(elems))
elems = u().str.mk_unit(elem);
else
elems = u().str.mk_concat(u().str.mk_unit(elem), elems);
r1 = r2;
}
if (re().is_to_re(r1, s)) {
result = m.mk_eq(elems, s);
return result;
}
result = re().mk_in_re(u().str.mk_empty(seq_sort), r);
return result;
}
// -------------------------------------------------------
@ -695,10 +729,19 @@ namespace seq {
expr_ref derive::mk_concat(expr* a, expr* b) {
sort* seq_s = nullptr, * ele_s = nullptr;
VERIFY(m_util.is_re(a, seq_s));
VERIFY(u().is_seq(seq_s, ele_s));
if (re().is_empty(a)) return expr_ref(a, m);
if (re().is_empty(b)) return expr_ref(b, m);
if (re().is_epsilon(a)) return expr_ref(b, m);
if (re().is_epsilon(b)) return expr_ref(a, m);
if (re().is_full_seq(a) && re().is_full_seq(b))
return expr_ref(a, m);
if (re().is_full_char(a) && re().is_full_seq(b))
return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m);
if (re().is_full_seq(a) && re().is_full_char(b))
return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m);
// to_re(s1) · to_re(s2) → to_re(s1 ++ s2)
expr* s1 = nullptr, * s2 = nullptr;

View file

@ -125,6 +125,7 @@ namespace seq {
// Nullable check: returns a Boolean expression
expr_ref is_nullable(expr* r);
expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort);
// Smart constructors with path-aware simplification and ACI canonicalization
expr_ref mk_union(expr* a, expr* b);
@ -150,8 +151,8 @@ namespace seq {
bool pred_implies(bool sign_a, expr* a, bool sign_b, expr* b);
bool pred_implies(expr* a, expr* b);
// Normalize reverse(r) by pushing reverse inward
expr_ref normalize_reverse(expr* r);
// Normalize reverse(r)
expr_ref mk_regex_reverse(expr* r);
// Condition evaluation helpers
lbool eval_cond(expr* cond);
@ -184,4 +185,3 @@ namespace seq {
};
}

View file

@ -3080,98 +3080,6 @@ expr_ref seq_rewriter::merge_regex_sets(expr* r1, expr* r2, expr* unit,
}
}
expr_ref seq_rewriter::mk_regex_reverse(expr* r) {
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
unsigned lo = 0, hi = 0;
expr_ref result(m());
if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) ||
re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r))
result = r;
else if (re().is_to_re(r))
result = re().mk_reverse(r);
else if (re().is_reverse(r, r1))
result = r1;
else if (re().is_concat(r, r1, r2))
result = mk_regex_concat(mk_regex_reverse(r2), mk_regex_reverse(r1));
else if (m().is_ite(r, c, r1, r2))
result = m().mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2));
else if (re().is_union(r, r1, r2)) {
// enforce deterministic evaluation order
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_union(a1, b1);
}
else if (re().is_intersection(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_inter(a1, b1);
}
else if (re().is_diff(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_diff(a1, b1);
}
else if (re().is_star(r, r1))
result = re().mk_star(mk_regex_reverse(r1));
else if (re().is_plus(r, r1))
result = re().mk_plus(mk_regex_reverse(r1));
else if (re().is_loop(r, r1, lo))
result = re().mk_loop(mk_regex_reverse(r1), lo);
else if (re().is_loop(r, r1, lo, hi))
result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi);
else if (re().is_opt(r, r1))
result = re().mk_opt(mk_regex_reverse(r1));
else if (re().is_complement(r, r1))
result = re().mk_complement(mk_regex_reverse(r1));
else
//stuck cases: such as r being a regex variable
//observe that re().mk_reverse(to_re(s)) is not a stuck case
result = re().mk_reverse(r);
return result;
}
expr_ref seq_rewriter::mk_regex_concat(expr* r, expr* s) {
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(r, seq_sort));
VERIFY(u().is_seq(seq_sort, ele_sort));
SASSERT(r->get_sort() == s->get_sort());
expr_ref result(m());
expr* r1, * r2;
if (re().is_epsilon(r) || re().is_empty(s))
result = s;
else if (re().is_epsilon(s) || re().is_empty(r))
result = r;
else if (re().is_full_seq(r) && re().is_full_seq(s))
result = r;
else if (re().is_full_char(r) && re().is_full_seq(s))
// ..* = .+
result = re().mk_plus(re().mk_full_char(ele_sort));
else if (re().is_full_seq(r) && re().is_full_char(s))
// .*. = .+
result = re().mk_plus(re().mk_full_char(ele_sort));
else if (re().is_concat(r, r1, r2))
// create the resulting concatenation in right-associative form except for the following case
// TODO: maintain the following invariant for A ++ B{m,n} + C
// concat(concat(A, B{m,n}), C) (if A != () and C != ())
// concat(B{m,n}, C) (if A == () and C != ())
// where A, B, C are regexes
// Using & below for Intersection and | for Union
// In other words, do not make A ++ B{m,n} into right-assoc form, but keep B{m,n} at the top
// This will help to identify this situation in the merge routine:
// concat(concat(A, B{0,m}), C) | concat(concat(A, B{0,n}), C)
// simplifies to
// concat(concat(A, B{0,max(m,n)}), C)
// analogously:
// concat(concat(A, B{0,m}), C) & concat(concat(A, B{0,n}), C)
// simplifies to
// concat(concat(A, B{0,min(m,n)}), C)
result = mk_regex_concat(r1, mk_regex_concat(r2, s));
else {
result = re().mk_concat(r, s);
}
return result;
}
/*
* calls elim_condition
*/

View file

@ -195,9 +195,6 @@ class seq_rewriter {
bool check_deriv_normal_form(expr* r, int level = 3);
#endif
expr_ref mk_regex_reverse(expr* r);
expr_ref mk_regex_concat(expr* r1, expr* r2);
expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function<bool(expr*, expr*&, expr*&)>& decompose, std::function<expr* (expr*, expr*)>& compose);
// elem is (:var 0) and path a condition that may have (:var 0) as a free variable

View file

@ -139,9 +139,6 @@ expr * datatype_factory::get_almost_fresh_value(sort * s) {
expr * datatype_factory::get_fresh_value(sort * s) {
if (!m_util.is_datatype(s))
return m_model.get_fresh_value(s);
if (m_fresh_depth >= m_max_fresh_depth)
return get_last_fresh_value(s);
struct depth_guard { unsigned& d; depth_guard(unsigned& d) : d(d) { ++d; } ~depth_guard() { --d; } } _dg(m_fresh_depth);
TRACE(datatype, tout << "generating fresh value for: " << s->get_name() << "\n";);
auto& [set, values] = get_value_set(s);
// Approach 0)

View file

@ -24,8 +24,6 @@ Revision History:
class datatype_factory : public struct_factory {
datatype_util m_util;
obj_map<sort, expr *> m_last_fresh_value;
unsigned m_fresh_depth = 0;
static const unsigned m_max_fresh_depth = 512;
expr * get_last_fresh_value(sort * s);
expr * get_almost_fresh_value(sort * s);

View file

@ -22,7 +22,6 @@ Author:
#include "ast/ast_util.h"
#include "ast/for_each_expr.h"
#include <ast/rewriter/expr_safe_replace.h>
#include <chrono>
namespace smt {
@ -224,40 +223,6 @@ namespace smt {
th.add_axiom(~lit);
return true;
}
// Second pass: deeper exploration for intersection/complement/diff regexes
// These are candidates for dead state detection (the result may be empty)
// For these, do unlimited depth exploration with a time budget
unsigned r_id = get_state_id(r);
expr* r1 = nullptr, *r2 = nullptr;
if (!m_state_graph.is_dead(r_id) && !m_state_graph.is_live(r_id) &&
(re().is_intersection(r, r1, r2) || re().is_complement(r, r1) || re().is_diff(r, r1, r2))) {
// Collect all unexplored states and explore them iteratively
// with a time budget
auto pass2_start = std::chrono::steady_clock::now();
bool changed = true;
while (changed && !m_state_graph.is_dead(r_id)) {
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - pass2_start).count();
if (elapsed > 100) break;
changed = false;
for (unsigned i = 0; i < m_state_to_expr.size() && !m_state_graph.is_dead(r_id); ++i) {
unsigned st_id = i + 1;
if (m_state_graph.is_done(st_id) || m_state_graph.is_live(st_id) || m_state_graph.is_dead(st_id))
continue;
// This is an unexplored state — explore it
expr* st = m_state_to_expr.get(i);
if (re().get_info(st).nullable == l_true)
continue;
if (update_state_graph(st, 1))
changed = true;
}
}
if (m_state_graph.is_dead(r_id)) {
STRACE(seq_regex_brief, tout << "(dead2) ";);
th.add_axiom(~lit);
return true;
}
}
}
return false;
}
@ -851,7 +816,7 @@ namespace smt {
/*
Update the state graph with expression r and all its derivatives.
*/
bool seq_regex::update_state_graph(expr* r, unsigned depth) {
bool seq_regex::update_state_graph(expr* r) {
unsigned r_id = get_state_id(r);
if (m_state_graph.is_done(r_id)) return false;
if (m_state_graph.get_size() >= m_max_state_graph_size) {
@ -894,38 +859,6 @@ namespace smt {
m_state_graph.add_edge(r_id, dr_id, maybecycle);
}
m_state_graph.mark_done(r_id);
// Explore direct targets for dead state detection (depth 1 only)
// This compensates for less-canonical derivative representations
if (depth < 1) {
for (auto const& dr: derivatives) {
unsigned dr_id = get_state_id(dr);
if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id))
continue;
if (re().get_info(dr).nullable == l_true)
continue;
update_state_graph(dr, depth + 1);
}
}
else if (depth == 1) {
// At depth 1, do lightweight exploration: compute derivatives
// of this state's targets but only to check if they're all dead.
// Don't add complex states to the graph — just mark them dead if
// their get_info says min_length == UINT_MAX or is_empty.
for (auto const& dr: derivatives) {
unsigned dr_id = get_state_id(dr);
if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id))
continue;
auto dr_info = re().get_info(dr);
if (dr_info.nullable == l_true) {
m_state_graph.add_state(dr_id);
m_state_graph.mark_live(dr_id);
}
else if (re().is_empty(dr) || dr_info.min_length == UINT_MAX) {
m_state_graph.add_state(dr_id);
m_state_graph.mark_done(dr_id);
}
}
}
}
STRACE(seq_regex, m_state_graph.display(tout););

View file

@ -124,7 +124,7 @@ namespace smt {
// Note: Doesn't need to be sound or complete (doesn't affect soundness)
bool can_be_in_cycle(expr* r1, expr* r2);
// Update the graph
bool update_state_graph(expr* r, unsigned depth = 0);
bool update_state_graph(expr* r);
// Printing expressions for seq_regex_brief
std::string state_str(expr* e);