3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00
z3/src/ast/rewriter/seq_derive.cpp
Margus Veanes 4bef7d513c seq::derive — Stage 2: range_predicate-based path state
Replace seq::derive's m_intervals + m_intervals_start append-only
char-range stack and the imperative intersect_intervals /
exclude_interval helpers with a single canonical range_predicate
m_path_pred that tracks the feasible character set under the
current path.

* Add range_predicate_translator: pure AST -> range_predicate
  translator for the boolean-over-char_le fragment
  (true/false, eq with const, char_le with const, not/and/or any
  nesting). Returns false on the first sub-term outside the
  fragment so the caller can fall back to other reasoning.

* push_intervals_impl: translate the candidate predicate to a
  range_predicate and reduce path tracking to set arithmetic
  (intersection + subset/empty checks). The legacy top-level
  and/or descent is preserved for mixed char / non-char
  conditions.

* eval_range_cond: implication becomes subset_of and contradiction
  becomes !intersects, both linear in the number of ranges with no
  AST allocation.

* range_predicate gains subset_of / intersects / disjoint_from to
  support allocation-free path queries.

* path_save now stores the saved range_predicate by value; the
  stack switches from svector (CallDestructors=false) to vector
  because range_predicate owns an inner svector.

Tests: 91/91 pass with /a, including the new
range_predicate_translator unit test exercising true/false, eq,
char_le, and/or/not, and De Morgan agreement.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-15 03:27:54 -07:00

1214 lines
44 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_derive.cpp
Abstract:
Symbolic derivative computation for regular expressions.
Produces an ITE-tree (transition regex) representation following
the approach of RE# (Varatalu, Veanes, Ernits - POPL 2025).
The symbolic derivative δ(r) maps each character to the resulting
derivative state via an ITE-tree. The free variable (:var 0) represents
the input character.
Authors:
Nikolaj Bjorner (nbjorner) 2026-06-03
--*/
#include "ast/rewriter/seq_derive.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/rewriter/var_subst.h"
#include "ast/ast_pp.h"
#include "ast/array_decl_plugin.h"
#include "ast/rewriter/bool_rewriter.h"
#include "util/util.h"
#include <algorithm>
namespace seq {
derive::derive(ast_manager& m, seq_rewriter& re) :
m(m),
m_util(m),
m_autil(m),
m_br(m),
m_re(re),
m_trail(m),
m_ele(m),
m_path_expr(m) {
m_br.set_flat_and_or(false);
}
void derive::reset() {
m_cache.reset();
m_top_cache.reset();
m_union_cache.reset();
m_inter_cache.reset();
m_concat_cache.reset();
m_complement_cache.reset();
m_trail.reset();
}
// Reset only operation caches (union/inter/concat/complement)
// while preserving derivative caches (m_cache, m_top_cache)
void derive::reset_op_caches() {
m_union_cache.reset();
m_inter_cache.reset();
m_concat_cache.reset();
m_complement_cache.reset();
}
expr_ref derive::operator()(expr* ele, expr* r) {
SASSERT(m_util.is_re(r));
if (m_trail.size() > 500000)
reset();
else if (m_trail.size() > 100000)
reset_op_caches();
sort *seq_sort = nullptr, *ele_sort = nullptr;
VERIFY(m_util.is_re(r, seq_sort));
VERIFY(m_util.is_seq(seq_sort, ele_sort));
expr_ref v(m.mk_var(0, ele_sort), m);
// Check top-level cache (post-simplify result)
expr* cached = nullptr;
expr_ref result(m);
if (m_top_cache.find(r, cached)) {
result = cached;
}
else {
// Always compute the SYMBOLIC derivative wrt the canonical
// variable v (so the cached result is reusable for any
// concrete ele via substitution below). Using the concrete
// `ele` here would bake it into the cached ITE-tree and
// poison future lookups for the same r with a different ele.
m_ele = v;
m_depth = 0;
// Initialize path state for inline pruning
m_path.reset();
m_path_pred = range_predicate::top(u().max_char());
m_path_expr = m.mk_true();
result = derive_rec(r);
m_top_cache.insert(r, result);
}
if (v != ele) {
var_subst subst(m);
result = subst(result, 1, &ele);
}
m_ele = nullptr;
// Cache and pin the final result
m_trail.push_back(ele);
m_trail.push_back(r);
m_trail.push_back(result);
return result;
}
expr_ref derive::operator()(expr* r) {
SASSERT(m_util.is_re(r));
sort* seq_sort = nullptr, * ele_sort = nullptr;
VERIFY(m_util.is_re(r, seq_sort));
VERIFY(m_util.is_seq(seq_sort, ele_sort));
expr_ref v(m.mk_var(0, ele_sort), m);
return (*this)(v, r);
}
// -------------------------------------------------------
// Core derivative computation
// -------------------------------------------------------
expr_ref derive::derive_rec(expr* r) {
SASSERT(m_util.is_re(r));
// Check cache (indexed by both m_ele and r)
expr* cached = nullptr;
if (m_cache.find(m_ele, r, cached))
return expr_ref(cached, m);
// Depth check
if (m_depth >= m_max_depth) {
// Return stuck derivative (the derivative operator applied symbolically)
return expr_ref(re().mk_derivative(m_ele, r), m);
}
flet<unsigned> _scoped_depth(m_depth, m_depth + 1);
expr_ref result = derive_core(r);
// Cache the result
m_cache.insert(m_ele, r, result);
m_trail.push_back(m_ele);
m_trail.push_back(r);
m_trail.push_back(result);
return result;
}
// Forward declaration helper
expr_ref derive::derive_core(expr* r) {
sort* s = nullptr;
VERIFY(m_util.is_re(r, s));
auto nothing = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m); };
auto epsilon = [&]() { return expr_ref(re().mk_to_re(u().str.mk_empty(s)), m); };
auto dotstar = [&]() { return expr_ref(re().mk_full_seq(r->get_sort()), m); };
expr* r1 = nullptr;
expr* r2 = nullptr;
expr* cond = nullptr;
unsigned lo = 0, hi = 0;
// δ(∅) = ∅, δ(ε) = ∅
if (re().is_empty(r) || re().is_epsilon(r))
return nothing();
// δ(Σ*) = Σ*, δ(.+) = Σ*
if (re().is_full_seq(r) || re().is_dot_plus(r))
return dotstar();
// δ(.) = ε (full char accepts any single character)
if (re().is_full_char(r))
return epsilon();
// δ(str.to_re(s)) - derivative of a literal string
if (re().is_to_re(r, r1))
return derive_to_re(r1, s);
// δ(re.range(lo, hi)) - character range
if (re().is_range(r, r1, r2))
return derive_range(r1, r2, s);
// δ(re.of_pred(p)) - predicate-based regex
if (re().is_of_pred(r, r1))
return derive_of_pred(r1, s);
// δ(r1 · r2) = δ(r1) · r2 (if nullable(r1) then δ(r2) else ∅)
if (re().is_concat(r, r1, r2)) {
expr_ref d1 = derive_rec(r1);
expr_ref d1_r2 = mk_deriv_concat(d1, r2);
expr_ref nullable_r1 = is_nullable(r1);
if (m.is_true(nullable_r1))
return mk_union(d1_r2, derive_rec(r2));
if (m.is_false(nullable_r1))
return d1_r2;
// Conditional: nullable is a Boolean expression
expr_ref d2 = derive_rec(r2);
expr_ref guarded = mk_ite(nullable_r1, d2, nothing());
return mk_union(d1_r2, guarded);
}
// δ(r1 r2) = δ(r1) δ(r2)
if (re().is_union(r, r1, r2)) {
expr_ref d1 = derive_rec(r1);
expr_ref d2 = derive_rec(r2);
return mk_union(d1, d2);
}
// δ(r1 x r2) = δ(r1) x δ(r2)
if (re().is_xor(r, r1, r2)) {
expr_ref d1 = derive_rec(r1);
expr_ref d2 = derive_rec(r2);
return mk_xor(d1, d2);
}
// δ(r1 ∩ r2) = δ(r1) ∩ δ(r2)
if (re().is_intersection(r, r1, r2)) {
expr_ref d1 = derive_rec(r1);
expr_ref d2 = derive_rec(r2);
return mk_inter(d1, d2);
}
// δ(~r1) = ~δ(r1)
if (re().is_complement(r, r1)) {
expr_ref d1 = derive_rec(r1);
return mk_complement(d1);
}
// δ(r1*) = δ(r1) · r1*
if (re().is_star(r, r1)) {
expr_ref d1 = derive_rec(r1);
expr_ref star_r1(re().mk_star(r1), m);
return mk_deriv_concat(d1, star_r1);
}
// δ(r1+) = δ(r1) · r1*
if (re().is_plus(r, r1)) {
expr_ref d1 = derive_rec(r1);
expr_ref star_r1(re().mk_star(r1), m);
return mk_deriv_concat(d1, star_r1);
}
// δ(r1?) = δ(r1)
if (re().is_opt(r, r1))
return derive_rec(r1);
// δ(r1{lo,hi})
if (re().is_loop(r, r1, lo, hi)) {
if (hi == 0 || hi < lo)
return nothing();
expr_ref d1 = derive_rec(r1);
expr_ref tail(re().mk_loop_proper(r1, (lo == 0 ? 0 : lo - 1), hi - 1), m);
return mk_deriv_concat(d1, tail);
}
// δ(r1{lo,}) - unbounded loop
if (re().is_loop(r, r1, lo)) {
expr_ref d1 = derive_rec(r1);
expr_ref tail(re().mk_loop(r1, (lo == 0 ? 0 : lo - 1)), m);
return mk_deriv_concat(d1, tail);
}
// δ(r1 \ r2) = δ(r1) ∩ ~δ(r2)
if (re().is_diff(r, r1, r2)) {
expr_ref d1 = derive_rec(r1);
expr_ref d2 = derive_rec(r2);
expr_ref neg_d2 = mk_complement(d2);
return mk_inter(d1, neg_d2);
}
// δ(ite(c, r1, r2)) = ite(c, δ(r1), δ(r2))
if (m.is_ite(r, cond, r1, r2)) {
expr_ref d1 = derive_rec(r1);
expr_ref d2 = derive_rec(r2);
return mk_ite(cond, d1, d2);
}
// δ(reverse(r1)) - normalize by pushing reverse inward, then derive
if (re().is_reverse(r, r1)) {
expr_ref norm = mk_regex_reverse(r1);
if (norm != r)
return derive_rec(norm);
return expr_ref(re().mk_derivative(m_ele, r), m);
}
// Stuck/uninterpreted case
return expr_ref(re().mk_derivative(m_ele, r), m);
}
// -------------------------------------------------------
// Derivative of specific regex constructs
// -------------------------------------------------------
expr_ref derive::derive_to_re(expr* s, sort* seq_sort) {
sort* re_sort = re().mk_re(seq_sort);
// δ(to_re("")) = ∅
if (u().str.is_empty(s))
return expr_ref(re().mk_empty(re_sort), m);
// δ(to_re("c₁c₂...cₙ")) = ite(ele = c₁, to_re("c₂...cₙ"), ∅)
zstring zs;
if (u().str.is_string(s, zs)) {
if (zs.length() == 0)
return expr_ref(re().mk_empty(re_sort), m);
// First character
expr_ref head(m_util.mk_char(zs[0]), m);
expr_ref cond(m.mk_eq(m_ele, head), m);
// Tail string
expr_ref tail_str(u().str.mk_string(zs.extract(1, zs.length() - 1)), m);
expr_ref tail_re(re().mk_to_re(tail_str), m);
expr_ref empty(re().mk_empty(re_sort), m);
return mk_ite(cond, tail_re, empty);
}
// δ(to_re(unit(c))) = ite(ele = c, ε, ∅)
expr* ch = nullptr;
if (u().str.is_unit(s, ch)) {
expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m);
expr_ref empty(re().mk_empty(re_sort), m);
expr_ref cond(m.mk_eq(m_ele, ch), m);
return mk_ite(cond, eps, empty);
}
// δ(to_re(s1 ++ s2)) = ite(head matches, to_re(tail ++ s2), ∅)
expr* s1 = nullptr, * s2 = nullptr;
if (u().str.is_concat(s, s1, s2)) {
expr_ref hd(m), tl(m);
if (get_head_tail(s1, s2, hd, tl)) {
expr_ref cond(m.mk_eq(m_ele, hd), m);
expr_ref tail_re(re().mk_to_re(tl), m);
expr_ref empty(re().mk_empty(re_sort), m);
return mk_ite(cond, tail_re, empty);
}
}
// δ(to_re(itos(n))) - derivative of integer-to-string
// itos(n) produces digits '0'-'9' when n >= 0, empty when n < 0
expr* n = nullptr;
if (u().str.is_itos(s, n)) {
expr_ref empty(re().mk_empty(re_sort), m);
// Guard: n >= 0 and element is a digit and element = s[0]
expr_ref n_ge_0(m_autil.mk_ge(n, m_autil.mk_int(0)), m);
expr_ref char_0(m_util.mk_char('0'), m);
expr_ref char_9(m_util.mk_char('9'), m);
expr_ref ge_0(m_util.mk_le(char_0, m_ele), m);
expr_ref le_9(m_util.mk_le(m_ele, char_9), m);
expr_ref is_digit(m.mk_and(ge_0, le_9), m);
// First character of itos(n) matches ele
expr_ref zero_idx(m_autil.mk_int(0), m);
expr_ref first(u().str.mk_nth_i(s, zero_idx), m);
expr_ref eq_first(m.mk_eq(m_ele, first), m);
// Guard = n >= 0 && is_digit && ele = s[0]
expr_ref guard(m.mk_and(n_ge_0, m.mk_and(is_digit, eq_first)), m);
// Tail: to_re(substr(itos(n), 1, len(itos(n)) - 1))
expr_ref one(m_autil.mk_int(1), m);
expr_ref len(u().str.mk_length(s), m);
expr_ref rest_len(m_autil.mk_sub(len, one), m);
expr_ref rest(u().str.mk_substr(s, one, rest_len), m);
expr_ref rest_re(re().mk_to_re(rest), m);
return mk_ite(guard, rest_re, empty);
}
// Non-ground sequence: δ(to_re(s)) = ite(s ≠ "" ∧ ele = s[0], to_re(s[1:]), ∅)
expr_ref empty_seq(u().str.mk_empty(seq_sort), m);
expr_ref is_non_empty(m.mk_not(m.mk_eq(s, empty_seq)), m);
expr_ref zero(m_autil.mk_int(0), m);
expr_ref first(u().str.mk_nth_i(s, zero), m);
expr_ref eq_first(m.mk_eq(m_ele, first), m);
expr_ref guard(m.mk_and(is_non_empty, eq_first), m);
expr_ref one(m_autil.mk_int(1), m);
expr_ref len(u().str.mk_length(s), m);
expr_ref rest_len(m_autil.mk_sub(len, one), m);
expr_ref rest(u().str.mk_substr(s, one, rest_len), m);
expr_ref rest_re(re().mk_to_re(rest), m);
expr_ref empty(re().mk_empty(re_sort), m);
return mk_ite(guard, rest_re, empty);
}
expr_ref derive::derive_range(expr* lo, expr* hi, sort* seq_sort) {
sort* re_sort = re().mk_re(seq_sort);
expr_ref empty(re().mk_empty(re_sort), m);
expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m);
// Extract character values from unit strings
expr_ref c_lo(m), c_hi(m);
if (u().str.is_unit_string(lo, c_lo) && u().str.is_unit_string(hi, c_hi)) {
// Build range condition, simplifying trivial bounds
unsigned lo_val = 0, hi_val = 0;
bool lo_trivial = m_util.is_const_char(c_lo, lo_val) && lo_val == 0;
bool hi_trivial = m_util.is_const_char(c_hi, hi_val) && hi_val == u().max_char();
if (lo_trivial && hi_trivial)
return eps; // full charset range — always matches
expr_ref in_range(m);
if (lo_trivial)
in_range = m_util.mk_le(m_ele, c_hi);
else if (hi_trivial)
in_range = m_util.mk_le(c_lo, m_ele);
else
in_range = m.mk_and(m_util.mk_le(c_lo, m_ele), m_util.mk_le(m_ele, c_hi));
return mk_ite(in_range, eps, empty);
}
// Fallback: stuck derivative
return expr_ref(re().mk_derivative(m_ele, re().mk_range(lo, hi)), m);
}
expr_ref derive::derive_of_pred(expr* pred, sort* seq_sort) {
sort* re_sort = re().mk_re(seq_sort);
expr_ref empty(re().mk_empty(re_sort), m);
expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m);
// Apply predicate to the element
array_util autil(m);
expr* args[2] = { pred, m_ele };
expr_ref cond(autil.mk_select(2, args), m);
return mk_ite(cond, eps, empty);
}
// Extract head character and remaining tail from a sequence
// s1 is the first part, s2 is the continuation (from str.concat(s1, s2))
bool derive::get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl) {
expr* ch = nullptr;
expr* a = nullptr, * b = nullptr;
if (u().str.is_unit(s1, ch)) {
hd = ch;
tl = s2;
return true;
}
if (u().str.is_concat(s1, a, b)) {
expr_ref new_s2(u().str.mk_concat(b, s2), m);
return get_head_tail(a, new_s2, hd, tl);
}
zstring zs;
if (u().str.is_string(s1, zs) && zs.length() > 0) {
hd = m_util.mk_char(zs[0]);
if (zs.length() == 1)
tl = s2;
else {
expr_ref rest(u().str.mk_string(zs.extract(1, zs.length() - 1)), m);
tl = u().str.mk_concat(rest, s2);
}
return true;
}
return false;
}
// -------------------------------------------------------
// Normalize reverse
// -------------------------------------------------------
expr_ref derive::mk_regex_reverse(expr* r) {
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
unsigned lo = 0, hi = 0;
expr_ref result(m);
if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) ||
re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r))
result = r;
else if (re().is_to_re(r))
result = re().mk_reverse(r);
else if (re().is_reverse(r, r1))
result = r1;
else if (re().is_concat(r, r1, r2))
result = re().mk_concat(mk_regex_reverse(r2), mk_regex_reverse(r1));
else if (m.is_ite(r, c, r1, r2))
result = m.mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2));
else if (re().is_union(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_union(a1, b1);
}
else if (re().is_intersection(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_inter(a1, b1);
}
else if (re().is_diff(r, r1, r2)) {
auto a1 = mk_regex_reverse(r1);
auto b1 = mk_regex_reverse(r2);
result = re().mk_diff(a1, b1);
}
else if (re().is_star(r, r1))
result = re().mk_star(mk_regex_reverse(r1));
else if (re().is_plus(r, r1))
result = re().mk_plus(mk_regex_reverse(r1));
else if (re().is_loop(r, r1, lo))
result = re().mk_loop(mk_regex_reverse(r1), lo);
else if (re().is_loop(r, r1, lo, hi))
result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi);
else if (re().is_opt(r, r1))
result = re().mk_opt(mk_regex_reverse(r1));
else if (re().is_complement(r, r1))
result = re().mk_complement(mk_regex_reverse(r1));
else
result = re().mk_reverse(r);
return result;
}
// -------------------------------------------------------
// Nullability - uses info class from seq_decl_plugin.h
// -------------------------------------------------------
expr_ref derive::is_nullable(expr* r) {
SASSERT(m_util.is_re(r) || m_util.is_seq(r));
expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr;
sort* seq_sort = nullptr;
unsigned lo = 0, hi = 0;
zstring s1;
if (m_util.is_re(r)) {
auto info = re().get_info(r);
switch (info.nullable) {
case l_true: return expr_ref(m.mk_true(), m);
case l_false: return expr_ref(m.mk_false(), m);
default: break;
}
}
expr_ref result(m);
if (re().is_concat(r, r1, r2) ||
re().is_intersection(r, r1, r2)) {
m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
}
else if (re().is_union(r, r1, r2) || re().is_antimirov_union(r, r1, r2)) {
m_br.mk_or(is_nullable(r1), is_nullable(r2), result);
}
else if (re().is_diff(r, r1, r2)) {
m_br.mk_not(is_nullable(r2), result);
m_br.mk_and(result, is_nullable(r1), result);
}
else if (re().is_xor(r, r1, r2)) {
m_br.mk_xor(is_nullable(r1), is_nullable(r2), result);
}
else if (re().is_star(r) ||
re().is_opt(r) ||
re().is_full_seq(r) ||
re().is_epsilon(r) ||
(re().is_loop(r, r1, lo) && lo == 0) ||
(re().is_loop(r, r1, lo, hi) && lo == 0)) {
result = m.mk_true();
}
else if (re().is_full_char(r) ||
re().is_empty(r) ||
re().is_of_pred(r) ||
re().is_range(r)) {
result = m.mk_false();
}
else if (re().is_plus(r, r1) ||
(re().is_loop(r, r1, lo) && lo > 0) ||
(re().is_loop(r, r1, lo, hi) && lo > 0) ||
(re().is_reverse(r, r1))) {
result = is_nullable(r1);
}
else if (re().is_complement(r, r1)) {
m_br.mk_not(is_nullable(r1), result);
}
else if (re().is_to_re(r, r1)) {
result = is_nullable(r1);
}
else if (m.is_ite(r, cond, r1, r2)) {
m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result);
}
else if (m_util.is_re(r, seq_sort)) {
result = is_nullable_symbolic_regex(r, seq_sort);
}
else if (u().str.is_concat(r, r1, r2)) {
m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
}
else if (u().str.is_empty(r)) {
result = m.mk_true();
}
else if (u().str.is_unit(r)) {
result = m.mk_false();
}
else if (u().str.is_string(r, s1)) {
result = m.mk_bool_val(s1.length() == 0);
}
else {
SASSERT(m_util.is_seq(r));
result = m.mk_eq(u().str.mk_empty(r->get_sort()), r);
}
return result;
}
expr_ref derive::is_nullable_symbolic_regex(expr* r, sort* seq_sort) {
SASSERT(m_util.is_re(r));
expr* elem = nullptr, * r1 = r, * r2 = nullptr, * s = nullptr;
expr_ref elems(u().str.mk_empty(seq_sort), m);
expr_ref result(m);
while (re().is_derivative(r1, elem, r2)) {
if (u().str.is_empty(elems))
elems = u().str.mk_unit(elem);
else
elems = u().str.mk_concat(u().str.mk_unit(elem), elems);
r1 = r2;
}
if (re().is_to_re(r1, s)) {
result = m.mk_eq(elems, s);
return result;
}
result = re().mk_in_re(u().str.mk_empty(seq_sort), r);
return result;
}
// -------------------------------------------------------
// Smart constructors with simplification
// -------------------------------------------------------
// Extract character range [lo, hi] from a derivative condition.
// Conditions are of the form:
// ele == c → range [c, c]
// char_le(lo_expr, ele) && char_le(ele, hi_expr) → range [lo, hi]
// char_le(lo_expr, ele) → range [lo, max_char]
// char_le(ele, hi_expr) → range [0, hi]
// Returns false if not a recognizable range condition.
// Predicate implication for character range conditions.
// Returns true if: whenever cond_a is true, cond_b must also be true.
// pred_implies(sign_a, a, sign_b, b): does (sign_a ? ¬a : a) imply (sign_b ? ¬b : b)?
bool derive::pred_implies(bool sign_a, expr* a, bool sign_b, expr* b) {
// Same atom: check sign compatibility
if (a == b) return sign_a == sign_b;
// Both negated: ¬a → ¬b iff b → a, i.e. pred_implies(false, b, false, a)
if (sign_a && sign_b)
return pred_implies(false, b, false, a);
unsigned lo_a, hi_a, lo_b, hi_b;
bool neg_a, neg_b;
if (!sign_a && !sign_b) {
// a → b: range_a ⊆ range_b
if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a &&
u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b)
return lo_b <= lo_a && hi_a <= hi_b;
}
else if (!sign_a && sign_b) {
// a → ¬b: range_a ∩ range_b = ∅
if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a &&
u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b)
return hi_a < lo_b || hi_b < lo_a;
}
else if (sign_a && !sign_b) {
// ¬a → b: complement of range_a ⊆ range_b
if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a &&
u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b)
return lo_b == 0 && hi_b >= u().max_char();
}
return false;
}
bool derive::pred_implies(expr* a, expr* b) {
bool sign_a = m.is_not(a, a);
bool sign_b = m.is_not(b, b);
return pred_implies(sign_a, a, sign_b, b);
}
expr_ref derive::mk_xor(expr *a, expr *b) {
return mk_core(OP_RE_XOR, a, b);
}
expr_ref derive::mk_xor_core(expr *a, expr *b) {
return m_re.mk_xor0(a, b);
}
expr_ref derive::mk_core(decl_kind k, expr* a, expr* b) {
expr *pe = get_path_expr();
expr *cached = nullptr;
auto& cache = k == OP_RE_UNION ? m_union_cache : k == OP_RE_INTERSECT ? m_inter_cache : m_xor_cache;
if (cache.find(a, b, pe, cached))
return expr_ref(cached, m);
expr_ref result(m);
// ITE handling with path pruning
auto inter_op = [&](expr *x, expr *y) { return mk_inter(x, y); };
auto union_op = [&](expr *x, expr *y) { return mk_union(x, y); };
auto xor_op = [&](expr *x, expr *y) { return mk_xor(x, y); };
switch (k) {
case OP_RE_UNION:
result = hoist_ite(a, b, union_op);
if (!result)
result = mk_union_core(a, b);
break;
case OP_RE_INTERSECT:
result = hoist_ite(a, b, inter_op);
if (!result)
result = mk_inter_core(a, b);
break;
case OP_RE_XOR:
result = hoist_ite(a, b, xor_op);
if (!result)
result = mk_xor_core(a, b);
break;
default:
UNREACHABLE();
break;
}
// Store in cache
cache.insert(a, b, pe, result);
m_trail.push_back(a);
m_trail.push_back(b);
m_trail.push_back(pe);
m_trail.push_back(result);
return result;
}
expr_ref derive::mk_union(expr* a, expr* b) {
return mk_core(OP_RE_UNION, a, b);
}
// Lightweight structural subsumption: checks if L(a) ⊆ L(b)
bool derive::is_subset(expr* a, expr* b) {
return m_re.is_subset(a, b);
}
bool derive::are_complements(expr* a, expr* b) {
expr* c = nullptr;
if (re().is_complement(a, c) && c == b) return true;
if (re().is_complement(b, c) && c == a) return true;
return false;
}
expr_ref derive::mk_union_core(expr* a, expr* b) {
// Prefix factoring: a·x a·y = a·(x y)
expr *a1, *a2, *b1, *b2;
if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) {
expr_ref tail = mk_union(a2, b2);
return mk_deriv_concat(a1, tail);
}
return m_re.mk_union(a, b);
}
expr_ref derive::mk_inter(expr* a, expr* b) {
return mk_core(OP_RE_INTERSECT, a, b);
}
expr_ref derive::mk_inter_core(expr* a, expr* b) {
// Subsumption covers: a==b, empty(a), empty(b), full(a), full(b), etc.
if (is_subset(a, b)) return expr_ref(a, m);
if (is_subset(b, a)) return expr_ref(b, m);
// Complement absorption: r ∩ ~r = ∅
expr *c = nullptr, *d = nullptr;
if (re().is_complement(a, c) && c == b)
return expr_ref(re().mk_empty(a->get_sort()), m);
if (re().is_complement(b, c) && c == a)
return expr_ref(re().mk_empty(a->get_sort()), m);
if (re().is_complement(a, c) && re().is_complement(b, d))
return expr_ref(re().mk_complement(mk_union_core(c, d)), m);
// TODO: Distribution of intersection over union
// Disabled pending performance analysis
// expr *u1, *u2;
// if (re().is_union(a, u1, u2))
// return mk_union(mk_inter(u1, b), mk_inter(u2, b));
// if (re().is_union(b, u1, u2))
// return mk_union(mk_inter(a, u1), mk_inter(a, u2));
// Base case: build raw intersection
return m_re.mk_inter(a, b);
}
expr_ref derive::mk_concat(expr* a, expr* b) {
sort* seq_s = nullptr, * ele_s = nullptr;
VERIFY(m_util.is_re(a, seq_s));
VERIFY(u().is_seq(seq_s, ele_s));
if (re().is_empty(a)) return expr_ref(a, m);
if (re().is_empty(b)) return expr_ref(b, m);
if (re().is_epsilon(a)) return expr_ref(b, m);
if (re().is_epsilon(b)) return expr_ref(a, m);
if (re().is_full_seq(a) && re().is_full_seq(b))
return expr_ref(a, m);
if (re().is_full_char(a) && re().is_full_seq(b))
return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m);
if (re().is_full_seq(a) && re().is_full_char(b))
return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m);
// to_re(s1) · to_re(s2) → to_re(s1 ++ s2)
expr* s1 = nullptr, * s2 = nullptr;
if (re().is_to_re(a, s1) && re().is_to_re(b, s2))
return expr_ref(re().mk_to_re(u().str.mk_concat(s1, s2)), m);
// r* · r* → r*
expr* a1 = nullptr, *a2 = nullptr, * b1 = nullptr;
if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1)
return expr_ref(a, m);
// Right-associate: (a · b) · c → a · (b · c)
if (re().is_concat(a, a1, a2))
return mk_concat(a1, mk_concat(a2, b));
return expr_ref(re().mk_concat(a, b), m);
}
expr_ref derive::mk_complement(expr* a) {
// Check path-aware op cache
expr* pe = get_path_expr();
expr* cached = nullptr;
if (m_complement_cache.find(a, pe, cached))
return expr_ref(cached, m);
expr_ref result = mk_complement_core(a);
// Store in cache
m_complement_cache.insert(a, pe, result);
m_trail.push_back(a);
m_trail.push_back(pe);
m_trail.push_back(result);
return result;
}
expr_ref derive::mk_complement_core(expr* a) {
// ~~r → r
expr* r = nullptr;
if (re().is_complement(a, r))
return expr_ref(r, m);
// ~∅ → Σ*
if (re().is_empty(a))
return expr_ref(re().mk_full_seq(a->get_sort()), m);
// ~Σ* → ∅
if (re().is_full_seq(a))
return expr_ref(re().mk_empty(a->get_sort()), m);
// Push through ITE with path pruning: ~(ite(c, t, e)) → ite(c, ~t, ~e)
expr* c, * t, * e;
if (m.is_ite(a, c, t, e)) {
auto comp_op = [&](expr* x) { return mk_complement(x); };
expr_ref r = apply_ite(c, t, e, comp_op);
if (r) return r;
return expr_ref(re().mk_full_seq(a->get_sort()), m);
}
// ~ε → .+
sort* s = nullptr;
expr* r1 = nullptr;
if (re().is_to_re(a, r1) && u().str.is_empty(r1)) {
VERIFY(m_util.is_re(a, s));
return expr_ref(re().mk_plus(re().mk_full_char(a->get_sort())), m);
}
return expr_ref(re().mk_complement(a), m);
}
expr_ref derive::mk_ite(expr* c, expr* t, expr* e) {
if (m.is_true(c) || t == e)
return expr_ref(t, m);
if (m.is_false(c))
return expr_ref(e, m);
// Use path-aware condition evaluation
lbool cond_val = eval_path_cond(c);
if (cond_val == l_true) return expr_ref(t, m);
if (cond_val == l_false) return expr_ref(e, m);
return expr_ref(m.mk_ite(c, t, e), m);
}
// -------------------------------------------------------
// Distribute concat through ITE/union structure of derivative
// -------------------------------------------------------
expr_ref derive::mk_deriv_concat(expr* d, expr* tail) {
// Check op cache
expr* cached = nullptr;
if (m_concat_cache.find(d, tail, cached))
return expr_ref(cached, m);
expr_ref result = mk_deriv_concat_core(d, tail);
// Store in cache
m_concat_cache.insert(d, tail, result);
m_trail.push_back(d);
m_trail.push_back(tail);
m_trail.push_back(result);
return result;
}
expr_ref derive::mk_deriv_concat_core(expr* d, expr* tail) {
expr_ref _d(d, m), _tail(tail, m);
expr* c, * t, * e;
if (re().is_empty(d))
return expr_ref(d, m);
if (re().is_epsilon(d))
return expr_ref(tail, m);
if (m.is_ite(d, c, t, e)) {
expr_ref then_r = mk_deriv_concat(t, tail);
expr_ref else_r = mk_deriv_concat(e, tail);
return mk_ite(c, then_r, else_r);
}
if (re().is_union(d, t, e)) {
expr_ref left = mk_deriv_concat(t, tail);
expr_ref right = mk_deriv_concat(e, tail);
return mk_union(left, right);
}
return mk_concat(d, tail);
}
// -------------------------------------------------------
// Path management for inline pruning
// -------------------------------------------------------
lbool derive::push(expr* c, bool sign) {
// Check if (c, sign) is already determined by the path
lbool cv = eval_path_cond(c);
if (cv == l_true && !sign) return l_true; // c implied true, push(c,false) is redundant
if (cv == l_false && sign) return l_true; // c implied false, push(c,true) is redundant
if (cv == l_true && sign) return l_false; // c implied true, push(c,true) contradicts
if (cv == l_false && !sign) return l_false; // c implied false, push(c,false) contradicts
// Save current state
unsigned saved_path_sz = m_path.size();
range_predicate saved_path_pred = m_path_pred;
expr* saved_path_expr = m_path_expr;
// Push atoms onto path and check for contradiction or implication
lbool result = push_path_atoms(c, sign);
if (result != l_undef) {
m_path.shrink(saved_path_sz);
return result;
}
// Update path predicate (feasible character set)
result = push_intervals_impl(c, sign);
if (result != l_undef) {
m_path.shrink(saved_path_sz);
m_path_pred = std::move(saved_path_pred);
return result;
}
// Update path expression
expr* atom = sign ? m.mk_not(c) : c;
m_path_expr = m.mk_and(m_path_expr, atom);
m_trail.push_back(m_path_expr);
// Commit: save state for pop()
m_path_stack.push_back({ saved_path_sz, std::move(saved_path_pred), saved_path_expr });
return l_undef;
}
void derive::pop() {
SASSERT(!m_path_stack.empty());
auto& saved = m_path_stack.back();
m_path.shrink(saved.path_sz);
m_path_pred = std::move(saved.saved_path_pred);
m_path_expr = saved.path_expr;
m_path_stack.pop_back();
}
// Binary apply_ite: hoist ite(c, t, e) op r with path pruning
expr_ref derive::apply_ite(expr* c, expr* t, expr* e, expr* r, std::function<expr_ref(expr*, expr*)> apply_op) {
expr_ref then_br(m), else_br(m);
switch (push(c, false)) {
case l_true: return apply_op(t, r);
case l_undef: then_br = apply_op(t, r); pop(); break;
case l_false: break;
}
switch (push(c, true)) {
case l_true: return apply_op(e, r);
case l_undef: else_br = apply_op(e, r); pop(); break;
case l_false: break;
}
if (then_br && else_br) return mk_ite(c, then_br, else_br);
if (then_br) return then_br;
if (else_br) return else_br;
return expr_ref(nullptr, m);
}
// Same-condition merge: ite(c, t1, e1) op ite(c, t2, e2) → ite(c, t1 op t2, e1 op e2)
expr_ref derive::apply_ite(expr* c, expr* t1, expr* e1, expr* t2, expr* e2, std::function<expr_ref(expr*, expr*)> apply_op) {
expr_ref then_br(m), else_br(m);
switch (push(c, false)) {
case l_true: return apply_op(t1, t2);
case l_undef: then_br = apply_op(t1, t2); pop(); break;
case l_false: break;
}
switch (push(c, true)) {
case l_true: return apply_op(e1, e2);
case l_undef: else_br = apply_op(e1, e2); pop(); break;
case l_false: break;
}
if (then_br && else_br) return mk_ite(c, then_br, else_br);
if (then_br) return then_br;
if (else_br) return else_br;
return expr_ref(nullptr, m);
}
// Unary apply_ite: hoist ite(c, t, e) through unary op with path pruning
expr_ref derive::apply_ite(expr* c, expr* t, expr* e, std::function<expr_ref(expr*)> apply_op) {
expr_ref then_br(m), else_br(m);
switch (push(c, false)) {
case l_true: return apply_op(t);
case l_undef: then_br = apply_op(t); pop(); break;
case l_false: break;
}
switch (push(c, true)) {
case l_true: return apply_op(e);
case l_undef: else_br = apply_op(e); pop(); break;
case l_false: break;
}
if (then_br && else_br) return mk_ite(c, then_br, else_br);
if (then_br) return then_br;
if (else_br) return else_br;
return expr_ref(nullptr, m);
}
// Common ITE dispatch for binary ops (union/inter).
// Returns nullptr if neither a nor b is ITE.
expr_ref derive::hoist_ite(expr* a, expr* b, std::function<expr_ref(expr*, expr*)> apply_op) {
expr *c1, *t1, *e1, *c2, *t2, *e2;
if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1->get_id() > c2->get_id())
std::swap(a, b);
if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) {
expr_ref r(m);
if (c1 == c2)
r = apply_ite(c1, t1, e1, t2, e2, apply_op);
else
r = apply_ite(c1, t1, e1, b, apply_op);
if (r) return r;
return expr_ref(re().mk_empty(a->get_sort()), m);
}
// Single-ITE hoisting: must always recurse to maintain path-aware
// soundness — falling back to a non-path-aware structural rewrite
// here would bake unreachable branches into the result tree.
if (m.is_ite(a, c1, t1, e1)) {
expr_ref r = apply_ite(c1, t1, e1, b, apply_op);
if (r) return r;
return expr_ref(re().mk_empty(a->get_sort()), m);
}
if (m.is_ite(b, c2, t2, e2)) {
expr_ref r = apply_ite(c2, t2, e2, a, apply_op);
if (r) return r;
return expr_ref(re().mk_empty(a->get_sort()), m);
}
return expr_ref(nullptr, m);
}
// Push signed atoms onto m_path. Returns l_true if implied, l_false if contradicted, l_undef if pushed.
lbool derive::push_path_atoms(expr* c, bool sign) {
// Check if (c, sign) is already determined by the path
for (auto const& [cond, csign] : m_path) {
if (c == cond)
return csign == sign ? l_true : l_false;
expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr;
// x = v, v != w |-> x != w
if (!csign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) {
if (m.is_value(lhs1)) std::swap(lhs1, rhs1);
if (m.is_value(lhs2)) std::swap(lhs2, rhs2);
if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2))
return sign ? l_true : l_false;
}
}
// Composite: conjunction assumed true, or disjunction assumed false
if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
bool all_implied = true;
for (expr* arg : *to_app(c)) {
lbool r = push_path_atoms(arg, sign);
if (r == l_false) return l_false;
if (r == l_undef) all_implied = false;
}
return all_implied ? l_true : l_undef;
}
// Atomic: push onto path
m_path.push_back({ c, sign });
return l_undef;
}
// Update m_path_pred with the feasible-character constraint induced by
// (c, sign). Returns l_true if already implied (no change), l_false if
// the resulting set becomes empty (contradiction), and l_undef otherwise.
lbool derive::push_intervals_impl(expr* c, bool sign) {
range_predicate p(u().max_char());
if (m_pred_xlate.translate(m_ele, c, p)) {
range_predicate constraint = sign ? ~p : std::move(p);
if (m_path_pred.subset_of(constraint))
return l_true;
range_predicate new_pred = m_path_pred & constraint;
if (new_pred.is_empty())
return l_false;
m_path_pred = std::move(new_pred);
return l_undef;
}
// Translation failed: descend into a top-level and (under sign=false)
// or top-level or (under sign=true) to extract translatable
// sub-conditions. Non-translatable arguments are ignored and only
// weaken the implied/undef return.
if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
bool all_implied = true;
for (expr* arg : *to_app(c)) {
lbool r = push_intervals_impl(arg, sign);
if (r == l_false) return l_false;
if (r == l_undef) all_implied = false;
}
return all_implied ? l_true : (m_path_pred.is_empty() ? l_false : l_undef);
}
return m_path_pred.is_empty() ? l_false : l_undef;
}
// Evaluate a condition against the current path and intervals.
lbool derive::eval_path_cond(expr* c) {
// First try static evaluation (concrete m_ele, tautologies)
lbool v = eval_cond(c);
if (v != l_undef) return v;
// Check against path atoms
for (auto const& [cond, sign] : m_path) {
if (c == cond)
return sign ? l_false : l_true;
}
// Check against intervals
v = eval_range_cond(c);
if (v != l_undef) return v;
// Check pred_implies from path atoms
for (auto const& [cond, sign] : m_path) {
if (pred_implies(sign, cond, false, c))
return l_true;
if (pred_implies(sign, cond, true, c))
return l_false;
}
return l_undef;
}
// -------------------------------------------------------
// Condition evaluation helpers
// -------------------------------------------------------
lbool derive::eval_cond(expr* cond) {
expr* e1 = nullptr;
if (m.is_true(cond)) return l_true;
if (m.is_false(cond)) return l_false;
// Use is_char_const_range to evaluate conditions involving m_ele
unsigned lo = 0, hi = 0, ele_val = 0;
bool negated = false;
if (m_util.is_char_const_range(m_ele, cond, lo, hi, negated) && u().is_const_char(m_ele, ele_val)) {
bool in_range = (lo <= ele_val && ele_val <= hi);
return (in_range != negated) ? l_true : l_false;
}
// Handle self-equality and constant comparisons not involving m_ele
expr* lhs = nullptr, * rhs = nullptr;
if (m.is_eq(cond, lhs, rhs) && lhs == rhs)
return l_true;
unsigned vl = 0, vr = 0;
if (u().is_char_le(cond, lhs, rhs)) {
if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr))
return vl <= vr ? l_true : l_false;
if (u().is_const_char(lhs, vl) && vl == 0)
return l_true;
if (u().is_const_char(rhs, vr) && vr == u().max_char())
return l_true;
}
// not(e1)
if (m.is_not(cond, e1))
return ~eval_cond(e1);
// and(...)
if (m.is_and(cond)) {
lbool r = l_true;
for (expr* arg : *to_app(cond)) {
lbool v = eval_cond(arg);
if (v == l_false) return l_false;
if (v == l_undef) r = l_undef;
}
return r;
}
// or(...)
if (m.is_or(cond)) {
lbool r = l_false;
for (expr* arg : *to_app(cond)) {
lbool v = eval_cond(arg);
if (v == l_true) return l_true;
if (v == l_undef) r = l_undef;
}
return r;
}
return l_undef;
}
lbool derive::eval_range_cond(expr* c) {
if (m_path_pred.is_empty())
return l_false;
range_predicate p(u().max_char());
if (!m_pred_xlate.translate(m_ele, c, p))
return l_undef;
// c is implied true iff every feasible char satisfies it.
if (m_path_pred.subset_of(p))
return l_true;
// c is implied false iff no feasible char satisfies it.
if (!m_path_pred.intersects(p))
return l_false;
return l_undef;
}
}