mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 07:06:28 +00:00
1295 lines
48 KiB
C++
1295 lines
48 KiB
C++
/*++
|
||
Copyright (c) 2026 Microsoft Corporation
|
||
|
||
Module Name:
|
||
|
||
seq_derive.cpp
|
||
|
||
Abstract:
|
||
|
||
Symbolic derivative computation for regular expressions.
|
||
Produces an ITE-tree (transition regex) representation following
|
||
the approach of RE# (Varatalu, Veanes, Ernits - POPL 2025).
|
||
|
||
The symbolic derivative δ(r) maps each character to the resulting
|
||
derivative state via an ITE-tree. The free variable (:var 0) represents
|
||
the input character.
|
||
|
||
Authors:
|
||
|
||
Nikolaj Bjorner (nbjorner) 2026-06-03
|
||
|
||
--*/
|
||
|
||
#include "ast/rewriter/seq_derive.h"
|
||
#include "ast/rewriter/seq_rewriter.h"
|
||
#include "ast/rewriter/var_subst.h"
|
||
#include "ast/ast_pp.h"
|
||
#include "ast/array_decl_plugin.h"
|
||
#include "ast/rewriter/bool_rewriter.h"
|
||
#include "util/util.h"
|
||
#include <algorithm>
|
||
|
||
namespace seq {
|
||
|
||
derive::derive(ast_manager& m, seq_rewriter& re) :
|
||
m(m),
|
||
m_util(m),
|
||
m_autil(m),
|
||
m_br(m),
|
||
m_re(re),
|
||
m_trail(m),
|
||
m_ele(m),
|
||
m_path_expr(m) {
|
||
m_br.set_flat_and_or(false);
|
||
}
|
||
|
||
void derive::reset() {
|
||
m_cache.reset();
|
||
m_top_cache.reset();
|
||
m_union_cache.reset();
|
||
m_inter_cache.reset();
|
||
m_concat_cache.reset();
|
||
m_complement_cache.reset();
|
||
m_trail.reset();
|
||
}
|
||
|
||
// Reset only operation caches (union/inter/concat/complement)
|
||
// while preserving derivative caches (m_cache, m_top_cache)
|
||
void derive::reset_op_caches() {
|
||
m_union_cache.reset();
|
||
m_inter_cache.reset();
|
||
m_concat_cache.reset();
|
||
m_complement_cache.reset();
|
||
}
|
||
|
||
expr_ref derive::operator()(expr* ele, expr* r) {
|
||
SASSERT(m_util.is_re(r));
|
||
if (m_trail.size() > 500000)
|
||
reset();
|
||
else if (m_trail.size() > 100000)
|
||
reset_op_caches();
|
||
sort *seq_sort = nullptr, *ele_sort = nullptr;
|
||
VERIFY(m_util.is_re(r, seq_sort));
|
||
VERIFY(m_util.is_seq(seq_sort, ele_sort));
|
||
expr_ref v(m.mk_var(0, ele_sort), m);
|
||
// Check top-level cache (post-simplify result)
|
||
expr* cached = nullptr;
|
||
expr_ref result(m);
|
||
if (m_top_cache.find(r, cached)) {
|
||
result = cached;
|
||
}
|
||
else {
|
||
// Always compute the SYMBOLIC derivative wrt the canonical
|
||
// variable v (so the cached result is reusable for any
|
||
// concrete ele via substitution below). Using the concrete
|
||
// `ele` here would bake it into the cached ITE-tree and
|
||
// poison future lookups for the same r with a different ele.
|
||
m_ele = v;
|
||
m_depth = 0;
|
||
// Initialize path state for inline pruning
|
||
m_path.reset();
|
||
m_intervals.reset();
|
||
m_intervals.push_back({0u, u().max_char()});
|
||
m_intervals_start = 0;
|
||
m_path_expr = m.mk_true();
|
||
result = derive_rec(r);
|
||
m_top_cache.insert(r, result);
|
||
}
|
||
if (v != ele) {
|
||
var_subst subst(m);
|
||
result = subst(result, 1, &ele);
|
||
}
|
||
m_ele = nullptr;
|
||
// Cache and pin the final result
|
||
m_trail.push_back(ele);
|
||
m_trail.push_back(r);
|
||
m_trail.push_back(result);
|
||
return result;
|
||
}
|
||
|
||
expr_ref derive::operator()(expr* r) {
|
||
SASSERT(m_util.is_re(r));
|
||
sort* seq_sort = nullptr, * ele_sort = nullptr;
|
||
VERIFY(m_util.is_re(r, seq_sort));
|
||
VERIFY(m_util.is_seq(seq_sort, ele_sort));
|
||
expr_ref v(m.mk_var(0, ele_sort), m);
|
||
return (*this)(v, r);
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Core derivative computation
|
||
// -------------------------------------------------------
|
||
|
||
expr_ref derive::derive_rec(expr* r) {
|
||
SASSERT(m_util.is_re(r));
|
||
|
||
// Check cache (indexed by both m_ele and r)
|
||
expr* cached = nullptr;
|
||
if (m_cache.find(m_ele, r, cached))
|
||
return expr_ref(cached, m);
|
||
|
||
// Depth check
|
||
if (m_depth >= m_max_depth) {
|
||
// Return stuck derivative (the derivative operator applied symbolically)
|
||
return expr_ref(re().mk_derivative(m_ele, r), m);
|
||
}
|
||
|
||
flet<unsigned> _scoped_depth(m_depth, m_depth + 1);
|
||
expr_ref result = derive_core(r);
|
||
|
||
// Cache the result
|
||
m_cache.insert(m_ele, r, result);
|
||
m_trail.push_back(m_ele);
|
||
m_trail.push_back(r);
|
||
m_trail.push_back(result);
|
||
return result;
|
||
}
|
||
|
||
// Forward declaration helper
|
||
expr_ref derive::derive_core(expr* r) {
|
||
sort* s = nullptr;
|
||
VERIFY(m_util.is_re(r, s));
|
||
|
||
auto nothing = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m); };
|
||
auto epsilon = [&]() { return expr_ref(re().mk_to_re(u().str.mk_empty(s)), m); };
|
||
auto dotstar = [&]() { return expr_ref(re().mk_full_seq(r->get_sort()), m); };
|
||
|
||
expr* r1 = nullptr;
|
||
expr* r2 = nullptr;
|
||
expr* cond = nullptr;
|
||
unsigned lo = 0, hi = 0;
|
||
|
||
// δ(∅) = ∅, δ(ε) = ∅
|
||
if (re().is_empty(r) || re().is_epsilon(r))
|
||
return nothing();
|
||
|
||
// δ(Σ*) = Σ*, δ(.+) = Σ*
|
||
if (re().is_full_seq(r) || re().is_dot_plus(r))
|
||
return dotstar();
|
||
|
||
// δ(.) = ε (full char accepts any single character)
|
||
if (re().is_full_char(r))
|
||
return epsilon();
|
||
|
||
// δ(str.to_re(s)) - derivative of a literal string
|
||
if (re().is_to_re(r, r1))
|
||
return derive_to_re(r1, s);
|
||
|
||
// δ(re.range(lo, hi)) - character range
|
||
if (re().is_range(r, r1, r2))
|
||
return derive_range(r1, r2, s);
|
||
|
||
// δ(re.of_pred(p)) - predicate-based regex
|
||
if (re().is_of_pred(r, r1))
|
||
return derive_of_pred(r1, s);
|
||
|
||
// δ(r1 · r2) = δ(r1) · r2 ∪ (if nullable(r1) then δ(r2) else ∅)
|
||
if (re().is_concat(r, r1, r2)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref d1_r2 = mk_deriv_concat(d1, r2);
|
||
expr_ref nullable_r1 = is_nullable(r1);
|
||
if (m.is_true(nullable_r1))
|
||
return mk_union(d1_r2, derive_rec(r2));
|
||
if (m.is_false(nullable_r1))
|
||
return d1_r2;
|
||
// Conditional: nullable is a Boolean expression
|
||
expr_ref d2 = derive_rec(r2);
|
||
expr_ref guarded = mk_ite(nullable_r1, d2, nothing());
|
||
return mk_union(d1_r2, guarded);
|
||
}
|
||
|
||
// δ(r1 ∪ r2) = δ(r1) ∪ δ(r2)
|
||
if (re().is_union(r, r1, r2)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref d2 = derive_rec(r2);
|
||
return mk_union(d1, d2);
|
||
}
|
||
|
||
// δ(r1 x r2) = δ(r1) x δ(r2)
|
||
if (re().is_xor(r, r1, r2)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref d2 = derive_rec(r2);
|
||
return mk_xor(d1, d2);
|
||
}
|
||
|
||
// δ(r1 ∩ r2) = δ(r1) ∩ δ(r2)
|
||
if (re().is_intersection(r, r1, r2)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref d2 = derive_rec(r2);
|
||
return mk_inter(d1, d2);
|
||
}
|
||
|
||
// δ(~r1) = ~δ(r1)
|
||
if (re().is_complement(r, r1)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
return mk_complement(d1);
|
||
}
|
||
|
||
// δ(r1*) = δ(r1) · r1*
|
||
if (re().is_star(r, r1)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref star_r1(re().mk_star(r1), m);
|
||
return mk_deriv_concat(d1, star_r1);
|
||
}
|
||
|
||
// δ(r1+) = δ(r1) · r1*
|
||
if (re().is_plus(r, r1)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref star_r1(re().mk_star(r1), m);
|
||
return mk_deriv_concat(d1, star_r1);
|
||
}
|
||
|
||
// δ(r1?) = δ(r1)
|
||
if (re().is_opt(r, r1))
|
||
return derive_rec(r1);
|
||
|
||
// δ(r1{lo,hi})
|
||
if (re().is_loop(r, r1, lo, hi)) {
|
||
if (hi == 0 || hi < lo)
|
||
return nothing();
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref tail(re().mk_loop_proper(r1, (lo == 0 ? 0 : lo - 1), hi - 1), m);
|
||
return mk_deriv_concat(d1, tail);
|
||
}
|
||
|
||
// δ(r1{lo,}) - unbounded loop
|
||
if (re().is_loop(r, r1, lo)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref tail(re().mk_loop(r1, (lo == 0 ? 0 : lo - 1)), m);
|
||
return mk_deriv_concat(d1, tail);
|
||
}
|
||
|
||
// δ(r1 \ r2) = δ(r1) ∩ ~δ(r2)
|
||
if (re().is_diff(r, r1, r2)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref d2 = derive_rec(r2);
|
||
expr_ref neg_d2 = mk_complement(d2);
|
||
return mk_inter(d1, neg_d2);
|
||
}
|
||
|
||
// δ(ite(c, r1, r2)) = ite(c, δ(r1), δ(r2))
|
||
if (m.is_ite(r, cond, r1, r2)) {
|
||
expr_ref d1 = derive_rec(r1);
|
||
expr_ref d2 = derive_rec(r2);
|
||
return mk_ite(cond, d1, d2);
|
||
}
|
||
|
||
// δ(reverse(r1)) - normalize by pushing reverse inward, then derive
|
||
if (re().is_reverse(r, r1)) {
|
||
expr_ref norm = mk_regex_reverse(r1);
|
||
if (norm != r)
|
||
return derive_rec(norm);
|
||
return expr_ref(re().mk_derivative(m_ele, r), m);
|
||
}
|
||
|
||
// Stuck/uninterpreted case
|
||
return expr_ref(re().mk_derivative(m_ele, r), m);
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Derivative of specific regex constructs
|
||
// -------------------------------------------------------
|
||
|
||
expr_ref derive::derive_to_re(expr* s, sort* seq_sort) {
|
||
sort* re_sort = re().mk_re(seq_sort);
|
||
// δ(to_re("")) = ∅
|
||
if (u().str.is_empty(s))
|
||
return expr_ref(re().mk_empty(re_sort), m);
|
||
|
||
// δ(to_re("c₁c₂...cₙ")) = ite(ele = c₁, to_re("c₂...cₙ"), ∅)
|
||
zstring zs;
|
||
if (u().str.is_string(s, zs)) {
|
||
if (zs.length() == 0)
|
||
return expr_ref(re().mk_empty(re_sort), m);
|
||
// First character
|
||
expr_ref head(m_util.mk_char(zs[0]), m);
|
||
expr_ref cond(m.mk_eq(m_ele, head), m);
|
||
// Tail string
|
||
expr_ref tail_str(u().str.mk_string(zs.extract(1, zs.length() - 1)), m);
|
||
expr_ref tail_re(re().mk_to_re(tail_str), m);
|
||
expr_ref empty(re().mk_empty(re_sort), m);
|
||
return mk_ite(cond, tail_re, empty);
|
||
}
|
||
|
||
// δ(to_re(unit(c))) = ite(ele = c, ε, ∅)
|
||
expr* ch = nullptr;
|
||
if (u().str.is_unit(s, ch)) {
|
||
expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m);
|
||
expr_ref empty(re().mk_empty(re_sort), m);
|
||
expr_ref cond(m.mk_eq(m_ele, ch), m);
|
||
return mk_ite(cond, eps, empty);
|
||
}
|
||
|
||
// δ(to_re(s1 ++ s2)) = ite(head matches, to_re(tail ++ s2), ∅)
|
||
expr* s1 = nullptr, * s2 = nullptr;
|
||
if (u().str.is_concat(s, s1, s2)) {
|
||
expr_ref hd(m), tl(m);
|
||
if (get_head_tail(s1, s2, hd, tl)) {
|
||
expr_ref cond(m.mk_eq(m_ele, hd), m);
|
||
expr_ref tail_re(re().mk_to_re(tl), m);
|
||
expr_ref empty(re().mk_empty(re_sort), m);
|
||
return mk_ite(cond, tail_re, empty);
|
||
}
|
||
}
|
||
|
||
// δ(to_re(itos(n))) - derivative of integer-to-string
|
||
// itos(n) produces digits '0'-'9' when n >= 0, empty when n < 0
|
||
expr* n = nullptr;
|
||
if (u().str.is_itos(s, n)) {
|
||
expr_ref empty(re().mk_empty(re_sort), m);
|
||
// Guard: n >= 0 and element is a digit and element = s[0]
|
||
expr_ref n_ge_0(m_autil.mk_ge(n, m_autil.mk_int(0)), m);
|
||
expr_ref char_0(m_util.mk_char('0'), m);
|
||
expr_ref char_9(m_util.mk_char('9'), m);
|
||
expr_ref ge_0(m_util.mk_le(char_0, m_ele), m);
|
||
expr_ref le_9(m_util.mk_le(m_ele, char_9), m);
|
||
expr_ref is_digit(m.mk_and(ge_0, le_9), m);
|
||
// First character of itos(n) matches ele
|
||
expr_ref zero_idx(m_autil.mk_int(0), m);
|
||
expr_ref first(u().str.mk_nth_i(s, zero_idx), m);
|
||
expr_ref eq_first(m.mk_eq(m_ele, first), m);
|
||
// Guard = n >= 0 && is_digit && ele = s[0]
|
||
expr_ref guard(m.mk_and(n_ge_0, m.mk_and(is_digit, eq_first)), m);
|
||
// Tail: to_re(substr(itos(n), 1, len(itos(n)) - 1))
|
||
expr_ref one(m_autil.mk_int(1), m);
|
||
expr_ref len(u().str.mk_length(s), m);
|
||
expr_ref rest_len(m_autil.mk_sub(len, one), m);
|
||
expr_ref rest(u().str.mk_substr(s, one, rest_len), m);
|
||
expr_ref rest_re(re().mk_to_re(rest), m);
|
||
return mk_ite(guard, rest_re, empty);
|
||
}
|
||
|
||
// Non-ground sequence: δ(to_re(s)) = ite(s ≠ "" ∧ ele = s[0], to_re(s[1:]), ∅)
|
||
expr_ref empty_seq(u().str.mk_empty(seq_sort), m);
|
||
expr_ref is_non_empty(m.mk_not(m.mk_eq(s, empty_seq)), m);
|
||
expr_ref zero(m_autil.mk_int(0), m);
|
||
expr_ref first(u().str.mk_nth_i(s, zero), m);
|
||
expr_ref eq_first(m.mk_eq(m_ele, first), m);
|
||
expr_ref guard(m.mk_and(is_non_empty, eq_first), m);
|
||
expr_ref one(m_autil.mk_int(1), m);
|
||
expr_ref len(u().str.mk_length(s), m);
|
||
expr_ref rest_len(m_autil.mk_sub(len, one), m);
|
||
expr_ref rest(u().str.mk_substr(s, one, rest_len), m);
|
||
expr_ref rest_re(re().mk_to_re(rest), m);
|
||
expr_ref empty(re().mk_empty(re_sort), m);
|
||
return mk_ite(guard, rest_re, empty);
|
||
}
|
||
|
||
expr_ref derive::derive_range(expr* lo, expr* hi, sort* seq_sort) {
|
||
sort* re_sort = re().mk_re(seq_sort);
|
||
expr_ref empty(re().mk_empty(re_sort), m);
|
||
expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m);
|
||
|
||
// Extract character values from unit strings
|
||
expr_ref c_lo(m), c_hi(m);
|
||
if (u().str.is_unit_string(lo, c_lo) && u().str.is_unit_string(hi, c_hi)) {
|
||
// Build range condition, simplifying trivial bounds
|
||
unsigned lo_val = 0, hi_val = 0;
|
||
bool lo_trivial = m_util.is_const_char(c_lo, lo_val) && lo_val == 0;
|
||
bool hi_trivial = m_util.is_const_char(c_hi, hi_val) && hi_val == u().max_char();
|
||
|
||
if (lo_trivial && hi_trivial)
|
||
return eps; // full charset range — always matches
|
||
|
||
expr_ref in_range(m);
|
||
if (lo_trivial)
|
||
in_range = m_util.mk_le(m_ele, c_hi);
|
||
else if (hi_trivial)
|
||
in_range = m_util.mk_le(c_lo, m_ele);
|
||
else
|
||
in_range = m.mk_and(m_util.mk_le(c_lo, m_ele), m_util.mk_le(m_ele, c_hi));
|
||
|
||
return mk_ite(in_range, eps, empty);
|
||
}
|
||
|
||
// Fallback: stuck derivative
|
||
return expr_ref(re().mk_derivative(m_ele, re().mk_range(lo, hi)), m);
|
||
}
|
||
|
||
expr_ref derive::derive_of_pred(expr* pred, sort* seq_sort) {
|
||
sort* re_sort = re().mk_re(seq_sort);
|
||
expr_ref empty(re().mk_empty(re_sort), m);
|
||
expr_ref eps(re().mk_to_re(u().str.mk_empty(seq_sort)), m);
|
||
|
||
// Apply predicate to the element
|
||
array_util autil(m);
|
||
expr* args[2] = { pred, m_ele };
|
||
expr_ref cond(autil.mk_select(2, args), m);
|
||
return mk_ite(cond, eps, empty);
|
||
}
|
||
|
||
// Extract head character and remaining tail from a sequence
|
||
// s1 is the first part, s2 is the continuation (from str.concat(s1, s2))
|
||
bool derive::get_head_tail(expr* s1, expr* s2, expr_ref& hd, expr_ref& tl) {
|
||
expr* ch = nullptr;
|
||
expr* a = nullptr, * b = nullptr;
|
||
if (u().str.is_unit(s1, ch)) {
|
||
hd = ch;
|
||
tl = s2;
|
||
return true;
|
||
}
|
||
if (u().str.is_concat(s1, a, b)) {
|
||
expr_ref new_s2(u().str.mk_concat(b, s2), m);
|
||
return get_head_tail(a, new_s2, hd, tl);
|
||
}
|
||
zstring zs;
|
||
if (u().str.is_string(s1, zs) && zs.length() > 0) {
|
||
hd = m_util.mk_char(zs[0]);
|
||
if (zs.length() == 1)
|
||
tl = s2;
|
||
else {
|
||
expr_ref rest(u().str.mk_string(zs.extract(1, zs.length() - 1)), m);
|
||
tl = u().str.mk_concat(rest, s2);
|
||
}
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Normalize reverse
|
||
// -------------------------------------------------------
|
||
|
||
expr_ref derive::mk_regex_reverse(expr* r) {
|
||
expr* r1 = nullptr, * r2 = nullptr, * c = nullptr;
|
||
unsigned lo = 0, hi = 0;
|
||
expr_ref result(m);
|
||
if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) ||
|
||
re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r))
|
||
result = r;
|
||
else if (re().is_to_re(r))
|
||
result = re().mk_reverse(r);
|
||
else if (re().is_reverse(r, r1))
|
||
result = r1;
|
||
else if (re().is_concat(r, r1, r2))
|
||
result = re().mk_concat(mk_regex_reverse(r2), mk_regex_reverse(r1));
|
||
else if (m.is_ite(r, c, r1, r2))
|
||
result = m.mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2));
|
||
else if (re().is_union(r, r1, r2)) {
|
||
auto a1 = mk_regex_reverse(r1);
|
||
auto b1 = mk_regex_reverse(r2);
|
||
result = re().mk_union(a1, b1);
|
||
}
|
||
else if (re().is_intersection(r, r1, r2)) {
|
||
auto a1 = mk_regex_reverse(r1);
|
||
auto b1 = mk_regex_reverse(r2);
|
||
result = re().mk_inter(a1, b1);
|
||
}
|
||
else if (re().is_diff(r, r1, r2)) {
|
||
auto a1 = mk_regex_reverse(r1);
|
||
auto b1 = mk_regex_reverse(r2);
|
||
result = re().mk_diff(a1, b1);
|
||
}
|
||
else if (re().is_star(r, r1))
|
||
result = re().mk_star(mk_regex_reverse(r1));
|
||
else if (re().is_plus(r, r1))
|
||
result = re().mk_plus(mk_regex_reverse(r1));
|
||
else if (re().is_loop(r, r1, lo))
|
||
result = re().mk_loop(mk_regex_reverse(r1), lo);
|
||
else if (re().is_loop(r, r1, lo, hi))
|
||
result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi);
|
||
else if (re().is_opt(r, r1))
|
||
result = re().mk_opt(mk_regex_reverse(r1));
|
||
else if (re().is_complement(r, r1))
|
||
result = re().mk_complement(mk_regex_reverse(r1));
|
||
else
|
||
result = re().mk_reverse(r);
|
||
return result;
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Nullability - uses info class from seq_decl_plugin.h
|
||
// -------------------------------------------------------
|
||
|
||
expr_ref derive::is_nullable(expr* r) {
|
||
SASSERT(m_util.is_re(r) || m_util.is_seq(r));
|
||
expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr;
|
||
sort* seq_sort = nullptr;
|
||
unsigned lo = 0, hi = 0;
|
||
zstring s1;
|
||
if (m_util.is_re(r)) {
|
||
auto info = re().get_info(r);
|
||
switch (info.nullable) {
|
||
case l_true: return expr_ref(m.mk_true(), m);
|
||
case l_false: return expr_ref(m.mk_false(), m);
|
||
default: break;
|
||
}
|
||
}
|
||
expr_ref result(m);
|
||
if (re().is_concat(r, r1, r2) ||
|
||
re().is_intersection(r, r1, r2)) {
|
||
m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
|
||
}
|
||
else if (re().is_union(r, r1, r2) || re().is_antimirov_union(r, r1, r2)) {
|
||
m_br.mk_or(is_nullable(r1), is_nullable(r2), result);
|
||
}
|
||
else if (re().is_diff(r, r1, r2)) {
|
||
m_br.mk_not(is_nullable(r2), result);
|
||
m_br.mk_and(result, is_nullable(r1), result);
|
||
}
|
||
else if (re().is_xor(r, r1, r2)) {
|
||
m_br.mk_xor(is_nullable(r1), is_nullable(r2), result);
|
||
}
|
||
else if (re().is_star(r) ||
|
||
re().is_opt(r) ||
|
||
re().is_full_seq(r) ||
|
||
re().is_epsilon(r) ||
|
||
(re().is_loop(r, r1, lo) && lo == 0) ||
|
||
(re().is_loop(r, r1, lo, hi) && lo == 0)) {
|
||
result = m.mk_true();
|
||
}
|
||
else if (re().is_full_char(r) ||
|
||
re().is_empty(r) ||
|
||
re().is_of_pred(r) ||
|
||
re().is_range(r)) {
|
||
result = m.mk_false();
|
||
}
|
||
else if (re().is_plus(r, r1) ||
|
||
(re().is_loop(r, r1, lo) && lo > 0) ||
|
||
(re().is_loop(r, r1, lo, hi) && lo > 0) ||
|
||
(re().is_reverse(r, r1))) {
|
||
result = is_nullable(r1);
|
||
}
|
||
else if (re().is_complement(r, r1)) {
|
||
m_br.mk_not(is_nullable(r1), result);
|
||
}
|
||
else if (re().is_to_re(r, r1)) {
|
||
result = is_nullable(r1);
|
||
}
|
||
else if (m.is_ite(r, cond, r1, r2)) {
|
||
m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result);
|
||
}
|
||
else if (m_util.is_re(r, seq_sort)) {
|
||
result = is_nullable_symbolic_regex(r, seq_sort);
|
||
}
|
||
else if (u().str.is_concat(r, r1, r2)) {
|
||
m_br.mk_and(is_nullable(r1), is_nullable(r2), result);
|
||
}
|
||
else if (u().str.is_empty(r)) {
|
||
result = m.mk_true();
|
||
}
|
||
else if (u().str.is_unit(r)) {
|
||
result = m.mk_false();
|
||
}
|
||
else if (u().str.is_string(r, s1)) {
|
||
result = m.mk_bool_val(s1.length() == 0);
|
||
}
|
||
else {
|
||
SASSERT(m_util.is_seq(r));
|
||
result = m.mk_eq(u().str.mk_empty(r->get_sort()), r);
|
||
}
|
||
return result;
|
||
}
|
||
|
||
expr_ref derive::is_nullable_symbolic_regex(expr* r, sort* seq_sort) {
|
||
SASSERT(m_util.is_re(r));
|
||
expr* elem = nullptr, * r1 = r, * r2 = nullptr, * s = nullptr;
|
||
expr_ref elems(u().str.mk_empty(seq_sort), m);
|
||
expr_ref result(m);
|
||
while (re().is_derivative(r1, elem, r2)) {
|
||
if (u().str.is_empty(elems))
|
||
elems = u().str.mk_unit(elem);
|
||
else
|
||
elems = u().str.mk_concat(u().str.mk_unit(elem), elems);
|
||
r1 = r2;
|
||
}
|
||
if (re().is_to_re(r1, s)) {
|
||
result = m.mk_eq(elems, s);
|
||
return result;
|
||
}
|
||
result = re().mk_in_re(u().str.mk_empty(seq_sort), r);
|
||
return result;
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Smart constructors with simplification
|
||
// -------------------------------------------------------
|
||
|
||
|
||
// Extract character range [lo, hi] from a derivative condition.
|
||
// Conditions are of the form:
|
||
// ele == c → range [c, c]
|
||
// char_le(lo_expr, ele) && char_le(ele, hi_expr) → range [lo, hi]
|
||
// char_le(lo_expr, ele) → range [lo, max_char]
|
||
// char_le(ele, hi_expr) → range [0, hi]
|
||
// Returns false if not a recognizable range condition.
|
||
// Predicate implication for character range conditions.
|
||
// Returns true if: whenever cond_a is true, cond_b must also be true.
|
||
// pred_implies(sign_a, a, sign_b, b): does (sign_a ? ¬a : a) imply (sign_b ? ¬b : b)?
|
||
bool derive::pred_implies(bool sign_a, expr* a, bool sign_b, expr* b) {
|
||
// Same atom: check sign compatibility
|
||
if (a == b) return sign_a == sign_b;
|
||
|
||
// Both negated: ¬a → ¬b iff b → a, i.e. pred_implies(false, b, false, a)
|
||
if (sign_a && sign_b)
|
||
return pred_implies(false, b, false, a);
|
||
|
||
unsigned lo_a, hi_a, lo_b, hi_b;
|
||
bool neg_a, neg_b;
|
||
|
||
if (!sign_a && !sign_b) {
|
||
// a → b: range_a ⊆ range_b
|
||
if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a &&
|
||
u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b)
|
||
return lo_b <= lo_a && hi_a <= hi_b;
|
||
}
|
||
else if (!sign_a && sign_b) {
|
||
// a → ¬b: range_a ∩ range_b = ∅
|
||
if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a &&
|
||
u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b)
|
||
return hi_a < lo_b || hi_b < lo_a;
|
||
}
|
||
else if (sign_a && !sign_b) {
|
||
// ¬a → b: complement of range_a ⊆ range_b
|
||
if (u().is_char_const_range(m_ele, a, lo_a, hi_a, neg_a) && !neg_a &&
|
||
u().is_char_const_range(m_ele, b, lo_b, hi_b, neg_b) && !neg_b)
|
||
return lo_b == 0 && hi_b >= u().max_char();
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
bool derive::pred_implies(expr* a, expr* b) {
|
||
bool sign_a = m.is_not(a, a);
|
||
bool sign_b = m.is_not(b, b);
|
||
return pred_implies(sign_a, a, sign_b, b);
|
||
}
|
||
|
||
expr_ref derive::mk_xor(expr *a, expr *b) {
|
||
return mk_core(OP_RE_XOR, a, b);
|
||
}
|
||
|
||
expr_ref derive::mk_xor_core(expr *a, expr *b) {
|
||
|
||
return m_re.mk_xor0(a, b);
|
||
}
|
||
|
||
expr_ref derive::mk_core(decl_kind k, expr* a, expr* b) {
|
||
expr *pe = get_path_expr();
|
||
expr *cached = nullptr;
|
||
auto& cache = k == OP_RE_UNION ? m_union_cache : k == OP_RE_INTERSECT ? m_inter_cache : m_xor_cache;
|
||
if (cache.find(a, b, pe, cached))
|
||
return expr_ref(cached, m);
|
||
expr_ref result(m);
|
||
// ITE handling with path pruning
|
||
auto inter_op = [&](expr *x, expr *y) { return mk_inter(x, y); };
|
||
auto union_op = [&](expr *x, expr *y) { return mk_union(x, y); };
|
||
auto xor_op = [&](expr *x, expr *y) { return mk_xor(x, y); };
|
||
switch (k) {
|
||
case OP_RE_UNION:
|
||
result = hoist_ite(a, b, union_op);
|
||
if (!result)
|
||
result = mk_union_core(a, b);
|
||
break;
|
||
case OP_RE_INTERSECT:
|
||
result = hoist_ite(a, b, inter_op);
|
||
if (!result)
|
||
result = mk_inter_core(a, b);
|
||
break;
|
||
case OP_RE_XOR:
|
||
result = hoist_ite(a, b, xor_op);
|
||
if (!result)
|
||
result = mk_xor_core(a, b);
|
||
break;
|
||
default:
|
||
UNREACHABLE();
|
||
break;
|
||
}
|
||
// Store in cache
|
||
cache.insert(a, b, pe, result);
|
||
m_trail.push_back(a);
|
||
m_trail.push_back(b);
|
||
m_trail.push_back(pe);
|
||
m_trail.push_back(result);
|
||
return result;
|
||
}
|
||
|
||
expr_ref derive::mk_union(expr* a, expr* b) {
|
||
return mk_core(OP_RE_UNION, a, b);
|
||
}
|
||
|
||
// Lightweight structural subsumption: checks if L(a) ⊆ L(b)
|
||
bool derive::is_subset(expr* a, expr* b) {
|
||
return m_re.is_subset(a, b);
|
||
}
|
||
|
||
bool derive::are_complements(expr* a, expr* b) {
|
||
expr* c = nullptr;
|
||
if (re().is_complement(a, c) && c == b) return true;
|
||
if (re().is_complement(b, c) && c == a) return true;
|
||
return false;
|
||
}
|
||
|
||
expr_ref derive::mk_union_core(expr* a, expr* b) {
|
||
|
||
// Prefix factoring: a·x ∪ a·y = a·(x ∪ y)
|
||
expr *a1, *a2, *b1, *b2;
|
||
if (re().is_concat(a, a1, a2) && re().is_concat(b, b1, b2) && a1 == b1) {
|
||
expr_ref tail = mk_union(a2, b2);
|
||
return mk_deriv_concat(a1, tail);
|
||
}
|
||
|
||
return m_re.mk_union(a, b);
|
||
}
|
||
|
||
expr_ref derive::mk_inter(expr* a, expr* b) {
|
||
return mk_core(OP_RE_INTERSECT, a, b);
|
||
}
|
||
|
||
expr_ref derive::mk_inter_core(expr* a, expr* b) {
|
||
|
||
// Subsumption covers: a==b, empty(a), empty(b), full(a), full(b), etc.
|
||
if (is_subset(a, b)) return expr_ref(a, m);
|
||
if (is_subset(b, a)) return expr_ref(b, m);
|
||
|
||
// Complement absorption: r ∩ ~r = ∅
|
||
expr *c = nullptr, *d = nullptr;
|
||
if (re().is_complement(a, c) && c == b)
|
||
return expr_ref(re().mk_empty(a->get_sort()), m);
|
||
if (re().is_complement(b, c) && c == a)
|
||
return expr_ref(re().mk_empty(a->get_sort()), m);
|
||
if (re().is_complement(a, c) && re().is_complement(b, d))
|
||
return expr_ref(re().mk_complement(mk_union_core(c, d)), m);
|
||
|
||
|
||
|
||
// TODO: Distribution of intersection over union
|
||
// Disabled pending performance analysis
|
||
// expr *u1, *u2;
|
||
// if (re().is_union(a, u1, u2))
|
||
// return mk_union(mk_inter(u1, b), mk_inter(u2, b));
|
||
// if (re().is_union(b, u1, u2))
|
||
// return mk_union(mk_inter(a, u1), mk_inter(a, u2));
|
||
|
||
// Base case: build raw intersection
|
||
return m_re.mk_inter(a, b);
|
||
}
|
||
|
||
|
||
expr_ref derive::mk_concat(expr* a, expr* b) {
|
||
sort* seq_s = nullptr, * ele_s = nullptr;
|
||
VERIFY(m_util.is_re(a, seq_s));
|
||
VERIFY(u().is_seq(seq_s, ele_s));
|
||
if (re().is_empty(a)) return expr_ref(a, m);
|
||
if (re().is_empty(b)) return expr_ref(b, m);
|
||
if (re().is_epsilon(a)) return expr_ref(b, m);
|
||
if (re().is_epsilon(b)) return expr_ref(a, m);
|
||
if (re().is_full_seq(a) && re().is_full_seq(b))
|
||
return expr_ref(a, m);
|
||
if (re().is_full_char(a) && re().is_full_seq(b))
|
||
return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m);
|
||
if (re().is_full_seq(a) && re().is_full_char(b))
|
||
return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m);
|
||
|
||
// to_re(s1) · to_re(s2) → to_re(s1 ++ s2)
|
||
expr* s1 = nullptr, * s2 = nullptr;
|
||
if (re().is_to_re(a, s1) && re().is_to_re(b, s2))
|
||
return expr_ref(re().mk_to_re(u().str.mk_concat(s1, s2)), m);
|
||
|
||
// r* · r* → r*
|
||
|
||
expr* a1 = nullptr, *a2 = nullptr, * b1 = nullptr;
|
||
|
||
if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1)
|
||
return expr_ref(a, m);
|
||
|
||
// Right-associate: (a · b) · c → a · (b · c)
|
||
|
||
if (re().is_concat(a, a1, a2))
|
||
return mk_concat(a1, mk_concat(a2, b));
|
||
|
||
return expr_ref(re().mk_concat(a, b), m);
|
||
}
|
||
|
||
expr_ref derive::mk_complement(expr* a) {
|
||
// Check path-aware op cache
|
||
expr* pe = get_path_expr();
|
||
expr* cached = nullptr;
|
||
if (m_complement_cache.find(a, pe, cached))
|
||
return expr_ref(cached, m);
|
||
|
||
expr_ref result = mk_complement_core(a);
|
||
|
||
// Store in cache
|
||
m_complement_cache.insert(a, pe, result);
|
||
m_trail.push_back(a);
|
||
m_trail.push_back(pe);
|
||
m_trail.push_back(result);
|
||
return result;
|
||
}
|
||
|
||
expr_ref derive::mk_complement_core(expr* a) {
|
||
// ~~r → r
|
||
expr* r = nullptr;
|
||
if (re().is_complement(a, r))
|
||
return expr_ref(r, m);
|
||
// ~∅ → Σ*
|
||
if (re().is_empty(a))
|
||
return expr_ref(re().mk_full_seq(a->get_sort()), m);
|
||
// ~Σ* → ∅
|
||
if (re().is_full_seq(a))
|
||
return expr_ref(re().mk_empty(a->get_sort()), m);
|
||
|
||
// Push through ITE with path pruning: ~(ite(c, t, e)) → ite(c, ~t, ~e)
|
||
expr* c, * t, * e;
|
||
if (m.is_ite(a, c, t, e)) {
|
||
auto comp_op = [&](expr* x) { return mk_complement(x); };
|
||
expr_ref r = apply_ite(c, t, e, comp_op);
|
||
if (r) return r;
|
||
return expr_ref(re().mk_full_seq(a->get_sort()), m);
|
||
}
|
||
|
||
// ~ε → .+
|
||
sort* s = nullptr;
|
||
expr* r1 = nullptr;
|
||
if (re().is_to_re(a, r1) && u().str.is_empty(r1)) {
|
||
VERIFY(m_util.is_re(a, s));
|
||
return expr_ref(re().mk_plus(re().mk_full_char(a->get_sort())), m);
|
||
}
|
||
|
||
return expr_ref(re().mk_complement(a), m);
|
||
}
|
||
|
||
expr_ref derive::mk_ite(expr* c, expr* t, expr* e) {
|
||
if (m.is_true(c) || t == e)
|
||
return expr_ref(t, m);
|
||
if (m.is_false(c))
|
||
return expr_ref(e, m);
|
||
// Use path-aware condition evaluation
|
||
lbool cond_val = eval_path_cond(c);
|
||
if (cond_val == l_true) return expr_ref(t, m);
|
||
if (cond_val == l_false) return expr_ref(e, m);
|
||
return expr_ref(m.mk_ite(c, t, e), m);
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Distribute concat through ITE/union structure of derivative
|
||
// -------------------------------------------------------
|
||
|
||
expr_ref derive::mk_deriv_concat(expr* d, expr* tail) {
|
||
// Check op cache
|
||
expr* cached = nullptr;
|
||
if (m_concat_cache.find(d, tail, cached))
|
||
return expr_ref(cached, m);
|
||
|
||
expr_ref result = mk_deriv_concat_core(d, tail);
|
||
|
||
// Store in cache
|
||
m_concat_cache.insert(d, tail, result);
|
||
m_trail.push_back(d);
|
||
m_trail.push_back(tail);
|
||
m_trail.push_back(result);
|
||
return result;
|
||
}
|
||
|
||
expr_ref derive::mk_deriv_concat_core(expr* d, expr* tail) {
|
||
expr_ref _d(d, m), _tail(tail, m);
|
||
expr* c, * t, * e;
|
||
|
||
if (re().is_empty(d))
|
||
return expr_ref(d, m);
|
||
if (re().is_epsilon(d))
|
||
return expr_ref(tail, m);
|
||
|
||
if (m.is_ite(d, c, t, e)) {
|
||
expr_ref then_r = mk_deriv_concat(t, tail);
|
||
expr_ref else_r = mk_deriv_concat(e, tail);
|
||
return mk_ite(c, then_r, else_r);
|
||
}
|
||
|
||
if (re().is_union(d, t, e)) {
|
||
expr_ref left = mk_deriv_concat(t, tail);
|
||
expr_ref right = mk_deriv_concat(e, tail);
|
||
return mk_union(left, right);
|
||
}
|
||
|
||
return mk_concat(d, tail);
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Path management for inline pruning
|
||
// -------------------------------------------------------
|
||
|
||
lbool derive::push(expr* c, bool sign) {
|
||
// Check if (c, sign) is already determined by the path
|
||
lbool cv = eval_path_cond(c);
|
||
if (cv == l_true && !sign) return l_true; // c implied true, push(c,false) is redundant
|
||
if (cv == l_false && sign) return l_true; // c implied false, push(c,true) is redundant
|
||
if (cv == l_true && sign) return l_false; // c implied true, push(c,true) contradicts
|
||
if (cv == l_false && !sign) return l_false; // c implied false, push(c,false) contradicts
|
||
|
||
// Save current state
|
||
unsigned saved_path_sz = m_path.size();
|
||
unsigned saved_intervals_sz = m_intervals.size();
|
||
unsigned saved_intervals_start = m_intervals_start;
|
||
expr* saved_path_expr = m_path_expr;
|
||
|
||
// Push atoms onto path and check for contradiction or implication
|
||
lbool result = push_path_atoms(c, sign);
|
||
if (result != l_undef) {
|
||
m_path.shrink(saved_path_sz);
|
||
m_intervals.shrink(saved_intervals_sz);
|
||
m_intervals_start = saved_intervals_start;
|
||
return result;
|
||
}
|
||
|
||
// Update intervals
|
||
result = push_intervals_impl(c, sign);
|
||
if (result != l_undef) {
|
||
m_path.shrink(saved_path_sz);
|
||
m_intervals.shrink(saved_intervals_sz);
|
||
m_intervals_start = saved_intervals_start;
|
||
return result;
|
||
}
|
||
|
||
// Update path expression
|
||
expr* atom = sign ? m.mk_not(c) : c;
|
||
m_path_expr = m.mk_and(m_path_expr, atom);
|
||
m_trail.push_back(m_path_expr);
|
||
|
||
// Commit: save state for pop()
|
||
m_path_stack.push_back({ saved_path_sz, saved_intervals_sz, saved_intervals_start, saved_path_expr });
|
||
return l_undef;
|
||
}
|
||
|
||
void derive::pop() {
|
||
SASSERT(!m_path_stack.empty());
|
||
auto const& saved = m_path_stack.back();
|
||
m_path.shrink(saved.path_sz);
|
||
m_intervals.shrink(saved.intervals_sz);
|
||
m_intervals_start = saved.intervals_start;
|
||
m_path_expr = saved.path_expr;
|
||
m_path_stack.pop_back();
|
||
}
|
||
|
||
// Binary apply_ite: hoist ite(c, t, e) op r with path pruning
|
||
expr_ref derive::apply_ite(expr* c, expr* t, expr* e, expr* r, std::function<expr_ref(expr*, expr*)> apply_op) {
|
||
expr_ref then_br(m), else_br(m);
|
||
switch (push(c, false)) {
|
||
case l_true: return apply_op(t, r);
|
||
case l_undef: then_br = apply_op(t, r); pop(); break;
|
||
case l_false: break;
|
||
}
|
||
switch (push(c, true)) {
|
||
case l_true: return apply_op(e, r);
|
||
case l_undef: else_br = apply_op(e, r); pop(); break;
|
||
case l_false: break;
|
||
}
|
||
if (then_br && else_br) return mk_ite(c, then_br, else_br);
|
||
if (then_br) return then_br;
|
||
if (else_br) return else_br;
|
||
return expr_ref(nullptr, m);
|
||
}
|
||
|
||
// Same-condition merge: ite(c, t1, e1) op ite(c, t2, e2) → ite(c, t1 op t2, e1 op e2)
|
||
expr_ref derive::apply_ite(expr* c, expr* t1, expr* e1, expr* t2, expr* e2, std::function<expr_ref(expr*, expr*)> apply_op) {
|
||
expr_ref then_br(m), else_br(m);
|
||
switch (push(c, false)) {
|
||
case l_true: return apply_op(t1, t2);
|
||
case l_undef: then_br = apply_op(t1, t2); pop(); break;
|
||
case l_false: break;
|
||
}
|
||
switch (push(c, true)) {
|
||
case l_true: return apply_op(e1, e2);
|
||
case l_undef: else_br = apply_op(e1, e2); pop(); break;
|
||
case l_false: break;
|
||
}
|
||
if (then_br && else_br) return mk_ite(c, then_br, else_br);
|
||
if (then_br) return then_br;
|
||
if (else_br) return else_br;
|
||
return expr_ref(nullptr, m);
|
||
}
|
||
|
||
// Unary apply_ite: hoist ite(c, t, e) through unary op with path pruning
|
||
expr_ref derive::apply_ite(expr* c, expr* t, expr* e, std::function<expr_ref(expr*)> apply_op) {
|
||
expr_ref then_br(m), else_br(m);
|
||
switch (push(c, false)) {
|
||
case l_true: return apply_op(t);
|
||
case l_undef: then_br = apply_op(t); pop(); break;
|
||
case l_false: break;
|
||
}
|
||
switch (push(c, true)) {
|
||
case l_true: return apply_op(e);
|
||
case l_undef: else_br = apply_op(e); pop(); break;
|
||
case l_false: break;
|
||
}
|
||
if (then_br && else_br) return mk_ite(c, then_br, else_br);
|
||
if (then_br) return then_br;
|
||
if (else_br) return else_br;
|
||
return expr_ref(nullptr, m);
|
||
}
|
||
|
||
// Common ITE dispatch for binary ops (union/inter).
|
||
// Returns nullptr if neither a nor b is ITE.
|
||
expr_ref derive::hoist_ite(expr* a, expr* b, std::function<expr_ref(expr*, expr*)> apply_op) {
|
||
expr *c1, *t1, *e1, *c2, *t2, *e2;
|
||
if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2) && c1->get_id() > c2->get_id())
|
||
std::swap(a, b);
|
||
if (m.is_ite(a, c1, t1, e1) && m.is_ite(b, c2, t2, e2)) {
|
||
expr_ref r(m);
|
||
if (c1 == c2)
|
||
r = apply_ite(c1, t1, e1, t2, e2, apply_op);
|
||
else
|
||
r = apply_ite(c1, t1, e1, b, apply_op);
|
||
if (r) return r;
|
||
return expr_ref(re().mk_empty(a->get_sort()), m);
|
||
}
|
||
// Single-ITE hoisting: must always recurse to maintain path-aware
|
||
// soundness — falling back to a non-path-aware structural rewrite
|
||
// here would bake unreachable branches into the result tree.
|
||
if (m.is_ite(a, c1, t1, e1)) {
|
||
expr_ref r = apply_ite(c1, t1, e1, b, apply_op);
|
||
if (r) return r;
|
||
return expr_ref(re().mk_empty(a->get_sort()), m);
|
||
}
|
||
if (m.is_ite(b, c2, t2, e2)) {
|
||
expr_ref r = apply_ite(c2, t2, e2, a, apply_op);
|
||
if (r) return r;
|
||
return expr_ref(re().mk_empty(a->get_sort()), m);
|
||
}
|
||
return expr_ref(nullptr, m);
|
||
}
|
||
|
||
// Push signed atoms onto m_path. Returns l_true if implied, l_false if contradicted, l_undef if pushed.
|
||
lbool derive::push_path_atoms(expr* c, bool sign) {
|
||
// Check if (c, sign) is already determined by the path
|
||
for (auto const& [cond, csign] : m_path) {
|
||
if (c == cond)
|
||
return csign == sign ? l_true : l_false;
|
||
expr* lhs1 = nullptr, * rhs1 = nullptr, * lhs2 = nullptr, * rhs2 = nullptr;
|
||
// x = v, v != w |-> x != w
|
||
if (!csign && m.is_eq(cond, lhs1, rhs1) && m.is_eq(c, lhs2, rhs2)) {
|
||
if (m.is_value(lhs1)) std::swap(lhs1, rhs1);
|
||
if (m.is_value(lhs2)) std::swap(lhs2, rhs2);
|
||
if (lhs1 == lhs2 && m.are_distinct(rhs1, rhs2))
|
||
return sign ? l_true : l_false;
|
||
}
|
||
}
|
||
|
||
// Composite: conjunction assumed true, or disjunction assumed false
|
||
if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
|
||
bool all_implied = true;
|
||
for (expr* arg : *to_app(c)) {
|
||
lbool r = push_path_atoms(arg, sign);
|
||
if (r == l_false) return l_false;
|
||
if (r == l_undef) all_implied = false;
|
||
}
|
||
return all_implied ? l_true : l_undef;
|
||
}
|
||
|
||
// Atomic: push onto path
|
||
m_path.push_back({ c, sign });
|
||
return l_undef;
|
||
}
|
||
|
||
// Update m_intervals based on the condition. Returns l_true if implied, l_false if inconsistent, l_undef if pushed.
|
||
// Operates on the active suffix m_intervals[m_intervals_start..end].
|
||
// On modification, appends new intervals and updates m_intervals_start.
|
||
lbool derive::push_intervals_impl(expr* c, bool sign) {
|
||
unsigned lo = 0, hi = 0;
|
||
bool negated = false;
|
||
if (m_util.is_char_const_range(m_ele, c, lo, hi, negated)) {
|
||
bool effective_neg = (negated != sign);
|
||
if (!effective_neg) {
|
||
if (lo <= hi) {
|
||
// Check if current intervals already imply [lo,hi]
|
||
bool already_subset = true;
|
||
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
|
||
if (m_intervals[i].first < lo || m_intervals[i].second > hi) { already_subset = false; break; }
|
||
}
|
||
if (already_subset) return l_true;
|
||
intersect_intervals(lo, hi);
|
||
} else {
|
||
// lo > hi means empty range — contradiction
|
||
return l_false;
|
||
}
|
||
} else {
|
||
if (lo <= hi) {
|
||
// Check if current intervals already exclude [lo,hi]
|
||
bool already_excluded = true;
|
||
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
|
||
if (m_intervals[i].first <= hi && m_intervals[i].second >= lo) { already_excluded = false; break; }
|
||
}
|
||
if (already_excluded) return l_true;
|
||
exclude_interval(lo, hi);
|
||
}
|
||
}
|
||
} else if ((!sign && m.is_and(c)) || (sign && m.is_or(c))) {
|
||
bool all_implied = true;
|
||
for (expr* arg : *to_app(c)) {
|
||
lbool r = push_intervals_impl(arg, sign);
|
||
if (r == l_false) return l_false;
|
||
if (r == l_undef) all_implied = false;
|
||
}
|
||
unsigned n = m_intervals.size() - m_intervals_start;
|
||
return all_implied ? l_true : (n == 0 ? l_false : l_undef);
|
||
}
|
||
unsigned n = m_intervals.size() - m_intervals_start;
|
||
return n == 0 ? l_false : l_undef;
|
||
}
|
||
|
||
// Evaluate a condition against the current path and intervals.
|
||
lbool derive::eval_path_cond(expr* c) {
|
||
// First try static evaluation (concrete m_ele, tautologies)
|
||
lbool v = eval_cond(c);
|
||
if (v != l_undef) return v;
|
||
|
||
// Check against path atoms
|
||
for (auto const& [cond, sign] : m_path) {
|
||
if (c == cond)
|
||
return sign ? l_false : l_true;
|
||
}
|
||
|
||
// Check against intervals
|
||
v = eval_range_cond(c);
|
||
if (v != l_undef) return v;
|
||
|
||
// Check pred_implies from path atoms
|
||
for (auto const& [cond, sign] : m_path) {
|
||
if (pred_implies(sign, cond, false, c))
|
||
return l_true;
|
||
if (pred_implies(sign, cond, true, c))
|
||
return l_false;
|
||
}
|
||
|
||
return l_undef;
|
||
}
|
||
|
||
// -------------------------------------------------------
|
||
// Condition evaluation helpers
|
||
// -------------------------------------------------------
|
||
|
||
lbool derive::eval_cond(expr* cond) {
|
||
expr* e1 = nullptr;
|
||
|
||
if (m.is_true(cond)) return l_true;
|
||
if (m.is_false(cond)) return l_false;
|
||
|
||
// Use is_char_const_range to evaluate conditions involving m_ele
|
||
unsigned lo = 0, hi = 0, ele_val = 0;
|
||
bool negated = false;
|
||
if (m_util.is_char_const_range(m_ele, cond, lo, hi, negated) && u().is_const_char(m_ele, ele_val)) {
|
||
bool in_range = (lo <= ele_val && ele_val <= hi);
|
||
return (in_range != negated) ? l_true : l_false;
|
||
}
|
||
|
||
// Handle self-equality and constant comparisons not involving m_ele
|
||
expr* lhs = nullptr, * rhs = nullptr;
|
||
if (m.is_eq(cond, lhs, rhs) && lhs == rhs)
|
||
return l_true;
|
||
|
||
unsigned vl = 0, vr = 0;
|
||
if (u().is_char_le(cond, lhs, rhs)) {
|
||
if (u().is_const_char(lhs, vl) && u().is_const_char(rhs, vr))
|
||
return vl <= vr ? l_true : l_false;
|
||
if (u().is_const_char(lhs, vl) && vl == 0)
|
||
return l_true;
|
||
if (u().is_const_char(rhs, vr) && vr == u().max_char())
|
||
return l_true;
|
||
}
|
||
|
||
// not(e1)
|
||
if (m.is_not(cond, e1))
|
||
return ~eval_cond(e1);
|
||
|
||
// and(...)
|
||
if (m.is_and(cond)) {
|
||
lbool r = l_true;
|
||
for (expr* arg : *to_app(cond)) {
|
||
lbool v = eval_cond(arg);
|
||
if (v == l_false) return l_false;
|
||
if (v == l_undef) r = l_undef;
|
||
}
|
||
return r;
|
||
}
|
||
|
||
// or(...)
|
||
if (m.is_or(cond)) {
|
||
lbool r = l_false;
|
||
for (expr* arg : *to_app(cond)) {
|
||
lbool v = eval_cond(arg);
|
||
if (v == l_true) return l_true;
|
||
if (v == l_undef) r = l_undef;
|
||
}
|
||
return r;
|
||
}
|
||
|
||
return l_undef;
|
||
}
|
||
|
||
lbool derive::eval_range_cond(expr* c) {
|
||
unsigned n = m_intervals.size() - m_intervals_start;
|
||
if (n == 0)
|
||
return l_false;
|
||
unsigned lo = 0, hi = 0;
|
||
bool negated = false;
|
||
if (!m_util.is_char_const_range(m_ele, c, lo, hi, negated))
|
||
return l_undef;
|
||
if (lo > hi) {
|
||
return negated ? l_true : l_false;
|
||
}
|
||
// Check if [lo, hi] overlaps with intervals and/or contains all intervals
|
||
bool any_overlap = false;
|
||
bool all_contained = true;
|
||
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
|
||
auto [r_lo, r_hi] = m_intervals[i];
|
||
if (std::max(r_lo, lo) <= std::min(r_hi, hi))
|
||
any_overlap = true;
|
||
if (r_lo < lo || r_hi > hi)
|
||
all_contained = false;
|
||
}
|
||
if (!negated) {
|
||
if (!any_overlap) return l_false;
|
||
if (all_contained) return l_true;
|
||
} else {
|
||
if (all_contained) return l_false;
|
||
if (!any_overlap) return l_true;
|
||
}
|
||
return l_undef;
|
||
}
|
||
|
||
// Intersect the active suffix m_intervals[m_intervals_start..end] with [lo, hi]
|
||
void derive::intersect_intervals(unsigned lo, unsigned hi) {
|
||
// Copy active suffix to end, update start, then filter
|
||
unsigned old_sz = m_intervals.size();
|
||
for (unsigned i = m_intervals_start; i < old_sz; ++i)
|
||
m_intervals.push_back(m_intervals[i]);
|
||
m_intervals_start = old_sz;
|
||
// Filter in-place within new suffix: drop intervals disjoint from [lo,hi],
|
||
// keep the intersection for overlapping ones.
|
||
unsigned j = m_intervals_start;
|
||
for (unsigned i = m_intervals_start; i < m_intervals.size(); ++i) {
|
||
auto [lo1, hi1] = m_intervals[i];
|
||
if (hi < lo1 || lo > hi1)
|
||
continue; // disjoint with this interval — drop it
|
||
m_intervals[j++] = {std::max(lo1, lo), std::min(hi1, hi)};
|
||
}
|
||
m_intervals.shrink(j);
|
||
}
|
||
|
||
// Exclude [lo, hi] from the active suffix m_intervals[m_intervals_start..end]
|
||
void derive::exclude_interval(unsigned lo, unsigned hi) {
|
||
unsigned max_char = u().max_char();
|
||
if (lo == 0 && hi >= max_char) { m_intervals_start = m_intervals.size(); return; }
|
||
if (lo == 0) { intersect_intervals(hi + 1, max_char); return; }
|
||
if (hi >= max_char) { intersect_intervals(0, lo - 1); return; }
|
||
// Each interval [ilo, ihi] minus [lo, hi] → up to 2 pieces
|
||
// Append new results past the end, then move start
|
||
unsigned old_start = m_intervals_start;
|
||
unsigned old_sz = m_intervals.size();
|
||
for (unsigned i = old_start; i < old_sz; ++i) {
|
||
auto [ilo, ihi] = m_intervals[i];
|
||
if (ihi < lo || ilo > hi) {
|
||
m_intervals.push_back(m_intervals[i]);
|
||
} else {
|
||
if (ilo < lo)
|
||
m_intervals.push_back({ilo, lo - 1});
|
||
if (ihi > hi)
|
||
m_intervals.push_back({hi + 1, ihi});
|
||
}
|
||
}
|
||
m_intervals_start = old_sz;
|
||
}
|
||
|
||
}
|