3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-27 19:08:49 +00:00

Merge branch 'master' into c3

This commit is contained in:
CEisenhofer 2026-06-10 13:41:31 +02:00
commit e3b80fc578
104 changed files with 10253 additions and 4367 deletions

View file

@ -21,6 +21,7 @@ Revision History:
#include "ast/for_each_ast.h"
#include "ast/arith_decl_plugin.h"
#include "ast/datatype_decl_plugin.h"
#include "ast/ast_smt2_pp.h"
// #define AST_LL_PP_SHOW_FAMILY_NAME
@ -44,7 +45,7 @@ class ll_printer {
}
void display_name(func_decl * decl) {
m_out << decl->get_name();
m_out << ensure_quote(decl->get_name());
}
bool process_numeral(expr * n) {

View file

@ -507,6 +507,7 @@ class smt_printer {
case forall_k: m_out << "forall "; break;
case exists_k: m_out << "exists "; break;
case lambda_k: m_out << "lambda "; break;
case choice_k: m_out << "choice "; break;
}
m_out << "(";
for (unsigned i = 0; i < q->get_num_decls(); ++i) {

View file

@ -1951,14 +1951,12 @@ namespace euf {
enode * get_next_f_app(func_decl * lbl, unsigned num_expected_args, enode * first, enode * curr) {
curr = curr->get_next();
enode *matching_cgr = nullptr, *min_gen_match = nullptr;
while (curr != first) {
get_f_app(lbl, num_expected_args, curr, matching_cgr, min_gen_match);
if (curr->get_decl() == lbl && curr->num_args() == num_expected_args && curr->is_cgr())
return curr;
curr = curr->get_next();
}
if (matching_cgr)
update_max_generation(min_gen_match, first);
return matching_cgr;
return nullptr;
}
/**

View file

@ -39,6 +39,7 @@ z3_add_component(rewriter
rewriter.cpp
seq_axioms.cpp
seq_eq_solver.cpp
seq_subset.cpp
seq_rewriter.cpp
seq_skolem.cpp
th_rewriter.cpp

View file

@ -4525,10 +4525,60 @@ br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) {
r* ++ r -> r ++ r*
*/
br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
auto accepts_empty_word = [&](expr* r) {
auto info = re().get_info(r);
return info.interpreted && info.nullable == l_true && info.min_length == 0;
};
auto starts_with_full_seq = [&](expr* r) {
expr* r1 = nullptr, *r2 = nullptr;
return re().is_full_seq(r) || (re().is_concat(r, r1, r2) && re().is_full_seq(r1));
};
auto ends_with_full_seq = [&](expr* r) {
expr* r1 = nullptr, *r2 = nullptr;
while (re().is_concat(r, r1, r2))
r = r2;
return re().is_full_seq(r);
};
auto all_inter_arms_end_with_full_seq = [&](expr* r) {
ptr_buffer<expr> todo;
todo.push_back(r);
while (!todo.empty()) {
expr* r1 = nullptr, *r2 = nullptr;
expr* t = todo.back();
todo.pop_back();
if (re().is_intersection(t, r1, r2)) {
todo.push_back(r1);
todo.push_back(r2);
}
else if (!ends_with_full_seq(t)) {
return false;
}
}
return true;
};
if (re().is_full_seq(a) && re().is_full_seq(b)) {
result = a;
return BR_DONE;
}
if (re().is_full_seq(a) && accepts_empty_word(b)) {
result = a;
return BR_DONE;
}
if (re().is_full_seq(b) && accepts_empty_word(a)) {
result = b;
return BR_DONE;
}
expr* u1 = nullptr, *u2 = nullptr;
if (re().is_full_seq(a) && re().is_union(b, u1, u2) &&
(starts_with_full_seq(u1) || starts_with_full_seq(u2))) {
result = mk_regex_union_normalize(mk_regex_concat(a, u1), mk_regex_concat(a, u2));
return BR_REWRITE2;
}
if (re().is_intersection(a, u1, u2) && re().is_full_seq(b) &&
all_inter_arms_end_with_full_seq(a)) {
result = a;
return BR_DONE;
}
if (re().is_empty(a)) {
result = a;
return BR_DONE;
@ -4564,7 +4614,8 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
result = re().mk_to_re(str().mk_concat(a_str, b_str));
return BR_REWRITE2;
}
expr* a1 = nullptr, *b1 = nullptr;
expr* a1 = nullptr;
expr* b1 = nullptr;
if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) {
result = re().mk_to_re(str().mk_concat(a1, b1));
return BR_DONE;
@ -4573,6 +4624,11 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
result = a;
return BR_DONE;
}
expr* b2 = nullptr, *b3 = nullptr;
if (re().is_star(a, a1) && re().is_concat(b, b1, b2) && re().is_star(b1, b3) && a1 == b3) {
result = b;
return BR_DONE;
}
if (re().is_star(a, a1) && a1 == b) {
result = re().mk_concat(b, a);
return BR_DONE;
@ -4626,51 +4682,7 @@ bool seq_rewriter::are_complements(expr* r1, expr* r2) const {
* basic subset checker.
*/
bool seq_rewriter::is_subset(expr* r1, expr* r2) const {
// return false;
expr* ra1 = nullptr, *ra2 = nullptr, *ra3 = nullptr;
expr* rb1 = nullptr, *rb2 = nullptr, *rb3 = nullptr;
unsigned la, ua, lb, ub;
if (re().is_complement(r1, ra1) &&
re().is_complement(r2, rb1)) {
return is_subset(rb1, ra1);
}
auto is_concat = [&](expr* r, expr*& a, expr*& b, expr*& c) {
return re().is_concat(r, a, b) && re().is_concat(b, b, c);
};
while (true) {
if (r1 == r2)
return true;
if (re().is_full_seq(r2))
return true;
if (re().is_dot_plus(r2) && re().get_info(r1).nullable == l_false)
return true;
if (is_concat(r1, ra1, ra2, ra3) &&
is_concat(r2, rb1, rb2, rb3) && ra1 == rb1 && ra2 == rb2) {
r1 = ra3;
r2 = rb3;
continue;
}
if (re().is_concat(r1, ra1, ra2) &&
re().is_concat(r2, rb1, rb2) && re().is_full_seq(rb1)) {
r1 = ra2;
continue;
}
// r1=ra3{la,ua}ra2, r2=rb3{lb,ub}rb2, ra3=rb3, lb<=la, ua<=ub
if (re().is_concat(r1, ra1, ra2) && re().is_loop(ra1, ra3, la, ua) &&
re().is_concat(r2, rb1, rb2) && re().is_loop(rb1, rb3, lb, ub) &&
ra3 == rb3 && lb <= la && ua <= ub) {
r1 = ra2;
r2 = rb2;
continue;
}
// ra1=ra3{la,ua}, r2=rb3{lb,ub}, ra3=rb3, lb<=la, ua<=ub
if (re().is_loop(r1, ra3, la, ua) &&
re().is_loop(r2, rb3, lb, ub) &&
ra3 == rb3 && lb <= la && ua <= ub) {
return true;
}
return false;
}
return m_subset.is_subset(r1, r2);
}
br_status seq_rewriter::mk_re_union0(expr* a, expr* b, expr_ref& result) {
@ -6256,4 +6268,3 @@ bool seq_rewriter::get_bounds(expr* e, unsigned& low, unsigned& high) {
}
return low <= high;
}

View file

@ -23,6 +23,7 @@ Notes:
#include "ast/arith_decl_plugin.h"
#include "ast/rewriter/rewriter_types.h"
#include "ast/rewriter/bool_rewriter.h"
#include "ast/rewriter/seq_subset.h"
#include "util/params.h"
#include "util/lbool.h"
#include "util/sign.h"
@ -128,6 +129,7 @@ class seq_rewriter {
};
seq_util m_util;
seq_subset m_subset;
arith_util m_autil;
bool_rewriter m_br;
// re2automaton m_re2aut;
@ -340,7 +342,7 @@ class seq_rewriter {
public:
seq_rewriter(ast_manager & m, params_ref const & p = params_ref()):
m_util(m), m_autil(m), m_br(m, p), // m_re2aut(m),
m_util(m), m_subset(m_util.re), m_autil(m), m_br(m, p), // m_re2aut(m),
m_op_cache(m), m_es(m),
m_lhs(m), m_rhs(m), m_coalesce_chars(true) {
}
@ -436,4 +438,3 @@ public:
*/
lbool some_string_in_re(expr* r, zstring& s);
};

View file

@ -0,0 +1,146 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_subset.cpp
Abstract:
Heuristic regular-expression subset checks used by seq_rewriter.
Author:
Nikolaj Bjorner (nbjorner) 2026-6-8
--*/
#include "ast/rewriter/seq_subset.h"
/**
 * Heuristic check that the language of regular expression `a` is contained
 * in the language of regular expression `b`.
 *
 * The check is rule based and incomplete: `true` means one of the syntactic
 * inclusion rules below applied; `false` means no rule applied within the
 * depth budget, not that the inclusion fails.
 *
 * @param a     candidate sub-language.
 * @param b     candidate super-language.
 * @param depth current recursion budget already spent. Once it reaches
 *              m_max_depth only the depth-independent base cases are tried.
 *              Rules that recurse into a strict sub-term of `a` or `b` with
 *              the other side unchanged keep `depth`; the remaining rules
 *              pass `depth + 1`.
 * @return true if inclusion was established by the rules, false otherwise.
 */
bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const {
    while (true) {
        // Depth-independent base cases.
        if (a == b)
            return true;
        if (m_re.is_empty(a))
            return true;
        if (m_re.is_full_seq(b))
            return true;
        if (m_re.is_epsilon(a) && m_re.get_info(b).nullable == l_true)
            return true;

        // Budget exhausted: give up (answer is "unknown", reported as false).
        if (depth >= m_max_depth)
            return false;

        expr* a1 = nullptr, * a2 = nullptr, * b1 = nullptr, * b2 = nullptr;
        // Only meaningful after the corresponding is_range/is_loop matcher succeeded.
        unsigned la = 0, ua = 0, lb = 0, ub = 0;

        // a ⊆ .+ if a is non-nullable
        if (m_re.is_dot_plus(b) && m_re.get_info(a).nullable == l_false)
            return true;

        // a ⊆ b1* if a ⊆ b1
        // (A second attempt of the same rule with depth + 1 used to follow;
        // it can never succeed when this one fails, because a larger depth
        // only makes the cut-off above trigger earlier.)
        if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth))
            return true;

        // ε ⊆ b1*
        if (m_re.is_epsilon(a) && m_re.is_star(b, b1))
            return true;

        // R1* ⊆ R2* if R1 ⊆ R2
        if (m_re.is_star(a, a1) && m_re.is_star(b, b1) && is_subset_rec(a1, b1, depth + 1))
            return true;

        // R1+ ⊆ R2+ if R1 ⊆ R2
        if (m_re.is_plus(a, a1) && m_re.is_plus(b, b1) && is_subset_rec(a1, b1, depth))
            return true;

        // R ⊆ R'+ if R ⊆ R'
        if (m_re.is_plus(b, b1) && is_subset_rec(a, b1, depth))
            return true;

        // R+ ⊆ R'* if R ⊆ R'
        if (m_re.is_plus(a, a1) && m_re.is_star(b, b1) && is_subset_rec(a1, b1, depth + 1))
            return true;

        // range containment
        if (m_re.is_range(a, la, ua) && m_re.is_range(b, lb, ub) && lb <= la && ua <= ub)
            return true;

        // to_re(s) ⊆ range, for a string constant s of length 1 inside the range
        if (m_re.is_to_re(a, a1) && m_re.is_range(b, lb, ub) && is_app(a1)) {
            func_decl* f = to_app(a1)->get_decl();
            if (f->get_decl_kind() == OP_STRING_CONST && f->get_num_parameters() == 1) {
                zstring const& s = f->get_parameter(0).get_zstring();
                if (s.length() == 1 && lb <= s[0] && s[0] <= ub)
                    return true;
            }
        }

        // a ⊆ b1 ∪ b2 if a ⊆ b1 or a ⊆ b2
        if (m_re.is_union(b, b1, b2) && (is_subset_rec(a, b1, depth + 1) || is_subset_rec(a, b2, depth + 1)))
            return true;

        // a1 ∪ a2 ⊆ b if a1 ⊆ b and a2 ⊆ b
        if (m_re.is_union(a, a1, a2) && is_subset_rec(a1, b, depth + 1) && is_subset_rec(a2, b, depth + 1))
            return true;

        // a1 ∩ a2 ⊆ b if a1 ⊆ b or a2 ⊆ b
        if (m_re.is_intersection(a, a1, a2) && (is_subset_rec(a1, b, depth + 1) || is_subset_rec(a2, b, depth + 1)))
            return true;

        // a ⊆ b1 ∩ b2 if a ⊆ b1 and a ⊆ b2
        if (m_re.is_intersection(b, b1, b2) && is_subset_rec(a, b1, depth + 1) && is_subset_rec(a, b2, depth + 1))
            return true;

        // R{la,ua} ⊆ R'{lb,ub} if R ⊆ R', lb <= la, ua <= ub
        if (m_re.is_loop(a, a1, la, ua) &&
            m_re.is_loop(b, b1, lb, ub) &&
            lb <= la && ua <= ub && is_subset_rec(a1, b1, depth + 1)) {
            return true;
        }

        // a1 \ a2 ⊆ b if a1 ⊆ b
        if (m_re.is_diff(a, a1, a2) && is_subset_rec(a1, b, depth + 1))
            return true;

        // R ⊆ Σ*·R' if R ⊆ R'
        if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b1) && is_subset_rec(a, b2, depth))
            return true;

        // R ⊆ R'·Σ* if R ⊆ R'
        if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b2) && is_subset_rec(a, b1, depth))
            return true;

        // star absorption: a1·a2 ⊆ b1* if one factor is in b1 and the other in b1*
        bool const is_concat_star = m_re.is_concat(a, a1, a2) && m_re.is_star(b, b1);
        if (is_concat_star &&
            is_subset_rec(a1, b1, depth + 1) && is_subset_rec(a2, b, depth + 1))
            return true;
        if (is_concat_star &&
            is_subset_rec(a2, b1, depth + 1) && is_subset_rec(a1, b, depth + 1))
            return true;

        // concat monotonicity: a1·a2 ⊆ b1·b2 if a1 ⊆ b1 and a2 ⊆ b2.
        // The tails are handled by looping instead of recursing, so walking
        // down a long concatenation does not consume the depth budget.
        if (m_re.is_concat(a, a1, a2) && m_re.is_concat(b, b1, b2) && is_subset_rec(a1, b1, depth + 1)) {
            a = a2;
            b = b2;
            continue;
        }

        // complement: ~a1 ⊆ ~b1 if b1 ⊆ a1
        if (m_re.is_complement(a, a1) && m_re.is_complement(b, b1))
            return is_subset_rec(b1, a1, depth + 1);

        return false;
    }
}
/**
 * Public entry point of the heuristic subset check: runs the recursive
 * checker on (a, b) starting with an unspent depth budget.
 */
bool seq_subset::is_subset(expr* a, expr* b) const {
    unsigned const initial_depth = 0;
    return is_subset_rec(a, b, initial_depth);
}

View file

@ -0,0 +1,30 @@
/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_subset.h
Abstract:
Heuristic regular-expression subset checks used by seq_rewriter.
Author:
Nikolaj Bjorner (nbjorner) 2026-6-8
--*/
#pragma once
#include "ast/seq_decl_plugin.h"
class seq_subset {
seq_util::rex& m_re;
static constexpr unsigned m_max_depth = 3;
bool is_subset_rec(expr* a, expr* b, unsigned depth) const;
public:
explicit seq_subset(seq_util::rex& re) : m_re(re) {}
bool is_subset(expr* a, expr* b) const;
};

View file

@ -1656,9 +1656,9 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
if (e->get_family_id() == u.get_family_id()) {
switch (e->get_decl()->get_decl_kind()) {
case OP_RE_EMPTY_SET:
return info(true, l_false, UINT_MAX);
return info(true, l_false, UINT_MAX, false);
case OP_RE_FULL_SEQ_SET:
return info(true, l_true, 0);
return info(true, l_true, 0, true);
case OP_RE_STAR:
i1 = get_info_rec(e->get_arg(0));
return i1.star();
@ -1670,7 +1670,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
case OP_RE_OF_PRED:
//TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
//TBD: check if the range is unsat
return info(true, l_false, 1);
return info(true, l_false, 1, false);
case OP_RE_CONCAT:
i1 = get_info_rec(e->get_arg(0));
i2 = get_info_rec(e->get_arg(1));
@ -1687,7 +1687,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
min_length = u.str.min_length(e->get_arg(0));
is_value = m.is_value(e->get_arg(0));
nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef));
return info(is_value, nullable, min_length);
return info(is_value, nullable, min_length, true);
case OP_RE_REVERSE:
return get_info_rec(e->get_arg(0));
case OP_RE_PLUS:
@ -1723,7 +1723,8 @@ std::ostream& seq_util::rex::info::display(std::ostream& out) const {
if (is_known()) {
out << "info("
<< "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", "
<< "min_length=" << min_length << ")";
<< "min_length=" << min_length << ", "
<< "classical=" << (classical ? "T" : "F") << ")";
}
else if (is_valid())
out << "UNKNOWN";
@ -1743,13 +1744,13 @@ std::string seq_util::rex::info::str() const {
seq_util::rex::info seq_util::rex::info::star() const {
//if is_known() is false then all mentioned properties will remain false
return seq_util::rex::info(interpreted, l_true, 0);
return seq_util::rex::info(interpreted, l_true, 0, classical);
}
seq_util::rex::info seq_util::rex::info::plus() const {
if (is_known()) {
//plus never occurs in a normalized regex
return info(interpreted, nullable, min_length);
return info(interpreted, nullable, min_length, classical);
}
else
return *this;
@ -1758,14 +1759,14 @@ seq_util::rex::info seq_util::rex::info::plus() const {
seq_util::rex::info seq_util::rex::info::opt() const {
// if is_known() is false then all mentioned properties will remain false
// optional construct never occurs in a normalized regex
return seq_util::rex::info(interpreted, l_true, 0);
return seq_util::rex::info(interpreted, l_true, 0, classical);
}
seq_util::rex::info seq_util::rex::info::complement() const {
if (is_known()) {
lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef));
unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0);
return info(interpreted, compl_nullable, compl_min_length);
return info(interpreted, compl_nullable, compl_min_length, false);
}
else
return *this;
@ -1779,7 +1780,8 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs,
m = UINT_MAX;
return info(interpreted && rhs.interpreted,
((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)),
m);
m,
classical && rhs.classical);
}
else
return rhs;
@ -1793,7 +1795,8 @@ seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) co
//works correctly if one of the arguments is unknown
return info(interpreted && rhs.interpreted,
((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)),
std::min(min_length, rhs.min_length));
std::min(min_length, rhs.min_length),
classical && rhs.classical);
}
else
return rhs;
@ -1804,7 +1807,8 @@ seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) co
if (rhs.is_known()) {
return info(interpreted && rhs.interpreted,
((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
std::max(min_length, rhs.min_length));
std::max(min_length, rhs.min_length),
false);
}
else
return rhs;
@ -1818,7 +1822,8 @@ seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) co
if (rhs.is_known()) {
return info(interpreted & rhs.interpreted,
((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)),
std::max(min_length, rhs.min_length));
std::max(min_length, rhs.min_length),
false);
}
else
return rhs;
@ -1835,7 +1840,8 @@ seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) co
// TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
return info(false,
((nullable == l_true && i.nullable == l_true) ? l_true : ((nullable == l_false && i.nullable == l_false) ? l_false : l_undef)),
std::min(min_length, i.min_length));
std::min(min_length, i.min_length),
classical && i.classical);
}
else
return i;
@ -1851,7 +1857,7 @@ seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) co
if (m > 0 && (m < min_length || m < lower))
m = UINT_MAX;
lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
return info(interpreted, loop_nullable, m);
return info(interpreted, loop_nullable, m, classical);
}
else
return *this;
@ -1866,6 +1872,7 @@ seq_util::rex::info& seq_util::rex::info::operator=(info const& other) {
interpreted = other.interpreted;
nullable = other.nullable;
min_length = other.min_length;
classical = other.classical;
return *this;
}

View file

@ -447,6 +447,8 @@ public:
lbool nullable { l_undef };
/* Lower bound on the length of all accepted words. */
unsigned min_length { 0 };
/* Classical regular expression: does not use complement, intersection, diff, or the empty language (fail). */
bool classical { true };
/*
Default constructor of invalid info.
@ -463,11 +465,13 @@ public:
*/
info(bool is_interpreted,
lbool is_nullable,
unsigned min_l) :
unsigned min_l,
bool is_classical) :
known(l_true),
interpreted(is_interpreted),
nullable(is_nullable),
min_length(min_l) {}
min_length(min_l),
classical(is_classical) {}
/*
Appends a string representation of the info into the stream.

View file

@ -172,9 +172,7 @@ namespace sls {
return false;
if (r > sx.length() && update(x, sx + zstring(random_char())))
return false;
// This case seems to imply unsat
verbose_stream() << "The input might be unsat\n"; // example to trigger: (assert (and (>= (str.len X) 2) (= (str.substr X 0 1) "")))
VERIFY(false);
// Both updates failed. Treat as unsatisfied and let outer search continue.
return false;
}
@ -198,8 +196,16 @@ namespace sls {
return false;
}
if (seq.str.is_last_index(e, x, y) && seq.is_string(x->get_sort())) {
// TODO
NOT_IMPLEMENTED_YET();
auto sx = strval0(x);
auto sy = strval0(y);
rational val_e;
if (!a.is_numeral(ctx.get_value(e), val_e))
return false;
rational actual(sx.last_indexof(sy));
if (val_e == actual)
continue;
update(e, actual);
return false;
}
if (seq.str.is_stoi(e, x) && seq.is_string(x->get_sort())) {
auto sx = strval0(x);
@ -753,7 +759,7 @@ namespace sls {
for (unsigned j = 1; j <= val_other.length() - i; ++j) {
zstring sub = val_other.extract(i, j);
if (set.contains(sub))
break;
continue;
set.insert(sub);
}
}
@ -906,7 +912,7 @@ namespace sls {
m_string_updates.reset();
u[i][j] = d[i - 1][j];
}
if (d[i][j - 1] < u[i][j] && b.can_add(i - 1)) {
if (d[i][j - 1] < u[i][j] && b.can_add(j - 1)) {
m_string_updates.reset();
u[i][j] = d[i][j - 1];
}