diff --git a/gmon.out b/gmon.out new file mode 100644 index 000000000..cba10a19b Binary files /dev/null and b/gmon.out differ diff --git a/src/ast/rewriter/CMakeLists.txt b/src/ast/rewriter/CMakeLists.txt index 9d529f9b5..ba7eda277 100644 --- a/src/ast/rewriter/CMakeLists.txt +++ b/src/ast/rewriter/CMakeLists.txt @@ -39,6 +39,7 @@ z3_add_component(rewriter rewriter.cpp seq_axioms.cpp seq_eq_solver.cpp + seq_subset.cpp seq_rewriter.cpp seq_skolem.cpp th_rewriter.cpp diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 4453c94a7..26455e2f1 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -4587,51 +4587,7 @@ bool seq_rewriter::are_complements(expr* r1, expr* r2) const { * basic subset checker. */ bool seq_rewriter::is_subset(expr* r1, expr* r2) const { - // return false; - expr* ra1 = nullptr, *ra2 = nullptr, *ra3 = nullptr; - expr* rb1 = nullptr, *rb2 = nullptr, *rb3 = nullptr; - unsigned la, ua, lb, ub; - if (re().is_complement(r1, ra1) && - re().is_complement(r2, rb1)) { - return is_subset(rb1, ra1); - } - auto is_concat = [&](expr* r, expr*& a, expr*& b, expr*& c) { - return re().is_concat(r, a, b) && re().is_concat(b, b, c); - }; - while (true) { - if (r1 == r2) - return true; - if (re().is_full_seq(r2)) - return true; - if (re().is_dot_plus(r2) && re().get_info(r1).nullable == l_false) - return true; - if (is_concat(r1, ra1, ra2, ra3) && - is_concat(r2, rb1, rb2, rb3) && ra1 == rb1 && ra2 == rb2) { - r1 = ra3; - r2 = rb3; - continue; - } - if (re().is_concat(r1, ra1, ra2) && - re().is_concat(r2, rb1, rb2) && re().is_full_seq(rb1)) { - r1 = ra2; - continue; - } - // r1=ra3{la,ua}ra2, r2=rb3{lb,ub}rb2, ra3=rb3, lb<=la, ua<=ub - if (re().is_concat(r1, ra1, ra2) && re().is_loop(ra1, ra3, la, ua) && - re().is_concat(r2, rb1, rb2) && re().is_loop(rb1, rb3, lb, ub) && - ra3 == rb3 && lb <= la && ua <= ub) { - r1 = ra2; - r2 = rb2; - continue; - } - // ra1=ra3{la,ua}, r2=rb3{lb,ub}, ra3=rb3, lb<=la, ua<=ub - if (re().is_loop(r1, ra3, la, ua) && - re().is_loop(r2, rb3, lb, ub) && - ra3 == rb3 && lb <= la && ua <= ub) { - return true; - } - return false; - } + return m_subset.is_subset(r1, r2); } br_status seq_rewriter::mk_re_union0(expr* a, expr* b, expr_ref& result) { @@ -6163,4 +6119,3 @@ bool seq_rewriter::get_bounds(expr* e, unsigned& low, unsigned& high) { } return low <= high; } - diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 583720911..4ebd770a7 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -23,6 +23,7 @@ Notes: #include "ast/arith_decl_plugin.h" #include "ast/rewriter/rewriter_types.h" #include "ast/rewriter/bool_rewriter.h" +#include "ast/rewriter/seq_subset.h" #include "util/params.h" #include "util/lbool.h" #include "util/sign.h" @@ -128,6 +129,7 @@ class seq_rewriter { }; seq_util m_util; + seq_subset m_subset; arith_util m_autil; bool_rewriter m_br; // re2automaton m_re2aut; @@ -340,7 +342,7 @@ class seq_rewriter { public: seq_rewriter(ast_manager & m, params_ref const & p = params_ref()): - m_util(m), m_autil(m), m_br(m, p), // m_re2aut(m), + m_util(m), m_subset(m_util.re), m_autil(m), m_br(m, p), // m_re2aut(m), m_op_cache(m), m_es(m), m_lhs(m), m_rhs(m), m_coalesce_chars(true) { } @@ -424,4 +426,3 @@ public: */ lbool some_string_in_re(expr* r, zstring& s); }; - diff --git a/src/ast/rewriter/seq_subset.cpp b/src/ast/rewriter/seq_subset.cpp new file mode 100644 index 000000000..2fc4d1f71 --- /dev/null +++ b/src/ast/rewriter/seq_subset.cpp @@ -0,0 +1,146 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_subset.cpp + +Abstract: + + Heuristic regular-expression subset checks used by seq_rewriter. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-6-8 + +--*/ + +#include "ast/rewriter/seq_subset.h" + +bool seq_subset::is_subset_rec(expr* a, expr* b, unsigned depth) const { + while (true) { + + if (a == b) + return true; + if (m_re.is_empty(a)) + return true; + if (m_re.is_full_seq(b)) + return true; + if (m_re.is_epsilon(a) && m_re.get_info(b).nullable == l_true) + return true; + + if (depth >= m_max_depth) + return false; + + expr* a1 = nullptr, * a2 = nullptr, * b1 = nullptr, * b2 = nullptr; + unsigned la, ua, lb, ub; + + // a ⊆ .+ iff a is non-nullable + if (m_re.is_dot_plus(b) && m_re.get_info(a).nullable == l_false) + return true; + + // a ⊆ a* + if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth)) + return true; + + // e ⊆ a* + if (m_re.is_epsilon(a) && m_re.is_star(b, b1)) + return true; + + // R ⊆ R* + if (m_re.is_star(b, b1) && is_subset_rec(a, b1, depth + 1)) + return true; + + // R1* ⊆ R2* if R1 ⊆ R2 + if (m_re.is_star(a, a1) && m_re.is_star(b, b1) && is_subset_rec(a1, b1, depth + 1)) + return true; + + // R1+ ⊆ R2+ if R1 ⊆ R2 + if (m_re.is_plus(a, a1) && m_re.is_plus(b, b1) && is_subset_rec(a1, b1, depth)) + return true; + + // R ⊆ R+ + if (m_re.is_plus(b, b1) && is_subset_rec(a, b1, depth)) + return true; + + // R+ ⊆ R* + if (m_re.is_plus(a, a1) && m_re.is_star(b, b1) && is_subset_rec(a1, b1, depth + 1)) + return true; + + // range containment + if (m_re.is_range(a, la, ua) && m_re.is_range(b, lb, ub) && lb <= la && ua <= ub) + return true; + + // to_re(s) ⊆ range + if (m_re.is_to_re(a, a1) && m_re.is_range(b, lb, ub) && is_app(a1)) { + func_decl* f = to_app(a1)->get_decl(); + if (f->get_decl_kind() == OP_STRING_CONST && f->get_num_parameters() == 1) { + zstring const& s = f->get_parameter(0).get_zstring(); + if (s.length() == 1 && lb <= s[0] && s[0] <= ub) + return true; + } + } + + // a ⊆ b1 ∪ b2 if a ⊆ b1 or a ⊆ b2 + if (m_re.is_union(b, b1, b2) && (is_subset_rec(a, b1, depth + 1) || is_subset_rec(a, b2, depth + 1))) + return true; + + // a1 ∪ a2 ⊆ b if a1 ⊆ b and a2 ⊆ b + if (m_re.is_union(a, a1, a2) && is_subset_rec(a1, b, depth + 1) && is_subset_rec(a2, b, depth + 1)) + return true; + + // a1 ∩ a2 ⊆ b if a1 ⊆ b or a2 ⊆ b + if (m_re.is_intersection(a, a1, a2) && (is_subset_rec(a1, b, depth + 1) || is_subset_rec(a2, b, depth + 1))) + return true; + + // a ⊆ b1 ∩ b2 if a ⊆ b1 and a ⊆ b2 + if (m_re.is_intersection(b, b1, b2) && is_subset_rec(a, b1, depth + 1) && is_subset_rec(a, b2, depth + 1)) + return true; + + // R{la,ua} ⊆ R'{lb,ub} if R ⊆ R', lb<=la, ua<=ub + if (m_re.is_loop(a, a1, la, ua) && + m_re.is_loop(b, b1, lb, ub) && + lb <= la && ua <= ub && is_subset_rec(a1, b1, depth + 1)) { + return true; + } + + // a1 \ a2 ⊆ b if a1 ⊆ b + if (m_re.is_diff(a, a1, a2) && is_subset_rec(a1, b, depth + 1)) + return true; + + // R ⊆ Σ*·R' if R ⊆ R' + if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b1) && is_subset_rec(a, b2, depth)) + return true; + + // R ⊆ R'·Σ* if R ⊆ R' + if (m_re.is_concat(b, b1, b2) && m_re.is_full_seq(b2) && is_subset_rec(a, b1, depth)) + return true; + + // star absorption: R·R* ⊆ R*, R*·R ⊆ R* + bool const is_concat_star = m_re.is_concat(a, a1, a2) && m_re.is_star(b, b1); + if (is_concat_star && + is_subset_rec(a1, b1, depth + 1) && is_subset_rec(a2, b, depth + 1)) + return true; + if (is_concat_star && + is_subset_rec(a2, b1, depth + 1) && is_subset_rec(a1, b, depth + 1)) + return true; + + // concat monotonicity: + // tail-recursive on second arguments (without increasing depth bound). + if (m_re.is_concat(a, a1, a2) && m_re.is_concat(b, b1, b2) && is_subset_rec(a1, b1, depth + 1)) { + a = a2; + b = b2; + continue; + } + + // complement: ~a ⊆ ~b if b ⊆ a + if (m_re.is_complement(a, a1) && m_re.is_complement(b, b1)) + return is_subset_rec(b1, a1, depth + 1); + + return false; + } +} + +bool seq_subset::is_subset(expr* a, expr* b) const { + return is_subset_rec(a, b, 0); +} diff --git a/src/ast/rewriter/seq_subset.h b/src/ast/rewriter/seq_subset.h new file mode 100644 index 000000000..7329c898e --- /dev/null +++ b/src/ast/rewriter/seq_subset.h @@ -0,0 +1,30 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_subset.h + +Abstract: + + Heuristic regular-expression subset checks used by seq_rewriter. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-6-8 + +--*/ +#pragma once + +#include "ast/seq_decl_plugin.h" + +class seq_subset { + seq_util::rex& m_re; + static constexpr unsigned m_max_depth = 3; + + bool is_subset_rec(expr* a, expr* b, unsigned depth) const; + +public: + explicit seq_subset(seq_util::rex& re) : m_re(re) {} + bool is_subset(expr* a, expr* b) const; +};