diff --git a/src/ast/rewriter/seq_derive.cpp b/src/ast/rewriter/seq_derive.cpp index ea374d6c3..ecc3df701 100644 --- a/src/ast/rewriter/seq_derive.cpp +++ b/src/ast/rewriter/seq_derive.cpp @@ -252,7 +252,7 @@ namespace seq { // δ(reverse(r1)) - normalize by pushing reverse inward, then derive if (re().is_reverse(r, r1)) { - expr_ref norm = normalize_reverse(r1); + expr_ref norm = mk_regex_reverse(r1); if (norm != r) return derive_rec(norm); return expr_ref(re().mk_derivative(m_ele, r), m); @@ -423,96 +423,54 @@ namespace seq { } // ------------------------------------------------------- - // Normalize reverse by pushing it inward + // Normalize reverse // ------------------------------------------------------- - expr_ref derive::normalize_reverse(expr* r) { - expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * p = nullptr; + expr_ref derive::mk_regex_reverse(expr* r) { + expr* r1 = nullptr, * r2 = nullptr, * c = nullptr; unsigned lo = 0, hi = 0; - zstring zs; - - // reverse(reverse(r1)) = r1 - if (re().is_reverse(r, r1)) - return expr_ref(r1, m); - - // reverse(r1 · r2) = reverse(r2) · reverse(r1) - if (re().is_concat(r, r1, r2)) { - expr_ref a(re().mk_reverse(r2), m); - expr_ref b(re().mk_reverse(r1), m); - return expr_ref(re().mk_concat(a, b), m); + expr_ref result(m); + if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) || + re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r)) + result = r; + else if (re().is_to_re(r)) + result = re().mk_reverse(r); + else if (re().is_reverse(r, r1)) + result = r1; + else if (re().is_concat(r, r1, r2)) + result = mk_concat(mk_regex_reverse(r2), mk_regex_reverse(r1)); + else if (m.is_ite(r, c, r1, r2)) + result = m.mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2)); + else if (re().is_union(r, r1, r2)) { + auto a1 = mk_regex_reverse(r1); + auto b1 = mk_regex_reverse(r2); + result = re().mk_union(a1, b1); } - - // reverse(r1 ∪ r2) = reverse(r1) ∪ reverse(r2) - if (re().is_union(r, r1, r2)) { - expr_ref a(re().mk_reverse(r1), m); - expr_ref b(re().mk_reverse(r2), m); - return expr_ref(re().mk_union(a, b), m); + else if (re().is_intersection(r, r1, r2)) { + auto a1 = mk_regex_reverse(r1); + auto b1 = mk_regex_reverse(r2); + result = re().mk_inter(a1, b1); } - - // reverse(r1 ∩ r2) = reverse(r1) ∩ reverse(r2) - if (re().is_intersection(r, r1, r2)) { - expr_ref a(re().mk_reverse(r1), m); - expr_ref b(re().mk_reverse(r2), m); - return m_re.mk_inter(a, b); + else if (re().is_diff(r, r1, r2)) { + auto a1 = mk_regex_reverse(r1); + auto b1 = mk_regex_reverse(r2); + result = re().mk_diff(a1, b1); } - - // reverse(r1 \ r2) = reverse(r1) \ reverse(r2) - if (re().is_diff(r, r1, r2)) { - expr_ref a(re().mk_reverse(r1), m); - expr_ref b(re().mk_reverse(r2), m); - return expr_ref(re().mk_diff(a, b), m); - } - - // reverse(ite(c, r1, r2)) = ite(c, reverse(r1), reverse(r2)) - if (m.is_ite(r, p, r1, r2)) - return expr_ref(m.mk_ite(p, re().mk_reverse(r1), re().mk_reverse(r2)), m); - - // reverse(r1?) = reverse(r1)? - if (re().is_opt(r, r1)) - return expr_ref(re().mk_opt(re().mk_reverse(r1)), m); - - // reverse(~r1) = ~reverse(r1) - if (re().is_complement(r, r1)) - return expr_ref(re().mk_complement(re().mk_reverse(r1)), m); - - // reverse(r1*) = reverse(r1)* - if (re().is_star(r, r1)) - return expr_ref(re().mk_star(re().mk_reverse(r1)), m); - - // reverse(r1+) = reverse(r1)+ - if (re().is_plus(r, r1)) - return expr_ref(re().mk_plus(re().mk_reverse(r1)), m); - - // reverse(r1{lo,}) = reverse(r1){lo,} - if (re().is_loop(r, r1, lo)) - return expr_ref(re().mk_loop(re().mk_reverse(r1), lo), m); - - // reverse(r1{lo,hi}) = reverse(r1){lo,hi} - if (re().is_loop(r, r1, lo, hi)) - return expr_ref(re().mk_loop_proper(re().mk_reverse(r1), lo, hi), m); - - // Symmetric: full_seq, empty, range, full_char, of_pred - if (re().is_full_seq(r) || re().is_empty(r) || re().is_range(r) || - re().is_full_char(r) || re().is_of_pred(r)) - return expr_ref(r, m); - - // reverse(to_re(s)) where s is a string literal - if (re().is_to_re(r, s) && u().str.is_string(s, zs)) - return expr_ref(re().mk_to_re(u().str.mk_string(zs.reverse())), m); - - // reverse(to_re(unit)) = to_re(unit) - if (re().is_to_re(r, s) && u().str.is_unit(s)) - return expr_ref(r, m); - - // reverse(to_re(s1 ++ s2)) = reverse(to_re(s2)) · reverse(to_re(s1)) - if (re().is_to_re(r, s) && u().str.is_concat(s, r1, r2)) { - expr_ref a(re().mk_reverse(re().mk_to_re(r2)), m); - expr_ref b(re().mk_reverse(re().mk_to_re(r1)), m); - return expr_ref(re().mk_concat(a, b), m); - } - - // Stuck — cannot normalize further - return expr_ref(re().mk_reverse(r), m); + else if (re().is_star(r, r1)) + result = re().mk_star(mk_regex_reverse(r1)); + else if (re().is_plus(r, r1)) + result = re().mk_plus(mk_regex_reverse(r1)); + else if (re().is_loop(r, r1, lo)) + result = re().mk_loop(mk_regex_reverse(r1), lo); + else if (re().is_loop(r, r1, lo, hi)) + result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi); + else if (re().is_opt(r, r1)) + result = re().mk_opt(mk_regex_reverse(r1)); + else if (re().is_complement(r, r1)) + result = re().mk_complement(mk_regex_reverse(r1)); + else + result = re().mk_reverse(r); + return result; } // ------------------------------------------------------- @@ -520,16 +478,92 @@ namespace seq { // ------------------------------------------------------- expr_ref derive::is_nullable(expr* r) { - // First, try the static info which handles ground/interpreted regex - lbool nb = re().get_info(r).nullable; - if (nb == l_true) - return expr_ref(m.mk_true(), m); - if (nb == l_false) - return expr_ref(m.mk_false(), m); - // For symbolic regexes, return a membership predicate - sort* s = nullptr; - VERIFY(m_util.is_re(r, s)); - return expr_ref(re().mk_in_re(u().str.mk_empty(s), r), m); + SASSERT(m_util.is_re(r) || m_util.is_seq(r)); + expr* r1 = nullptr, * r2 = nullptr, * cond = nullptr; + sort* seq_sort = nullptr; + unsigned lo = 0, hi = 0; + zstring s1; + expr_ref result(m); + if (re().is_concat(r, r1, r2) || + re().is_intersection(r, r1, r2)) { + m_br.mk_and(is_nullable(r1), is_nullable(r2), result); + } + else if (re().is_union(r, r1, r2) || re().is_antimirov_union(r, r1, r2)) { + m_br.mk_or(is_nullable(r1), is_nullable(r2), result); + } + else if (re().is_diff(r, r1, r2)) { + m_br.mk_not(is_nullable(r2), result); + m_br.mk_and(result, is_nullable(r1), result); + } + else if (re().is_star(r) || + re().is_opt(r) || + re().is_full_seq(r) || + re().is_epsilon(r) || + (re().is_loop(r, r1, lo) && lo == 0) || + (re().is_loop(r, r1, lo, hi) && lo == 0)) { + result = m.mk_true(); + } + else if (re().is_full_char(r) || + re().is_empty(r) || + re().is_of_pred(r) || + re().is_range(r)) { + result = m.mk_false(); + } + else if (re().is_plus(r, r1) || + (re().is_loop(r, r1, lo) && lo > 0) || + (re().is_loop(r, r1, lo, hi) && lo > 0) || + (re().is_reverse(r, r1))) { + result = is_nullable(r1); + } + else if (re().is_complement(r, r1)) { + m_br.mk_not(is_nullable(r1), result); + } + else if (re().is_to_re(r, r1)) { + result = is_nullable(r1); + } + else if (m.is_ite(r, cond, r1, r2)) { + m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result); + } + else if (m_util.is_re(r, seq_sort)) { + result = is_nullable_symbolic_regex(r, seq_sort); + } + else if (u().str.is_concat(r, r1, r2)) { + m_br.mk_and(is_nullable(r1), is_nullable(r2), result); + } + else if (u().str.is_empty(r)) { + result = m.mk_true(); + } + else if (u().str.is_unit(r)) { + result = m.mk_false(); + } + else if (u().str.is_string(r, s1)) { + result = m.mk_bool_val(s1.length() == 0); + } + else { + SASSERT(m_util.is_seq(r)); + result = m.mk_eq(u().str.mk_empty(r->get_sort()), r); + } + return result; + } + + expr_ref derive::is_nullable_symbolic_regex(expr* r, sort* seq_sort) { + SASSERT(m_util.is_re(r)); + expr* elem = nullptr, * r1 = r, * r2 = nullptr, * s = nullptr; + expr_ref elems(u().str.mk_empty(seq_sort), m); + expr_ref result(m); + while (re().is_derivative(r1, elem, r2)) { + if (u().str.is_empty(elems)) + elems = u().str.mk_unit(elem); + else + elems = u().str.mk_concat(u().str.mk_unit(elem), elems); + r1 = r2; + } + if (re().is_to_re(r1, s)) { + result = m.mk_eq(elems, s); + return result; + } + result = re().mk_in_re(u().str.mk_empty(seq_sort), r); + return result; } // ------------------------------------------------------- @@ -695,10 +729,19 @@ namespace seq { expr_ref derive::mk_concat(expr* a, expr* b) { + sort* seq_s = nullptr, * ele_s = nullptr; + VERIFY(m_util.is_re(a, seq_s)); + VERIFY(u().is_seq(seq_s, ele_s)); if (re().is_empty(a)) return expr_ref(a, m); if (re().is_empty(b)) return expr_ref(b, m); if (re().is_epsilon(a)) return expr_ref(b, m); if (re().is_epsilon(b)) return expr_ref(a, m); + if (re().is_full_seq(a) && re().is_full_seq(b)) + return expr_ref(a, m); + if (re().is_full_char(a) && re().is_full_seq(b)) + return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m); + if (re().is_full_seq(a) && re().is_full_char(b)) + return expr_ref(re().mk_plus(re().mk_full_char(ele_s)), m); // to_re(s1) · to_re(s2) → to_re(s1 ++ s2) expr* s1 = nullptr, * s2 = nullptr; diff --git a/src/ast/rewriter/seq_derive.h b/src/ast/rewriter/seq_derive.h index 0c981d52f..69f38e72f 100644 --- a/src/ast/rewriter/seq_derive.h +++ b/src/ast/rewriter/seq_derive.h @@ -125,6 +125,7 @@ namespace seq { // Nullable check: returns a Boolean expression expr_ref is_nullable(expr* r); + expr_ref is_nullable_symbolic_regex(expr* r, sort* seq_sort); // Smart constructors with path-aware simplification and ACI canonicalization expr_ref mk_union(expr* a, expr* b); @@ -150,8 +151,8 @@ namespace seq { bool pred_implies(bool sign_a, expr* a, bool sign_b, expr* b); bool pred_implies(expr* a, expr* b); - // Normalize reverse(r) by pushing reverse inward - expr_ref normalize_reverse(expr* r); + // Normalize reverse(r) + expr_ref mk_regex_reverse(expr* r); // Condition evaluation helpers lbool eval_cond(expr* cond); @@ -184,4 +185,3 @@ namespace seq { }; } - diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 08c7887ee..fa200f849 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3080,98 +3080,6 @@ expr_ref seq_rewriter::merge_regex_sets(expr* r1, expr* r2, expr* unit, } } -expr_ref seq_rewriter::mk_regex_reverse(expr* r) { - expr* r1 = nullptr, * r2 = nullptr, * c = nullptr; - unsigned lo = 0, hi = 0; - expr_ref result(m()); - if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) || - re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r)) - result = r; - else if (re().is_to_re(r)) - result = re().mk_reverse(r); - else if (re().is_reverse(r, r1)) - result = r1; - else if (re().is_concat(r, r1, r2)) - result = mk_regex_concat(mk_regex_reverse(r2), mk_regex_reverse(r1)); - else if (m().is_ite(r, c, r1, r2)) - result = m().mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2)); - else if (re().is_union(r, r1, r2)) { - // enforce deterministic evaluation order - auto a1 = mk_regex_reverse(r1); - auto b1 = mk_regex_reverse(r2); - result = re().mk_union(a1, b1); - } - else if (re().is_intersection(r, r1, r2)) { - auto a1 = mk_regex_reverse(r1); - auto b1 = mk_regex_reverse(r2); - result = re().mk_inter(a1, b1); - } - else if (re().is_diff(r, r1, r2)) { - auto a1 = mk_regex_reverse(r1); - auto b1 = mk_regex_reverse(r2); - result = re().mk_diff(a1, b1); - } - else if (re().is_star(r, r1)) - result = re().mk_star(mk_regex_reverse(r1)); - else if (re().is_plus(r, r1)) - result = re().mk_plus(mk_regex_reverse(r1)); - else if (re().is_loop(r, r1, lo)) - result = re().mk_loop(mk_regex_reverse(r1), lo); - else if (re().is_loop(r, r1, lo, hi)) - result = re().mk_loop_proper(mk_regex_reverse(r1), lo, hi); - else if (re().is_opt(r, r1)) - result = re().mk_opt(mk_regex_reverse(r1)); - else if (re().is_complement(r, r1)) - result = re().mk_complement(mk_regex_reverse(r1)); - else - //stuck cases: such as r being a regex variable - //observe that re().mk_reverse(to_re(s)) is not a stuck case - result = re().mk_reverse(r); - return result; -} - -expr_ref seq_rewriter::mk_regex_concat(expr* r, expr* s) { - sort* seq_sort = nullptr, * ele_sort = nullptr; - VERIFY(m_util.is_re(r, seq_sort)); - VERIFY(u().is_seq(seq_sort, ele_sort)); - SASSERT(r->get_sort() == s->get_sort()); - expr_ref result(m()); - expr* r1, * r2; - if (re().is_epsilon(r) || re().is_empty(s)) - result = s; - else if (re().is_epsilon(s) || re().is_empty(r)) - result = r; - else if (re().is_full_seq(r) && re().is_full_seq(s)) - result = r; - else if (re().is_full_char(r) && re().is_full_seq(s)) - // ..* = .+ - result = re().mk_plus(re().mk_full_char(ele_sort)); - else if (re().is_full_seq(r) && re().is_full_char(s)) - // .*. = .+ - result = re().mk_plus(re().mk_full_char(ele_sort)); - else if (re().is_concat(r, r1, r2)) - // create the resulting concatenation in right-associative form except for the following case - // TODO: maintain the following invariant for A ++ B{m,n} + C - // concat(concat(A, B{m,n}), C) (if A != () and C != ()) - // concat(B{m,n}, C) (if A == () and C != ()) - // where A, B, C are regexes - // Using & below for Intersection and | for Union - // In other words, do not make A ++ B{m,n} into right-assoc form, but keep B{m,n} at the top - // This will help to identify this situation in the merge routine: - // concat(concat(A, B{0,m}), C) | concat(concat(A, B{0,n}), C) - // simplifies to - // concat(concat(A, B{0,max(m,n)}), C) - // analogously: - // concat(concat(A, B{0,m}), C) & concat(concat(A, B{0,n}), C) - // simplifies to - // concat(concat(A, B{0,min(m,n)}), C) - result = mk_regex_concat(r1, mk_regex_concat(r2, s)); - else { - result = re().mk_concat(r, s); - } - return result; -} - /* * calls elim_condition */ diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 30134d1cd..8b4bfd5b7 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -195,9 +195,6 @@ class seq_rewriter { bool check_deriv_normal_form(expr* r, int level = 3); #endif - expr_ref mk_regex_reverse(expr* r); - expr_ref mk_regex_concat(expr* r1, expr* r2); - expr_ref merge_regex_sets(expr* r1, expr* r2, expr* unit, std::function& decompose, std::function& compose); // elem is (:var 0) and path a condition that may have (:var 0) as a free variable diff --git a/src/model/datatype_factory.cpp b/src/model/datatype_factory.cpp index fac858ccc..b93703acd 100644 --- a/src/model/datatype_factory.cpp +++ b/src/model/datatype_factory.cpp @@ -139,9 +139,6 @@ expr * datatype_factory::get_almost_fresh_value(sort * s) { expr * datatype_factory::get_fresh_value(sort * s) { if (!m_util.is_datatype(s)) return m_model.get_fresh_value(s); - if (m_fresh_depth >= m_max_fresh_depth) - return get_last_fresh_value(s); - struct depth_guard { unsigned& d; depth_guard(unsigned& d) : d(d) { ++d; } ~depth_guard() { --d; } } _dg(m_fresh_depth); TRACE(datatype, tout << "generating fresh value for: " << s->get_name() << "\n";); auto& [set, values] = get_value_set(s); // Approach 0) diff --git a/src/model/datatype_factory.h b/src/model/datatype_factory.h index 2d8f216b4..b2a6b75d3 100644 --- a/src/model/datatype_factory.h +++ b/src/model/datatype_factory.h @@ -24,8 +24,6 @@ Revision History: class datatype_factory : public struct_factory { datatype_util m_util; obj_map m_last_fresh_value; - unsigned m_fresh_depth = 0; - static const unsigned m_max_fresh_depth = 512; expr * get_last_fresh_value(sort * s); expr * get_almost_fresh_value(sort * s); diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index a3c56159d..64487a21e 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -22,7 +22,6 @@ Author: #include "ast/ast_util.h" #include "ast/for_each_expr.h" #include -#include namespace smt { @@ -224,40 +223,6 @@ namespace smt { th.add_axiom(~lit); return true; } - // Second pass: deeper exploration for intersection/complement/diff regexes - // These are candidates for dead state detection (the result may be empty) - // For these, do unlimited depth exploration with a time budget - unsigned r_id = get_state_id(r); - expr* r1 = nullptr, *r2 = nullptr; - if (!m_state_graph.is_dead(r_id) && !m_state_graph.is_live(r_id) && - (re().is_intersection(r, r1, r2) || re().is_complement(r, r1) || re().is_diff(r, r1, r2))) { - // Collect all unexplored states and explore them iteratively - // with a time budget - auto pass2_start = std::chrono::steady_clock::now(); - bool changed = true; - while (changed && !m_state_graph.is_dead(r_id)) { - auto elapsed = std::chrono::duration_cast( - std::chrono::steady_clock::now() - pass2_start).count(); - if (elapsed > 100) break; - changed = false; - for (unsigned i = 0; i < m_state_to_expr.size() && !m_state_graph.is_dead(r_id); ++i) { - unsigned st_id = i + 1; - if (m_state_graph.is_done(st_id) || m_state_graph.is_live(st_id) || m_state_graph.is_dead(st_id)) - continue; - // This is an unexplored state — explore it - expr* st = m_state_to_expr.get(i); - if (re().get_info(st).nullable == l_true) - continue; - if (update_state_graph(st, 1)) - changed = true; - } - } - if (m_state_graph.is_dead(r_id)) { - STRACE(seq_regex_brief, tout << "(dead2) ";); - th.add_axiom(~lit); - return true; - } - } } return false; } @@ -851,7 +816,7 @@ namespace smt { /* Update the state graph with expression r and all its derivatives. */ - bool seq_regex::update_state_graph(expr* r, unsigned depth) { + bool seq_regex::update_state_graph(expr* r) { unsigned r_id = get_state_id(r); if (m_state_graph.is_done(r_id)) return false; if (m_state_graph.get_size() >= m_max_state_graph_size) { @@ -894,38 +859,6 @@ namespace smt { m_state_graph.add_edge(r_id, dr_id, maybecycle); } m_state_graph.mark_done(r_id); - // Explore direct targets for dead state detection (depth 1 only) - // This compensates for less-canonical derivative representations - if (depth < 1) { - for (auto const& dr: derivatives) { - unsigned dr_id = get_state_id(dr); - if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) - continue; - if (re().get_info(dr).nullable == l_true) - continue; - update_state_graph(dr, depth + 1); - } - } - else if (depth == 1) { - // At depth 1, do lightweight exploration: compute derivatives - // of this state's targets but only to check if they're all dead. - // Don't add complex states to the graph — just mark them dead if - // their get_info says min_length == UINT_MAX or is_empty. - for (auto const& dr: derivatives) { - unsigned dr_id = get_state_id(dr); - if (m_state_graph.is_done(dr_id) || m_state_graph.is_live(dr_id)) - continue; - auto dr_info = re().get_info(dr); - if (dr_info.nullable == l_true) { - m_state_graph.add_state(dr_id); - m_state_graph.mark_live(dr_id); - } - else if (re().is_empty(dr) || dr_info.min_length == UINT_MAX) { - m_state_graph.add_state(dr_id); - m_state_graph.mark_done(dr_id); - } - } - } } STRACE(seq_regex, m_state_graph.display(tout);); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index af03b3c50..5c3fddd25 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -124,7 +124,7 @@ namespace smt { // Note: Doesn't need to be sound or complete (doesn't affect soundness) bool can_be_in_cycle(expr* r1, expr* r2); // Update the graph - bool update_state_graph(expr* r, unsigned depth = 0); + bool update_state_graph(expr* r); // Printing expressions for seq_regex_brief std::string state_str(expr* e);