diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index ab20120df..7db7de370 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -46,6 +46,11 @@ namespace euf { } } + // Saturating unsigned addition: returns UINT_MAX instead of wrapping. + static unsigned saturating_add(unsigned a, unsigned b) { + return (b > UINT_MAX - a) ? UINT_MAX : a + b; + } + unsigned enode_concat_hash::operator()(enode* n) const { snode* sn = sg.find(n->get_expr()); if (sn && sn->has_cached_hash()) @@ -64,6 +69,15 @@ namespace euf { if (a == b) return true; if (!is_any_concat(a, seq) || !is_any_concat(b, seq)) return false; + // fast-path: snode length check (O(1), avoids leaf allocation) + snode* sa = sg.find(a->get_expr()); + snode* sb = sg.find(b->get_expr()); + if (sa && sb && sa->length() != sb->length()) + return false; + // fast-path: cached associativity hash (O(1)) + bool both_have_hash = sa && sa->has_cached_hash() && sb && sb->has_cached_hash(); + if (both_have_hash && sa->assoc_hash() != sb->assoc_hash()) + return false; enode_vector la, lb; collect_enode_leaves(a, seq, la); collect_enode_leaves(b, seq, lb); @@ -82,7 +96,7 @@ namespace euf { m_sg(sg ? *sg : *alloc(sgraph, g.get_manager(), g, false)), m_sg_owned(sg == nullptr), m_concat_hash(m_seq, m_sg), - m_concat_eq(m_seq), + m_concat_eq(m_seq, m_sg), m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) { } @@ -168,6 +182,35 @@ namespace euf { if (is_concat(n)) propagate_simplify(n); } + + // Re-apply identity and absorption rules over all tracked concat nodes. + // This handles the case where the merge caused a child to become equivalent + // to an identity (ε) or absorbing element (∅) that was not known at + // registration time (e.g. b ~ "" discovered after concat(x, b) was registered). + // Also re-simplifies RE concat nodes when a child's root has become full_seq, + // to handle nullable absorption through nested concats: + // concat(.*, concat(v, w)) = concat(.*, w) when v is nullable but w is not. + for (enode* n : m_concats) { + enode *na, *nb; + if (is_str_concat(n, na, nb)) { + if (is_str_empty(na)) + push_merge(n, nb); + else if (is_str_empty(nb)) + push_merge(n, na); + } + if (is_re_concat(n, na, nb)) { + if (is_re_epsilon(na)) + push_merge(n, nb); + else if (is_re_epsilon(nb)) + push_merge(n, na); + else if (is_re_empty(na)) + push_merge(n, na); // absorb: concat(∅, b) = ∅ + else if (is_re_empty(nb)) + push_merge(n, nb); // absorb: concat(a, ∅) = ∅ + else if (is_full_seq(na->get_root()) || is_full_seq(nb->get_root())) + propagate_simplify(n); + } + } } // @@ -202,6 +245,9 @@ namespace euf { // 2. Nullable absorption: concat(u, .*, v, w) = concat(u, .*, w) // when v is nullable and adjacent to full_seq (.*). // + // 3. Loop merging: concat(body{lo1,hi1}, body{lo2,hi2}) = body{lo1+lo2, hi1+hi2} + // when both children are loops over congruent bodies. + // void seq_plugin::propagate_simplify(enode* n) { enode* a, *b; if (!is_concat(n, a, b)) @@ -236,6 +282,20 @@ namespace euf { // concat(concat(u, v), .*) = concat(u, .*) when v nullable // handled by associativity + nullable absorption on sub-concats + + // Rule 3: Loop merging + // concat(v{l1,h1}, v{l2,h2}) = v{l1+l2,h1+h2} + unsigned lo1, hi1, lo2, hi2; + if (same_loop_body(a, b, lo1, hi1, lo2, hi2)) { + ast_manager& m = g.get_manager(); + enode* body_n = a->get_arg(0); + // saturating add: prevent silent unsigned wrap-around on large bounds + unsigned lo_merged = saturating_add(lo1, lo2); + unsigned hi_merged = saturating_add(hi1, hi2); + expr_ref merged(m_seq.re.mk_loop(body_n->get_expr(), lo_merged, hi_merged), m); + enode* merged_n = mk(merged, 1, &body_n); + push_merge(n, merged_n); + } } bool seq_plugin::is_nullable(expr* e) { diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index 1bbb58630..d03d34286 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -63,7 +63,8 @@ namespace euf { // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). struct enode_concat_eq { seq_util const& seq; - enode_concat_eq(seq_util const& s) : seq(s) {} + sgraph& sg; + enode_concat_eq(seq_util const& s, sgraph& sg) : seq(s), sg(sg) {} bool operator()(enode* a, enode* b) const; }; @@ -120,11 +121,14 @@ namespace euf { bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); } // string empty: ε for str.++ - bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + // Follows the union-find root so that merges are correctly reflected. + bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_root()->get_expr()); } // regex empty set: ∅ for re.++ (absorbing element) - bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_expr()); } + // Follows the union-find root so that merges are correctly reflected. + bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_root()->get_expr()); } // regex epsilon: to_re("") for re.++ (identity element) - bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_expr()); } + // Follows the union-find root so that merges are correctly reflected. + bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_root()->get_expr()); } bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); } bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); } diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp index 877eb5f94..05ef2dbf2 100644 --- a/src/test/euf_seq_plugin.cpp +++ b/src/test/euf_seq_plugin.cpp @@ -227,6 +227,69 @@ static void test_sgraph_egraph_sync() { SASSERT(nx->get_root() != ny->get_root()); } +// test seq_plugin: identity elimination fires after a post-registration merge +// When b ~ "" is learned AFTER concat(a, b) is registered, the plugin must +// re-check identity rules and conclude concat(a, b) ~ a. +static void test_seq_plugin_identity_after_merge() { + std::cout << "test_seq_plugin_identity_after_merge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref a(m.mk_const("a", str_sort), m); + expr_ref b(m.mk_const("b", str_sort), m); + expr_ref empty(seq.str.mk_empty(str_sort), m); + expr_ref ab(seq.str.mk_concat(a, b), m); + + // Register concat(a, b) first — at this point b is not yet empty. + auto* nab = get_node(g, seq, ab); + auto* na = g.find(a); + auto* nb = g.find(b); + auto* nempty = get_node(g, seq, empty); + g.propagate(); + + // Now learn b ~ "" via a merge. + g.merge(nb, nempty, nullptr); + g.propagate(); + + // After propagation, concat(a, b) should be equivalent to a. + SASSERT(nab->get_root() == na->get_root()); + std::cout << g << "\n"; +} + +// test seq_plugin: loop merging — concat(r{lo1,hi1}, r{lo2,hi2}) = r{lo1+lo2, hi1+hi2} +static void test_seq_plugin_loop_merge() { + std::cout << "test_seq_plugin_loop_merge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + euf::egraph& g = sg.get_egraph(); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref x(m.mk_const("x", str_sort), m); + expr_ref r(seq.re.mk_to_re(x), m); + // r{2,3} and r{1,2} — bounds are chosen so the merged form r{3,5} is a distinct loop. + expr_ref first_loop(seq.re.mk_loop_proper(r, 2, 3), m); + expr_ref second_loop(seq.re.mk_loop_proper(r, 1, 2), m); + expr_ref expected_merged_loop(seq.re.mk_loop_proper(r, 3, 5), m); + // concat(r{2,3}, r{1,2}) + expr_ref concat_loops(seq.re.mk_concat(first_loop, second_loop), m); + + auto* nc = get_node(g, seq, concat_loops); + auto* nl35 = get_node(g, seq, expected_merged_loop); + g.propagate(); + + // After propagation, concat(r{2,3}, r{1,2}) should be equivalent to r{3,5}. + SASSERT(nc->get_root() == nl35->get_root()); + std::cout << g << "\n"; +} + void tst_euf_seq_plugin() { s_var = 0; test_sgraph_basic(); s_var = 0; test_sgraph_backtrack(); @@ -235,4 +298,6 @@ void tst_euf_seq_plugin() { s_var = 0; test_seq_plugin_star_merge(); s_var = 0; test_seq_plugin_nullable_absorb(); s_var = 0; test_sgraph_egraph_sync(); + s_var = 0; test_seq_plugin_identity_after_merge(); + s_var = 0; test_seq_plugin_loop_merge(); }