diff --git a/src/ast/euf/CMakeLists.txt b/src/ast/euf/CMakeLists.txt index adacfe5d1..edd99487f 100644 --- a/src/ast/euf/CMakeLists.txt +++ b/src/ast/euf/CMakeLists.txt @@ -9,6 +9,7 @@ z3_add_component(euf euf_justification.cpp euf_mam.cpp euf_plugin.cpp + euf_seq_plugin.cpp euf_sgraph.cpp euf_specrel_plugin.cpp ho_matcher.cpp diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp new file mode 100644 index 000000000..531591336 --- /dev/null +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -0,0 +1,259 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_seq_plugin.cpp + +Abstract: + + Plugin structure for sequences/strings. + + Merges equivalence classes taking into account associativity + of concatenation and algebraic properties of strings and + regular expressions. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#include "ast/euf/euf_seq_plugin.h" +#include "ast/euf/euf_egraph.h" +#include "ast/ast_pp.h" + +namespace euf { + + seq_plugin::seq_plugin(egraph& g): + plugin(g), + m_seq(g.get_manager()), + m_sg(g.get_manager()) { + } + + void seq_plugin::register_node(enode* n) { + m_queue.push_back(n); + push_undo(undo_kind::undo_add_concat); + } + + void seq_plugin::merge_eh(enode* n1, enode* n2) { + m_queue.push_back(enode_pair(n1, n2)); + push_undo(undo_kind::undo_add_concat); + } + + void seq_plugin::push_undo(undo_kind k) { + m_undo.push_back(k); + push_plugin_undo(get_id()); + } + + void seq_plugin::propagate() { + if (m_qhead == m_queue.size()) + return; + // save qhead for undo + unsigned old_qhead = m_qhead; + for (; m_qhead < m_queue.size(); ++m_qhead) { + if (g.inconsistent()) + break; + if (std::holds_alternative(m_queue[m_qhead])) { + auto n = *std::get_if(&m_queue[m_qhead]); + propagate_register_node(n); + } + else { + auto [a, b] = *std::get_if(&m_queue[m_qhead]); + propagate_merge(a, b); + } + } + (void)old_qhead; + } + + void seq_plugin::propagate_register_node(enode* n) { + if (!m_seq.is_seq(n->get_expr()) && !m_seq.is_re(n->get_expr())) + return; + + TRACE(seq, tout << "seq register " << g.bpp(n) << "\n"); + + // register in sgraph + m_sg.mk(n->get_expr()); + + if (is_concat(n)) { + m_concats.push_back(n); + propagate_assoc(n); + propagate_simplify(n); + } + + // n-ary concat: concat(a, b, c) => concat(a, concat(b, c)) + if (is_concat(n) && n->num_args() > 2) { + auto last = n->get_arg(n->num_args() - 1); + for (unsigned i = n->num_args() - 1; i-- > 0; ) + last = mk_concat(n->get_arg(i), last); + push_merge(last, n); + } + + // empty concat: concat(a, empty) = a, concat(empty, b) = b + enode* a, *b; + if (is_concat(n, a, b)) { + if (is_empty(a)) + push_merge(n, b); + else if (is_empty(b)) + push_merge(n, a); + } + } + + void seq_plugin::propagate_merge(enode* a, enode* b) { + if (!m_seq.is_seq(a->get_expr()) && !m_seq.is_re(a->get_expr())) + return; + + TRACE(seq, tout << "seq merge " << g.bpp(a) << " == " << g.bpp(b) << "\n"); + + // when equivalence classes merge, re-check concat simplifications + for (enode* n : enode_class(a)) { + if (is_concat(n)) + propagate_simplify(n); + } + } + + // + // Concat associativity: + // concat(concat(a, b), c) = concat(a, concat(b, c)) + // + // This normalizes to right-associated form. + // + void seq_plugin::propagate_assoc(enode* n) { + enode* a, *b; + if (!is_concat(n, a, b)) + return; + + enode* a1, *a2; + if (is_concat(a, a1, a2)) { + // concat(concat(a1, a2), b) => concat(a1, concat(a2, b)) + enode* inner = mk_concat(a2, b); + enode* outer = mk_concat(a1, inner); + push_merge(outer, n); + } + } + + // + // Concat simplification rules from ZIPT: + // + // 1. Kleene star merging: concat(u, v*, v*, w) = concat(u, v*, w) + // when adjacent children in a concat chain have congruent star bodies. + // + // 2. Nullable absorption: concat(u, .*, v, w) = concat(u, .*, w) + // when v is nullable and adjacent to full_seq (.*). + // + void seq_plugin::propagate_simplify(enode* n) { + enode* a, *b; + if (!is_concat(n, a, b)) + return; + + // Rule 1: Kleene star merging + // concat(v*, v*) = v* + if (same_star_body(a, b)) + push_merge(n, a); + + // Rule 1 extended: concat(v*, concat(v*, c)) = concat(v*, c) + enode* b1, *b2; + if (is_concat(b, b1, b2) && same_star_body(a, b1)) + push_merge(n, b); + + // Rule 2: Nullable absorption by .* + // concat(.*, v) = .* when v is nullable + if (is_full_seq(a) && is_nullable(b)) + push_merge(n, a); + + // concat(v, .*) = .* when v is nullable + if (is_nullable(a) && is_full_seq(b)) + push_merge(n, b); + + // concat(.*, concat(v, w)) = concat(.*, w) when v nullable + if (is_full_seq(a) && is_concat(b, b1, b2) && is_nullable(b1)) { + enode* simplified = mk_concat(a, b2); + push_merge(n, simplified); + } + + // concat(concat(u, v), .*) = concat(u, .*) when v nullable + enode* a1, *a2; + if (is_concat(a, a1, a2) && is_nullable(a2) && is_full_seq(b)) { + enode* simplified = mk_concat(a1, b); + push_merge(n, simplified); + } + } + + bool seq_plugin::is_nullable(enode* n) { + snode* s = m_sg.find(n->get_expr()); + if (s) + return s->is_nullable(); + // empty string is nullable + if (is_empty(n)) + return true; + return false; + } + + bool seq_plugin::same_star_body(enode* a, enode* b) { + if (!is_star(a) || !is_star(b)) + return false; + // re.star(x) and re.star(y) have congruent bodies if x ~ y + return a->get_arg(0)->get_root() == b->get_arg(0)->get_root(); + } + + bool seq_plugin::same_loop_body(enode* a, enode* b, + unsigned& lo1, unsigned& hi1, + unsigned& lo2, unsigned& hi2) { + if (!is_loop(a) || !is_loop(b)) + return false; + expr* body_a, *body_b; + if (!m_seq.re.is_loop(a->get_expr(), body_a, lo1, hi1)) + return false; + if (!m_seq.re.is_loop(b->get_expr(), body_b, lo2, hi2)) + return false; + enode* na = g.find(body_a); + enode* nb = g.find(body_b); + if (!na || !nb) + return false; + return na->get_root() == nb->get_root(); + } + + enode* seq_plugin::mk_concat(enode* a, enode* b) { + expr* e = m_seq.str.mk_concat(a->get_expr(), b->get_expr()); + enode* args[2] = { a, b }; + return mk(e, 2, args); + } + + enode* seq_plugin::mk_empty(sort* s) { + expr* e = m_seq.str.mk_empty(s); + return mk(e, 0, nullptr); + } + + void seq_plugin::undo() { + auto k = m_undo.back(); + m_undo.pop_back(); + switch (k) { + case undo_kind::undo_add_concat: + SASSERT(!m_queue.empty()); + m_queue.pop_back(); + if (m_qhead > m_queue.size()) + m_qhead = m_queue.size(); + break; + case undo_kind::undo_push_scope: + m_sg.pop(1); + break; + } + } + + void seq_plugin::push_scope_eh() { + m_sg.push(); + m_undo.push_back(undo_kind::undo_push_scope); + push_plugin_undo(get_id()); + } + + std::ostream& seq_plugin::display(std::ostream& out) const { + out << "seq\n"; + m_sg.display(out); + return out; + } + + void seq_plugin::collect_statistics(statistics& st) const { + m_sg.collect_statistics(st); + } +} diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h new file mode 100644 index 000000000..90c55c39e --- /dev/null +++ b/src/ast/euf/euf_seq_plugin.h @@ -0,0 +1,127 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + euf_seq_plugin.h + +Abstract: + + Plugin structure for sequences/strings. + + Merges equivalence classes taking into account associativity + of concatenation and algebraic properties of strings and + regular expressions. Implements features from ZIPT: + + -- Concat associativity: str.++ is associative, so + concat(a, concat(b, c)) = concat(concat(a, b), c). + Handled via an AC-style plugin for the concat operator. + + -- Kleene star merging: adjacent identical Kleene stars + in a concatenation are collapsed, u.v*.v*.w = u.v*.w + + -- Loop merging: adjacent loops over the same body are + merged, v{l1,h1}.v{l2,h2} = v{l1+l2,h1+h2} + + -- Nullable absorption: a nullable token adjacent to .* + is absorbed, u.*.v.w = u.*.w when v is nullable. + + The plugin integrates with euf_egraph for congruence closure + and with sgraph for snode-based string representation. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + Clemens Eisenhofer 2026-03-01 + +--*/ + +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/euf/euf_plugin.h" +#include "ast/euf/euf_sgraph.h" + +namespace euf { + + class egraph; + + class seq_plugin : public plugin { + + enum class undo_kind { + undo_add_concat, + undo_push_scope + }; + + seq_util m_seq; + sgraph m_sg; + svector m_undo; + + // queue of merges and registrations to process + vector> m_queue; + unsigned m_qhead = 0; + + // track registered concat nodes for simplification + enode_vector m_concats; + + bool is_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } + bool is_concat(enode* n, enode*& a, enode*& b) { + return is_concat(n) && n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); + } + bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); } + bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); } + bool is_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); } + bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); } + + enode* mk_concat(enode* a, enode* b); + enode* mk_empty(sort* s); + + void push_undo(undo_kind k); + + void propagate_register_node(enode* n); + void propagate_merge(enode* a, enode* b); + + // concat associativity: ensure right-associated normal form + // concat(concat(a, b), c) = concat(a, concat(b, c)) + void propagate_assoc(enode* n); + + // concat simplification: + // merging Kleene stars, merging loops, absorbing nullables + void propagate_simplify(enode* n); + + // check if snode is nullable via sgraph metadata + bool is_nullable(enode* n); + + // check if two enodes have congruent star bodies + bool same_star_body(enode* a, enode* b); + + // check if two enodes have congruent loop bodies and extract bounds + bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2); + + public: + seq_plugin(egraph& g); + + theory_id get_id() const override { return m_seq.get_family_id(); } + + void register_node(enode* n) override; + + void merge_eh(enode* n1, enode* n2) override; + + void diseq_eh(enode*) override {} + + void propagate() override; + + void undo() override; + + void push_scope_eh() override; + + std::ostream& display(std::ostream& out) const override; + + void collect_statistics(statistics& st) const override; + + sgraph& get_sgraph() { return m_sg; } + sgraph const& get_sgraph() const { return m_sg; } + }; +}