mirror of
https://github.com/Z3Prover/z3
synced 2026-03-07 13:54:53 +00:00
Add euf_seq_plugin: string plugin for egraph with concat associativity and ZIPT simplifications
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
51f4f918ea
commit
78a4e3001b
3 changed files with 387 additions and 0 deletions
|
|
@ -9,6 +9,7 @@ z3_add_component(euf
|
|||
euf_justification.cpp
|
||||
euf_mam.cpp
|
||||
euf_plugin.cpp
|
||||
euf_seq_plugin.cpp
|
||||
euf_sgraph.cpp
|
||||
euf_specrel_plugin.cpp
|
||||
ho_matcher.cpp
|
||||
|
|
|
|||
259
src/ast/euf/euf_seq_plugin.cpp
Normal file
259
src/ast/euf/euf_seq_plugin.cpp
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_seq_plugin.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Plugin structure for sequences/strings.
|
||||
|
||||
Merges equivalence classes taking into account associativity
|
||||
of concatenation and algebraic properties of strings and
|
||||
regular expressions.
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
#include "ast/euf/euf_seq_plugin.h"
|
||||
#include "ast/euf/euf_egraph.h"
|
||||
#include "ast/ast_pp.h"
|
||||
|
||||
namespace euf {
|
||||
|
||||
seq_plugin::seq_plugin(egraph& g):
|
||||
plugin(g),
|
||||
m_seq(g.get_manager()),
|
||||
m_sg(g.get_manager()) {
|
||||
}
|
||||
|
||||
void seq_plugin::register_node(enode* n) {
|
||||
m_queue.push_back(n);
|
||||
push_undo(undo_kind::undo_add_concat);
|
||||
}
|
||||
|
||||
void seq_plugin::merge_eh(enode* n1, enode* n2) {
|
||||
m_queue.push_back(enode_pair(n1, n2));
|
||||
push_undo(undo_kind::undo_add_concat);
|
||||
}
|
||||
|
||||
void seq_plugin::push_undo(undo_kind k) {
|
||||
m_undo.push_back(k);
|
||||
push_plugin_undo(get_id());
|
||||
}
|
||||
|
||||
void seq_plugin::propagate() {
|
||||
if (m_qhead == m_queue.size())
|
||||
return;
|
||||
// save qhead for undo
|
||||
unsigned old_qhead = m_qhead;
|
||||
for (; m_qhead < m_queue.size(); ++m_qhead) {
|
||||
if (g.inconsistent())
|
||||
break;
|
||||
if (std::holds_alternative<enode*>(m_queue[m_qhead])) {
|
||||
auto n = *std::get_if<enode*>(&m_queue[m_qhead]);
|
||||
propagate_register_node(n);
|
||||
}
|
||||
else {
|
||||
auto [a, b] = *std::get_if<enode_pair>(&m_queue[m_qhead]);
|
||||
propagate_merge(a, b);
|
||||
}
|
||||
}
|
||||
(void)old_qhead;
|
||||
}
|
||||
|
||||
void seq_plugin::propagate_register_node(enode* n) {
|
||||
if (!m_seq.is_seq(n->get_expr()) && !m_seq.is_re(n->get_expr()))
|
||||
return;
|
||||
|
||||
TRACE(seq, tout << "seq register " << g.bpp(n) << "\n");
|
||||
|
||||
// register in sgraph
|
||||
m_sg.mk(n->get_expr());
|
||||
|
||||
if (is_concat(n)) {
|
||||
m_concats.push_back(n);
|
||||
propagate_assoc(n);
|
||||
propagate_simplify(n);
|
||||
}
|
||||
|
||||
// n-ary concat: concat(a, b, c) => concat(a, concat(b, c))
|
||||
if (is_concat(n) && n->num_args() > 2) {
|
||||
auto last = n->get_arg(n->num_args() - 1);
|
||||
for (unsigned i = n->num_args() - 1; i-- > 0; )
|
||||
last = mk_concat(n->get_arg(i), last);
|
||||
push_merge(last, n);
|
||||
}
|
||||
|
||||
// empty concat: concat(a, empty) = a, concat(empty, b) = b
|
||||
enode* a, *b;
|
||||
if (is_concat(n, a, b)) {
|
||||
if (is_empty(a))
|
||||
push_merge(n, b);
|
||||
else if (is_empty(b))
|
||||
push_merge(n, a);
|
||||
}
|
||||
}
|
||||
|
||||
void seq_plugin::propagate_merge(enode* a, enode* b) {
|
||||
if (!m_seq.is_seq(a->get_expr()) && !m_seq.is_re(a->get_expr()))
|
||||
return;
|
||||
|
||||
TRACE(seq, tout << "seq merge " << g.bpp(a) << " == " << g.bpp(b) << "\n");
|
||||
|
||||
// when equivalence classes merge, re-check concat simplifications
|
||||
for (enode* n : enode_class(a)) {
|
||||
if (is_concat(n))
|
||||
propagate_simplify(n);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Concat associativity:
|
||||
// concat(concat(a, b), c) = concat(a, concat(b, c))
|
||||
//
|
||||
// This normalizes to right-associated form.
|
||||
//
|
||||
void seq_plugin::propagate_assoc(enode* n) {
|
||||
enode* a, *b;
|
||||
if (!is_concat(n, a, b))
|
||||
return;
|
||||
|
||||
enode* a1, *a2;
|
||||
if (is_concat(a, a1, a2)) {
|
||||
// concat(concat(a1, a2), b) => concat(a1, concat(a2, b))
|
||||
enode* inner = mk_concat(a2, b);
|
||||
enode* outer = mk_concat(a1, inner);
|
||||
push_merge(outer, n);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Concat simplification rules from ZIPT:
|
||||
//
|
||||
// 1. Kleene star merging: concat(u, v*, v*, w) = concat(u, v*, w)
|
||||
// when adjacent children in a concat chain have congruent star bodies.
|
||||
//
|
||||
// 2. Nullable absorption: concat(u, .*, v, w) = concat(u, .*, w)
|
||||
// when v is nullable and adjacent to full_seq (.*).
|
||||
//
|
||||
void seq_plugin::propagate_simplify(enode* n) {
|
||||
enode* a, *b;
|
||||
if (!is_concat(n, a, b))
|
||||
return;
|
||||
|
||||
// Rule 1: Kleene star merging
|
||||
// concat(v*, v*) = v*
|
||||
if (same_star_body(a, b))
|
||||
push_merge(n, a);
|
||||
|
||||
// Rule 1 extended: concat(v*, concat(v*, c)) = concat(v*, c)
|
||||
enode* b1, *b2;
|
||||
if (is_concat(b, b1, b2) && same_star_body(a, b1))
|
||||
push_merge(n, b);
|
||||
|
||||
// Rule 2: Nullable absorption by .*
|
||||
// concat(.*, v) = .* when v is nullable
|
||||
if (is_full_seq(a) && is_nullable(b))
|
||||
push_merge(n, a);
|
||||
|
||||
// concat(v, .*) = .* when v is nullable
|
||||
if (is_nullable(a) && is_full_seq(b))
|
||||
push_merge(n, b);
|
||||
|
||||
// concat(.*, concat(v, w)) = concat(.*, w) when v nullable
|
||||
if (is_full_seq(a) && is_concat(b, b1, b2) && is_nullable(b1)) {
|
||||
enode* simplified = mk_concat(a, b2);
|
||||
push_merge(n, simplified);
|
||||
}
|
||||
|
||||
// concat(concat(u, v), .*) = concat(u, .*) when v nullable
|
||||
enode* a1, *a2;
|
||||
if (is_concat(a, a1, a2) && is_nullable(a2) && is_full_seq(b)) {
|
||||
enode* simplified = mk_concat(a1, b);
|
||||
push_merge(n, simplified);
|
||||
}
|
||||
}
|
||||
|
||||
bool seq_plugin::is_nullable(enode* n) {
|
||||
snode* s = m_sg.find(n->get_expr());
|
||||
if (s)
|
||||
return s->is_nullable();
|
||||
// empty string is nullable
|
||||
if (is_empty(n))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool seq_plugin::same_star_body(enode* a, enode* b) {
|
||||
if (!is_star(a) || !is_star(b))
|
||||
return false;
|
||||
// re.star(x) and re.star(y) have congruent bodies if x ~ y
|
||||
return a->get_arg(0)->get_root() == b->get_arg(0)->get_root();
|
||||
}
|
||||
|
||||
bool seq_plugin::same_loop_body(enode* a, enode* b,
|
||||
unsigned& lo1, unsigned& hi1,
|
||||
unsigned& lo2, unsigned& hi2) {
|
||||
if (!is_loop(a) || !is_loop(b))
|
||||
return false;
|
||||
expr* body_a, *body_b;
|
||||
if (!m_seq.re.is_loop(a->get_expr(), body_a, lo1, hi1))
|
||||
return false;
|
||||
if (!m_seq.re.is_loop(b->get_expr(), body_b, lo2, hi2))
|
||||
return false;
|
||||
enode* na = g.find(body_a);
|
||||
enode* nb = g.find(body_b);
|
||||
if (!na || !nb)
|
||||
return false;
|
||||
return na->get_root() == nb->get_root();
|
||||
}
|
||||
|
||||
enode* seq_plugin::mk_concat(enode* a, enode* b) {
|
||||
expr* e = m_seq.str.mk_concat(a->get_expr(), b->get_expr());
|
||||
enode* args[2] = { a, b };
|
||||
return mk(e, 2, args);
|
||||
}
|
||||
|
||||
enode* seq_plugin::mk_empty(sort* s) {
|
||||
expr* e = m_seq.str.mk_empty(s);
|
||||
return mk(e, 0, nullptr);
|
||||
}
|
||||
|
||||
void seq_plugin::undo() {
|
||||
auto k = m_undo.back();
|
||||
m_undo.pop_back();
|
||||
switch (k) {
|
||||
case undo_kind::undo_add_concat:
|
||||
SASSERT(!m_queue.empty());
|
||||
m_queue.pop_back();
|
||||
if (m_qhead > m_queue.size())
|
||||
m_qhead = m_queue.size();
|
||||
break;
|
||||
case undo_kind::undo_push_scope:
|
||||
m_sg.pop(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void seq_plugin::push_scope_eh() {
|
||||
m_sg.push();
|
||||
m_undo.push_back(undo_kind::undo_push_scope);
|
||||
push_plugin_undo(get_id());
|
||||
}
|
||||
|
||||
std::ostream& seq_plugin::display(std::ostream& out) const {
|
||||
out << "seq\n";
|
||||
m_sg.display(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
void seq_plugin::collect_statistics(statistics& st) const {
|
||||
m_sg.collect_statistics(st);
|
||||
}
|
||||
}
|
||||
127
src/ast/euf/euf_seq_plugin.h
Normal file
127
src/ast/euf/euf_seq_plugin.h
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_seq_plugin.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Plugin structure for sequences/strings.
|
||||
|
||||
Merges equivalence classes taking into account associativity
|
||||
of concatenation and algebraic properties of strings and
|
||||
regular expressions. Implements features from ZIPT:
|
||||
|
||||
-- Concat associativity: str.++ is associative, so
|
||||
concat(a, concat(b, c)) = concat(concat(a, b), c).
|
||||
Handled via an AC-style plugin for the concat operator.
|
||||
|
||||
-- Kleene star merging: adjacent identical Kleene stars
|
||||
in a concatenation are collapsed, u.v*.v*.w = u.v*.w
|
||||
|
||||
-- Loop merging: adjacent loops over the same body are
|
||||
merged, v{l1,h1}.v{l2,h2} = v{l1+l2,h1+h2}
|
||||
|
||||
-- Nullable absorption: a nullable token adjacent to .*
|
||||
is absorbed, u.*.v.w = u.*.w when v is nullable.
|
||||
|
||||
The plugin integrates with euf_egraph for congruence closure
|
||||
and with sgraph for snode-based string representation.
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/euf/euf_plugin.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
|
||||
namespace euf {
|
||||
|
||||
class egraph;
|
||||
|
||||
class seq_plugin : public plugin {
|
||||
|
||||
enum class undo_kind {
|
||||
undo_add_concat,
|
||||
undo_push_scope
|
||||
};
|
||||
|
||||
seq_util m_seq;
|
||||
sgraph m_sg;
|
||||
svector<undo_kind> m_undo;
|
||||
|
||||
// queue of merges and registrations to process
|
||||
vector<std::variant<enode*, enode_pair>> m_queue;
|
||||
unsigned m_qhead = 0;
|
||||
|
||||
// track registered concat nodes for simplification
|
||||
enode_vector m_concats;
|
||||
|
||||
bool is_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); }
|
||||
bool is_concat(enode* n, enode*& a, enode*& b) {
|
||||
return is_concat(n) && n->num_args() == 2 &&
|
||||
(a = n->get_arg(0), b = n->get_arg(1), true);
|
||||
}
|
||||
bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); }
|
||||
bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); }
|
||||
bool is_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); }
|
||||
bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); }
|
||||
bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); }
|
||||
|
||||
enode* mk_concat(enode* a, enode* b);
|
||||
enode* mk_empty(sort* s);
|
||||
|
||||
void push_undo(undo_kind k);
|
||||
|
||||
void propagate_register_node(enode* n);
|
||||
void propagate_merge(enode* a, enode* b);
|
||||
|
||||
// concat associativity: ensure right-associated normal form
|
||||
// concat(concat(a, b), c) = concat(a, concat(b, c))
|
||||
void propagate_assoc(enode* n);
|
||||
|
||||
// concat simplification:
|
||||
// merging Kleene stars, merging loops, absorbing nullables
|
||||
void propagate_simplify(enode* n);
|
||||
|
||||
// check if snode is nullable via sgraph metadata
|
||||
bool is_nullable(enode* n);
|
||||
|
||||
// check if two enodes have congruent star bodies
|
||||
bool same_star_body(enode* a, enode* b);
|
||||
|
||||
// check if two enodes have congruent loop bodies and extract bounds
|
||||
bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2);
|
||||
|
||||
public:
|
||||
seq_plugin(egraph& g);
|
||||
|
||||
theory_id get_id() const override { return m_seq.get_family_id(); }
|
||||
|
||||
void register_node(enode* n) override;
|
||||
|
||||
void merge_eh(enode* n1, enode* n2) override;
|
||||
|
||||
void diseq_eh(enode*) override {}
|
||||
|
||||
void propagate() override;
|
||||
|
||||
void undo() override;
|
||||
|
||||
void push_scope_eh() override;
|
||||
|
||||
std::ostream& display(std::ostream& out) const override;
|
||||
|
||||
void collect_statistics(statistics& st) const override;
|
||||
|
||||
sgraph& get_sgraph() { return m_sg; }
|
||||
sgraph const& get_sgraph() const { return m_sg; }
|
||||
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue