3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 22:04:53 +00:00
z3/src/ast/euf/euf_seq_plugin.h
copilot-swe-agent[bot] 150f1fe2ea Revert is_concat changes, add ZIPT URL, implement snode/sgraph operations and tests
- Revert is_str_concat/is_re_concat to original form (PR #8820 review)
- Add ZIPT URL (https://github.com/CEisenhofer/ZIPT) to euf_sgraph.h
- Add snode::at() for token indexing and collect_tokens() for enumeration
- Add sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat
- Add sgraph drop operations: drop_first, drop_last, drop_left, drop_right
- Add sgraph substitution: subst(snode*, snode*, snode*)
- Add Brzozowski derivative via seq_rewriter::mk_derivative
- Add minterm computation from regex predicates
- Add 7 new unit tests covering all new operations with complex concats

Co-authored-by: NikolajBjorner <56730610+NikolajBjorner@users.noreply.github.com>
2026-03-02 19:28:32 +00:00

171 lines
5.9 KiB
C++

/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
euf_seq_plugin.h
Abstract:
Plugin structure for sequences/strings.
Merges equivalence classes taking into account associativity
of concatenation and algebraic properties of strings and
regular expressions. Implements features from ZIPT:
-- Concat associativity: str.++ is associative, so
concat(a, concat(b, c)) = concat(concat(a, b), c).
Handled via an AC-style plugin for the concat operator.
-- Kleene star merging: adjacent identical Kleene stars
in a concatenation are collapsed, u.v*.v*.w = u.v*.w
-- Loop merging: adjacent loops over the same body are
merged, v{l1,h1}.v{l2,h2} = v{l1+l2,h1+h2}
-- Nullable absorption: a nullable token adjacent to .*
is absorbed, u.*.v.w = u.*.w when v is nullable.
The plugin integrates with euf_egraph for congruence closure.
Node registration in sgraph is handled by sgraph itself via
the egraph's on_make callback, not by the plugin.
Author:
Nikolaj Bjorner (nbjorner) 2026-03-01
Clemens Eisenhofer 2026-03-01
--*/
#pragma once
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/euf/euf_plugin.h"
#include "util/hashtable.h"
namespace euf {
class egraph;
// Associativity-respecting hash for enode concat trees.
// Flattens concat(concat(a,b),c) and concat(a,concat(b,c))
// to the same leaf sequence [a,b,c] before hashing.
// Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT).
struct enode_concat_hash {
seq_util const& seq;
enode_concat_hash(seq_util const& s) : seq(s) {}
unsigned operator()(enode* n) const;
};
// Associativity-respecting equality for enode concat trees.
// Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT).
struct enode_concat_eq {
seq_util const& seq;
enode_concat_eq(seq_util const& s) : seq(s) {}
bool operator()(enode* a, enode* b) const;
};
class seq_plugin : public plugin {
enum class undo_kind {
undo_add_concat,
undo_add_to_table,
};
seq_util m_seq;
seq_rewriter m_rewriter;
svector<undo_kind> m_undo;
// queue of merges and registrations to process
vector<std::variant<enode*, enode_pair>> m_queue;
unsigned m_qhead = 0;
// track registered concat nodes for simplification
enode_vector m_concats;
// associativity-respecting hash table for concat nodes
enode_concat_hash m_concat_hash;
enode_concat_eq m_concat_eq;
hashtable<enode*, enode_concat_hash, enode_concat_eq> m_concat_table;
// string concat predicates
bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); }
bool is_str_concat(enode* n, enode*& a, enode*& b) {
expr* ea = nullptr, *eb = nullptr;
return m_seq.str.is_concat(n->get_expr(), ea, eb) &&
n->num_args() == 2 &&
(a = n->get_arg(0), b = n->get_arg(1), true);
}
// regex concat predicates
bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); }
bool is_re_concat(enode* n, enode*& a, enode*& b) {
expr* ea = nullptr, *eb = nullptr;
return m_seq.re.is_concat(n->get_expr(), ea, eb) &&
n->num_args() == 2 &&
(a = n->get_arg(0), b = n->get_arg(1), true);
}
// any concat, string or regex
bool is_concat(enode* n) const { return is_str_concat(n) || is_re_concat(n); }
bool is_concat(enode* n, enode*& a, enode*& b) {
return is_str_concat(n, a, b) || is_re_concat(n, a, b);
}
bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); }
bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); }
// string empty: ε for str.++
bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); }
// regex empty set: ∅ for re.++ (absorbing element)
bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_expr()); }
// regex epsilon: to_re("") for re.++ (identity element)
bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_expr()); }
bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); }
bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); }
void push_undo(undo_kind k);
void propagate_register_node(enode* n);
void propagate_merge(enode* a, enode* b);
// concat associativity: maintain hash table of concat nodes,
// merge nodes that are equal modulo associativity
void propagate_assoc(enode* n);
// concat simplification:
// merging Kleene stars, merging loops, absorbing nullables
void propagate_simplify(enode* n);
// check if expression is nullable using existing seq_rewriter
bool is_nullable(expr* e);
bool is_nullable(enode* n) { return is_nullable(n->get_expr()); }
// check if two enodes have congruent star bodies
bool same_star_body(enode* a, enode* b);
// check if two enodes have congruent loop bodies and extract bounds
bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2);
public:
seq_plugin(egraph& g);
theory_id get_id() const override { return m_seq.get_family_id(); }
void register_node(enode* n) override;
void merge_eh(enode* n1, enode* n2) override;
void diseq_eh(enode*) override {}
void propagate() override;
void undo() override;
std::ostream& display(std::ostream& out) const override;
void collect_statistics(statistics& st) const override;
};
}