3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-21 08:00:27 +00:00
z3/src/ast/euf/euf_sgraph.h

222 lines
8.9 KiB
C++

/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
euf_sgraph.h
Abstract:
Sequence/string graph layer
Encapsulates string and regex expressions for the string solver.
Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT).
The sgraph maps Z3 sequence/regex AST expressions to snode structures
organized as binary concatenation trees with metadata, and works on top of
an egraph (supplied by the caller) with a seq_plugin for congruence closure.
-- snode classification: empty, char, variable, unit, concat, power,
star, loop, union, intersection, complement, fail, full_char,
full_seq, range, to_re, in_re, other.
-- Metadata computation: ground, regex_free, nullable, level, length.
-- Expression registration via mk(expr*), lookup via find(expr*).
-- Scope management: push/pop with backtracking.
-- egraph integration with seq_plugin for:
* concat associativity via associativity-respecting hash table,
* Kleene star merging (u.v*.v*.w = u.v*.w),
* nullable absorption next to .* (u.*.v.w = u.*.w when v nullable),
* str.++ identity elimination (concat(a, ε) = a),
* re.++ identity/absorption (concat(a, epsilon) = a, concat(a, ∅) = ∅).
-- enode registration via mk_enode(expr*).
ZIPT features not yet ported:
-- Str operations: normalisation with union-find representatives and
cache migration, balanced tree maintenance, drop left/right with
caching, substitution, indexed access, iteration, ToList caching,
simplification, derivative computation, structural equality with
associative hashing, rotation equality, expression reconstruction,
Graphviz export.
-- StrToken subclasses: SymCharToken, StrAtToken, SubStrToken,
SetToken, PostToken/PreToken.
-- StrToken features: Nielsen-style GetDecomposition with side
constraints, NamedStrToken extension tracking for variable
splitting with PowerExtension, CollectSymbols for Parikh analysis,
MinTerms for character class analysis, token ordering, Derivable
and BasicRegex flags.
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "util/region.h"
#include "util/statistics.h"
#include "util/lbool.h"
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/euf/euf_snode.h"
#include "ast/euf/euf_egraph.h"
namespace euf {
class seq_plugin;
// Callback interface used by sgraph's projection-aware derivative.
//
// A projection operator π_{Q,F}(state) is represented as a re.proj skolem.
// Its set Q of explored states is not stored in the sgraph; it lives
// externally (the nielsen_graph partial DFA) and is addressed by a snapshot
// index nu. While deriving a projection, the sgraph asks this oracle whether
// the current state is a member of Q.
class projection_oracle {
public:
    // Membership query: returns true iff `state` (a regex expression) is
    // part of the explored subautomaton snapshot identified by `nu`.
    virtual bool projection_state_in_Q(expr* state, unsigned nu) = 0;

    // Virtual so that implementations can be destroyed through this
    // interface.
    virtual ~projection_oracle() = default;
};
class sgraph {
struct stats {
unsigned m_num_nodes;
unsigned m_num_concat;
unsigned m_num_power;
unsigned m_num_hash_hits;
stats() { reset(); }
void reset() { memset(this, 0, sizeof(*this)); }
};
ast_manager& m;
seq_util m_seq;
seq_rewriter m_rewriter;
egraph& m_egraph;
region m_region;
snode_vector m_nodes;
sort_ref m_str_sort; // cached string sort
unsigned_vector m_scopes;
unsigned m_num_scopes = 0;
stats m_stats;
// Pins every expression that any (live or popped) snode references via
// m_expr. snodes are allocated in m_region — which is never freed —
// but their m_expr field is owned by the egraph trail. Without this
// pin the egraph would release expressions on pop while clients still
// hold the matching snode* (e.g. inside nielsen_node str_mems, edge
// substitutions, or the partial-DFA cache), turning every later
// get_expr() into a use-after-free. The pin grows monotonically; it
// is dropped only when sgraph itself is destroyed.
expr_ref_vector m_pin;
// maps expression id to snode
ptr_vector<snode> m_expr2snode;
// trail of alias entries (string constant → decomposed snode) for pop
unsigned_vector m_alias_trail; // expression ids
unsigned_vector m_alias_trail_lim; // scope boundaries
// Oracle answering "state ∈ Q_nu" for projection derivatives. Not owned.
projection_oracle* m_proj_oracle = nullptr;
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
snode_kind classify(expr* e) const;
void compute_metadata(snode* n);
void compute_hash_matrix(snode* n);
void collect_re_predicates(snode* re, expr_ref_vector& preds);
public:
sgraph(ast_manager& m, egraph& eg, bool add_plugin = true);
~sgraph();
ast_manager& get_manager() const { return m; }
seq_util& get_seq_util() { return m_seq; }
egraph& get_egraph() { return m_egraph; }
egraph const& get_egraph() const { return m_egraph; }
// register an expression and return its snode
snode* mk(expr* e);
// lookup an already-registered expression
snode* find(expr* e) const;
// register expression in both sgraph and egraph
enode* mk_enode(expr* e);
sort* get_str_sort() const { return m_str_sort; }
// return true if a, b are of the same length and distinct
bool are_unit_distinct(snode *a, snode *b) const;
// factory methods for creating snodes with corresponding expressions
snode* mk_var(symbol const& name, sort* s);
snode* mk_char(unsigned ch);
snode *mk_empty_seq(sort *s);
snode* mk_concat(snode* a, snode* b);
// drop operations: remove tokens from the front/back of a concat tree
snode* drop_first(snode* n);
snode* drop_last(snode* n);
snode* drop_left(snode* n, unsigned count);
snode* drop_right(snode* n, unsigned count);
// substitution: replace all occurrences of var in n by replacement
snode* subst(snode* n, snode* var, snode* replacement);
// Brzozowski derivative of regex re with respect to element elem.
// allowed_range can explicitly provide a concrete character or range to use
// for deriving symbolic variables.
snode* brzozowski_deriv(snode* re, snode* elem);
// Register the oracle consulted when deriving projection operators.
// Passing nullptr unregisters. Not owned.
void set_projection_oracle(projection_oracle* o) { m_proj_oracle = o; }
// Projection operator support (π_{Q,F}(state) modeled as re.proj skolem).
// Recognize and destructure a re.proj skolem expression.
bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const;
// Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu.
expr_ref mk_re_proj(expr* state, expr* root, unsigned nu);
// True if e is a symbolic-character dispatch skeleton (ite / union of
// dispatch, bottoming out in ∅) rather than a concrete regex state.
bool is_char_dispatch(expr* e) const;
// Wrap a (possibly ite/union-structured) symbolic-derivative result in
// the projection operator, propagating π into every dispatch leaf (§4).
expr_ref wrap_proj(expr* e, expr* root, unsigned nu);
// Projection-aware Brzozowski derivative w.r.t. a character expr
// (concrete or symbolic).
snode* deriv_proj(snode* re, expr* ch);
// Projection-aware nullability: lifts re.get_info().nullable to regexes
// that may contain projection operators. Returns l_true / l_false
// (l_undef only if an underlying projection-free subterm is undef).
lbool re_nullable(snode* re);
// Decode a character expression that may be represented as a const-char,
// a unit string containing a const-char, or a one-character string literal.
bool decode_re_char(expr* ex, unsigned& out) const;
// compute minterms (character class partition) from a regex
void compute_minterms(snode* re, snode_vector& minterms);
// scope management for backtracking
void push();
void pop(unsigned num_scopes);
// access
snode_vector const& nodes() const { return m_nodes; }
unsigned num_nodes() const { return m_nodes.size(); }
// display
std::ostream& display(std::ostream& out) const;
void collect_statistics(statistics& st) const;
};
}