mirror of
https://github.com/Z3Prover/z3
synced 2026-03-07 05:44:51 +00:00
Add sgraph/seq_plugin + fix re.concat classification and loop nullability
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
7e286fc5e1
commit
9882bf0205
12 changed files with 2531 additions and 0 deletions
|
|
@ -9,6 +9,8 @@ z3_add_component(euf
|
|||
euf_justification.cpp
|
||||
euf_mam.cpp
|
||||
euf_plugin.cpp
|
||||
euf_seq_plugin.cpp
|
||||
euf_sgraph.cpp
|
||||
euf_specrel_plugin.cpp
|
||||
ho_matcher.cpp
|
||||
COMPONENT_DEPENDENCIES
|
||||
|
|
|
|||
291
src/ast/euf/euf_seq_plugin.cpp
Normal file
291
src/ast/euf/euf_seq_plugin.cpp
Normal file
|
|
@ -0,0 +1,291 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_seq_plugin.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Plugin structure for sequences/strings.
|
||||
|
||||
Merges equivalence classes taking into account associativity
|
||||
of concatenation and algebraic properties of strings and
|
||||
regular expressions.
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
#include "ast/euf/euf_seq_plugin.h"
|
||||
#include "ast/euf/euf_egraph.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "ast/ast_pp.h"
|
||||
|
||||
namespace euf {
|
||||
|
||||
// Check if enode is any kind of concat (str.++ or re.++)
|
||||
static bool is_any_concat(enode* n, seq_util const& seq) {
|
||||
expr* a = nullptr, *b = nullptr;
|
||||
return seq.str.is_concat(n->get_expr(), a, b) || seq.re.is_concat(n->get_expr(), a, b);
|
||||
}
|
||||
|
||||
// Collect leaves of a concat tree in left-to-right order.
|
||||
// For non-concat nodes, the node itself is a leaf.
|
||||
// Handles both str.++ and re.++.
|
||||
static void collect_enode_leaves(enode* n, seq_util const& seq, enode_vector& leaves) {
|
||||
if (is_any_concat(n, seq)) {
|
||||
collect_enode_leaves(n->get_arg(0), seq, leaves);
|
||||
collect_enode_leaves(n->get_arg(1), seq, leaves);
|
||||
}
|
||||
else {
|
||||
leaves.push_back(n);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned enode_concat_hash::operator()(enode* n) const {
|
||||
snode* sn = sg.find(n->get_expr());
|
||||
if (sn && sn->has_cached_hash())
|
||||
return sn->assoc_hash();
|
||||
if (!is_any_concat(n, seq))
|
||||
return n->get_id();
|
||||
enode_vector leaves;
|
||||
collect_enode_leaves(n, seq, leaves);
|
||||
unsigned h = 0;
|
||||
for (enode* l : leaves)
|
||||
h = combine_hash(h, l->get_id());
|
||||
return h;
|
||||
}
|
||||
|
||||
bool enode_concat_eq::operator()(enode* a, enode* b) const {
|
||||
if (a == b) return true;
|
||||
if (!is_any_concat(a, seq) || !is_any_concat(b, seq))
|
||||
return false;
|
||||
enode_vector la, lb;
|
||||
collect_enode_leaves(a, seq, la);
|
||||
collect_enode_leaves(b, seq, lb);
|
||||
if (la.size() != lb.size())
|
||||
return false;
|
||||
for (unsigned i = 0; i < la.size(); ++i)
|
||||
if (la[i] != lb[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
seq_plugin::seq_plugin(egraph& g, sgraph* sg):
|
||||
plugin(g),
|
||||
m_seq(g.get_manager()),
|
||||
m_rewriter(g.get_manager()),
|
||||
m_sg(sg ? *sg : *alloc(sgraph, g.get_manager(), g, false)),
|
||||
m_sg_owned(sg == nullptr),
|
||||
m_concat_hash(m_seq, m_sg),
|
||||
m_concat_eq(m_seq),
|
||||
m_concat_table(DEFAULT_HASHTABLE_INITIAL_CAPACITY, m_concat_hash, m_concat_eq) {
|
||||
}
|
||||
|
||||
seq_plugin::~seq_plugin() {
|
||||
if (m_sg_owned)
|
||||
dealloc(&m_sg);
|
||||
}
|
||||
|
||||
void seq_plugin::register_node(enode* n) {
|
||||
m_queue.push_back(n);
|
||||
push_undo(undo_kind::undo_add_concat);
|
||||
}
|
||||
|
||||
void seq_plugin::merge_eh(enode* n1, enode* n2) {
|
||||
m_queue.push_back(enode_pair(n1, n2));
|
||||
push_undo(undo_kind::undo_add_concat);
|
||||
}
|
||||
|
||||
void seq_plugin::push_undo(undo_kind k) {
|
||||
m_undo.push_back(k);
|
||||
push_plugin_undo(get_id());
|
||||
}
|
||||
|
||||
void seq_plugin::propagate() {
|
||||
if (m_qhead == m_queue.size())
|
||||
return;
|
||||
for (; m_qhead < m_queue.size(); ++m_qhead) {
|
||||
if (g.inconsistent())
|
||||
break;
|
||||
if (std::holds_alternative<enode*>(m_queue[m_qhead])) {
|
||||
auto n = std::get<enode*>(m_queue[m_qhead]);
|
||||
propagate_register_node(n);
|
||||
}
|
||||
else {
|
||||
auto [a, b] = std::get<enode_pair>(m_queue[m_qhead]);
|
||||
propagate_merge(a, b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void seq_plugin::propagate_register_node(enode* n) {
|
||||
if (!m_seq.is_seq(n->get_expr()) && !m_seq.is_re(n->get_expr()))
|
||||
return;
|
||||
|
||||
TRACE(seq, tout << "seq register " << g.bpp(n) << "\n");
|
||||
|
||||
if (is_concat(n)) {
|
||||
propagate_assoc(n);
|
||||
propagate_simplify(n);
|
||||
}
|
||||
|
||||
// str.++ identity: concat(a, ε) = a, concat(ε, b) = b
|
||||
enode* a, *b;
|
||||
if (is_str_concat(n, a, b)) {
|
||||
if (is_str_empty(a))
|
||||
push_merge(n, b);
|
||||
else if (is_str_empty(b))
|
||||
push_merge(n, a);
|
||||
}
|
||||
|
||||
// re.++ identity: concat(a, epsilon) = a, concat(epsilon, b) = b
|
||||
// re.++ absorption: concat(a, ∅) = ∅, concat(∅, b) = ∅
|
||||
if (is_re_concat(n, a, b)) {
|
||||
if (is_re_epsilon(a))
|
||||
push_merge(n, b);
|
||||
else if (is_re_epsilon(b))
|
||||
push_merge(n, a);
|
||||
else if (is_re_empty(a))
|
||||
push_merge(n, a);
|
||||
else if (is_re_empty(b))
|
||||
push_merge(n, b);
|
||||
}
|
||||
}
|
||||
|
||||
void seq_plugin::propagate_merge(enode* a, enode* b) {
|
||||
if (!m_seq.is_seq(a->get_expr()) && !m_seq.is_re(a->get_expr()))
|
||||
return;
|
||||
|
||||
TRACE(seq, tout << "seq merge " << g.bpp(a) << " == " << g.bpp(b) << "\n");
|
||||
|
||||
// when equivalence classes merge, re-check concat simplifications
|
||||
for (enode* n : enode_class(a)) {
|
||||
if (is_concat(n))
|
||||
propagate_simplify(n);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Concat associativity:
|
||||
// Instead of creating new expressions, maintain a hash table
|
||||
// that respects associativity. When a concat is registered,
|
||||
// look up existing concats with the same leaf sequence.
|
||||
// If found, merge the existing node with the new one.
|
||||
//
|
||||
void seq_plugin::propagate_assoc(enode* n) {
|
||||
if (!is_concat(n))
|
||||
return;
|
||||
|
||||
enode* existing = nullptr;
|
||||
if (m_concat_table.find(n, existing)) {
|
||||
if (existing != n)
|
||||
push_merge(n, existing);
|
||||
}
|
||||
else {
|
||||
m_concat_table.insert(n);
|
||||
m_concats.push_back(n);
|
||||
push_undo(undo_kind::undo_add_to_table);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Concat simplification rules from ZIPT:
|
||||
//
|
||||
// 1. Kleene star merging: concat(u, v*, v*, w) = concat(u, v*, w)
|
||||
// when adjacent children in a concat chain have congruent star bodies.
|
||||
//
|
||||
// 2. Nullable absorption: concat(u, .*, v, w) = concat(u, .*, w)
|
||||
// when v is nullable and adjacent to full_seq (.*).
|
||||
//
|
||||
void seq_plugin::propagate_simplify(enode* n) {
|
||||
enode* a, *b;
|
||||
if (!is_concat(n, a, b))
|
||||
return;
|
||||
|
||||
// Rule 1: Kleene star merging
|
||||
// concat(v*, v*) = v*
|
||||
if (same_star_body(a, b))
|
||||
push_merge(n, a);
|
||||
|
||||
// Rule 1 extended: concat(v*, concat(v*, c)) = concat(v*, c)
|
||||
enode* b1, *b2;
|
||||
if (is_concat(b, b1, b2) && same_star_body(a, b1))
|
||||
push_merge(n, b);
|
||||
|
||||
// Rule 2: Nullable absorption by .*
|
||||
// concat(.*, v) = .* when v is nullable
|
||||
if (is_full_seq(a) && is_nullable(b))
|
||||
push_merge(n, a);
|
||||
|
||||
// concat(v, .*) = .* when v is nullable
|
||||
if (is_nullable(a) && is_full_seq(b))
|
||||
push_merge(n, b);
|
||||
|
||||
// concat(.*, concat(v, w)) = concat(.*, w) when v nullable
|
||||
// handled by associativity + nullable absorption on sub-concats
|
||||
|
||||
// concat(concat(u, v), .*) = concat(u, .*) when v nullable
|
||||
// handled by associativity + nullable absorption on sub-concats
|
||||
}
|
||||
|
||||
bool seq_plugin::is_nullable(expr* e) {
|
||||
expr_ref result = m_rewriter.is_nullable(e);
|
||||
return g.get_manager().is_true(result);
|
||||
}
|
||||
|
||||
bool seq_plugin::same_star_body(enode* a, enode* b) {
|
||||
if (!is_star(a) || !is_star(b))
|
||||
return false;
|
||||
// re.star(x) and re.star(y) have congruent bodies if x ~ y
|
||||
return a->get_arg(0)->get_root() == b->get_arg(0)->get_root();
|
||||
}
|
||||
|
||||
bool seq_plugin::same_loop_body(enode* a, enode* b,
|
||||
unsigned& lo1, unsigned& hi1,
|
||||
unsigned& lo2, unsigned& hi2) {
|
||||
if (!is_loop(a) || !is_loop(b))
|
||||
return false;
|
||||
expr* body_a, *body_b;
|
||||
if (!m_seq.re.is_loop(a->get_expr(), body_a, lo1, hi1))
|
||||
return false;
|
||||
if (!m_seq.re.is_loop(b->get_expr(), body_b, lo2, hi2))
|
||||
return false;
|
||||
enode* na = g.find(body_a);
|
||||
enode* nb = g.find(body_b);
|
||||
if (!na || !nb)
|
||||
return false;
|
||||
return na->get_root() == nb->get_root();
|
||||
}
|
||||
|
||||
void seq_plugin::undo() {
|
||||
auto k = m_undo.back();
|
||||
m_undo.pop_back();
|
||||
switch (k) {
|
||||
case undo_kind::undo_add_concat:
|
||||
SASSERT(!m_queue.empty());
|
||||
m_queue.pop_back();
|
||||
if (m_qhead > m_queue.size())
|
||||
m_qhead = m_queue.size();
|
||||
break;
|
||||
case undo_kind::undo_add_to_table:
|
||||
SASSERT(!m_concats.empty());
|
||||
m_concat_table.remove(m_concats.back());
|
||||
m_concats.pop_back();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& seq_plugin::display(std::ostream& out) const {
|
||||
out << "seq-plugin\n";
|
||||
return out;
|
||||
}
|
||||
|
||||
void seq_plugin::collect_statistics(statistics& st) const {
|
||||
// statistics are collected by sgraph which owns us
|
||||
}
|
||||
}
|
||||
175
src/ast/euf/euf_seq_plugin.h
Normal file
175
src/ast/euf/euf_seq_plugin.h
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_seq_plugin.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Plugin structure for sequences/strings.
|
||||
|
||||
Merges equivalence classes taking into account associativity
|
||||
of concatenation and algebraic properties of strings and
|
||||
regular expressions. Implements features from ZIPT:
|
||||
|
||||
-- Concat associativity: str.++ is associative, so
|
||||
concat(a, concat(b, c)) = concat(concat(a, b), c).
|
||||
Handled via an AC-style plugin for the concat operator.
|
||||
|
||||
-- Kleene star merging: adjacent identical Kleene stars
|
||||
in a concatenation are collapsed, u.v*.v*.w = u.v*.w
|
||||
|
||||
-- Loop merging: adjacent loops over the same body are
|
||||
merged, v{l1,h1}.v{l2,h2} = v{l1+l2,h1+h2}
|
||||
|
||||
-- Nullable absorption: a nullable token adjacent to .*
|
||||
is absorbed, u.*.v.w = u.*.w when v is nullable.
|
||||
|
||||
The plugin integrates with euf_egraph for congruence closure.
|
||||
Node registration in sgraph is handled by sgraph itself via
|
||||
the egraph's on_make callback, not by the plugin.
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "ast/euf/euf_plugin.h"
|
||||
#include "util/hashtable.h"
|
||||
|
||||
namespace euf {
|
||||
|
||||
class egraph;
|
||||
class sgraph;
|
||||
|
||||
// Associativity-respecting hash for enode concat trees.
|
||||
// Uses cached snode hash matrices from the sgraph for O(1) hashing.
|
||||
// Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT).
|
||||
struct enode_concat_hash {
|
||||
seq_util const& seq;
|
||||
sgraph& sg;
|
||||
enode_concat_hash(seq_util const& s, sgraph& sg) : seq(s), sg(sg) {}
|
||||
unsigned operator()(enode* n) const;
|
||||
};
|
||||
|
||||
// Associativity-respecting equality for enode concat trees.
|
||||
// Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT).
|
||||
struct enode_concat_eq {
|
||||
seq_util const& seq;
|
||||
enode_concat_eq(seq_util const& s) : seq(s) {}
|
||||
bool operator()(enode* a, enode* b) const;
|
||||
};
|
||||
|
||||
class seq_plugin : public plugin {
|
||||
|
||||
enum class undo_kind {
|
||||
undo_add_concat,
|
||||
undo_add_to_table,
|
||||
};
|
||||
|
||||
seq_util m_seq;
|
||||
seq_rewriter m_rewriter;
|
||||
sgraph& m_sg;
|
||||
bool m_sg_owned = false; // whether we own the sgraph
|
||||
svector<undo_kind> m_undo;
|
||||
|
||||
// queue of merges and registrations to process
|
||||
vector<std::variant<enode*, enode_pair>> m_queue;
|
||||
unsigned m_qhead = 0;
|
||||
|
||||
// track registered concat nodes for simplification
|
||||
enode_vector m_concats;
|
||||
|
||||
// associativity-respecting hash table for concat nodes
|
||||
enode_concat_hash m_concat_hash;
|
||||
enode_concat_eq m_concat_eq;
|
||||
hashtable<enode*, enode_concat_hash, enode_concat_eq> m_concat_table;
|
||||
|
||||
// string concat predicates
|
||||
bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); }
|
||||
bool is_str_concat(enode* n, enode*& a, enode*& b) {
|
||||
expr* ea = nullptr, *eb = nullptr;
|
||||
return m_seq.str.is_concat(n->get_expr(), ea, eb) &&
|
||||
n->num_args() == 2 &&
|
||||
(a = n->get_arg(0), b = n->get_arg(1), true);
|
||||
}
|
||||
|
||||
// regex concat predicates
|
||||
bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); }
|
||||
bool is_re_concat(enode* n, enode*& a, enode*& b) {
|
||||
expr* ea = nullptr, *eb = nullptr;
|
||||
return m_seq.re.is_concat(n->get_expr(), ea, eb) &&
|
||||
n->num_args() == 2 &&
|
||||
(a = n->get_arg(0), b = n->get_arg(1), true);
|
||||
}
|
||||
|
||||
// any concat, string or regex
|
||||
bool is_concat(enode* n) const { return is_str_concat(n) || is_re_concat(n); }
|
||||
bool is_concat(enode* n, enode*& a, enode*& b) {
|
||||
return is_str_concat(n, a, b) || is_re_concat(n, a, b);
|
||||
}
|
||||
|
||||
bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); }
|
||||
bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); }
|
||||
|
||||
// string empty: ε for str.++
|
||||
bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); }
|
||||
// regex empty set: ∅ for re.++ (absorbing element)
|
||||
bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_expr()); }
|
||||
// regex epsilon: to_re("") for re.++ (identity element)
|
||||
bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_expr()); }
|
||||
|
||||
bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); }
|
||||
bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); }
|
||||
|
||||
void push_undo(undo_kind k);
|
||||
|
||||
void propagate_register_node(enode* n);
|
||||
void propagate_merge(enode* a, enode* b);
|
||||
|
||||
// concat associativity: maintain hash table of concat nodes,
|
||||
// merge nodes that are equal modulo associativity
|
||||
void propagate_assoc(enode* n);
|
||||
|
||||
// concat simplification:
|
||||
// merging Kleene stars, merging loops, absorbing nullables
|
||||
void propagate_simplify(enode* n);
|
||||
|
||||
// check if expression is nullable using existing seq_rewriter
|
||||
bool is_nullable(expr* e);
|
||||
bool is_nullable(enode* n) { return is_nullable(n->get_expr()); }
|
||||
|
||||
// check if two enodes have congruent star bodies
|
||||
bool same_star_body(enode* a, enode* b);
|
||||
|
||||
// check if two enodes have congruent loop bodies and extract bounds
|
||||
bool same_loop_body(enode* a, enode* b, unsigned& lo1, unsigned& hi1, unsigned& lo2, unsigned& hi2);
|
||||
|
||||
public:
|
||||
seq_plugin(egraph& g, sgraph* sg = nullptr);
|
||||
~seq_plugin() override;
|
||||
|
||||
theory_id get_id() const override { return m_seq.get_family_id(); }
|
||||
|
||||
void register_node(enode* n) override;
|
||||
|
||||
void merge_eh(enode* n1, enode* n2) override;
|
||||
|
||||
void diseq_eh(enode*) override {}
|
||||
|
||||
void propagate() override;
|
||||
|
||||
void undo() override;
|
||||
|
||||
std::ostream& display(std::ostream& out) const override;
|
||||
|
||||
void collect_statistics(statistics& st) const override;
|
||||
};
|
||||
}
|
||||
634
src/ast/euf/euf_sgraph.cpp
Normal file
634
src/ast/euf/euf_sgraph.cpp
Normal file
|
|
@ -0,0 +1,634 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_sgraph.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Sequence/string graph implementation
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "ast/euf/euf_seq_plugin.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/ast_pp.h"
|
||||
|
||||
namespace euf {
|
||||
|
||||
// substitution cache stored on snode for ZIPT-style optimization
|
||||
struct snode_subst_cache {
|
||||
struct entry {
|
||||
unsigned var_id;
|
||||
unsigned repl_id;
|
||||
snode* result;
|
||||
};
|
||||
svector<entry> m_entries;
|
||||
snode* find(unsigned var_id, unsigned repl_id) const {
|
||||
for (auto const& e : m_entries)
|
||||
if (e.var_id == var_id && e.repl_id == repl_id)
|
||||
return e.result;
|
||||
return nullptr;
|
||||
}
|
||||
void insert(unsigned var_id, unsigned repl_id, snode* result) {
|
||||
m_entries.push_back({var_id, repl_id, result});
|
||||
}
|
||||
};
|
||||
|
||||
sgraph::sgraph(ast_manager& m, egraph& eg, bool add_plugin):
|
||||
m(m),
|
||||
m_seq(m),
|
||||
m_rewriter(m),
|
||||
m_egraph(eg),
|
||||
m_str_sort(m_seq.str.mk_string_sort(), m),
|
||||
m_add_plugin(add_plugin) {
|
||||
// create seq_plugin and register it with the egraph
|
||||
if (add_plugin)
|
||||
m_egraph.add_plugin(alloc(seq_plugin, m_egraph, this));
|
||||
// register on_make callback so sgraph creates snodes for new enodes
|
||||
std::function<void(enode*)> on_make = [this](enode* n) {
|
||||
expr* e = n->get_expr();
|
||||
if (m_seq.is_seq(e) || m_seq.is_re(e))
|
||||
mk(e);
|
||||
};
|
||||
m_egraph.set_on_make(on_make);
|
||||
}
|
||||
|
||||
sgraph::~sgraph() {
|
||||
for (auto* c : m_subst_caches)
|
||||
dealloc(c);
|
||||
}
|
||||
|
||||
snode_kind sgraph::classify(expr* e) const {
|
||||
if (!is_app(e))
|
||||
return snode_kind::s_other;
|
||||
|
||||
if (m_seq.str.is_empty(e))
|
||||
return snode_kind::s_empty;
|
||||
|
||||
if (m_seq.str.is_string(e)) {
|
||||
zstring s;
|
||||
if (m_seq.str.is_string(e, s) && s.empty())
|
||||
return snode_kind::s_empty;
|
||||
return snode_kind::s_other;
|
||||
}
|
||||
|
||||
if (m_seq.str.is_concat(e) || m_seq.re.is_concat(e))
|
||||
return snode_kind::s_concat;
|
||||
|
||||
if (m_seq.str.is_unit(e)) {
|
||||
expr* ch = to_app(e)->get_arg(0);
|
||||
if (m_seq.is_const_char(ch))
|
||||
return snode_kind::s_char;
|
||||
return snode_kind::s_unit;
|
||||
}
|
||||
|
||||
if (m_seq.str.is_power(e))
|
||||
return snode_kind::s_power;
|
||||
|
||||
if (m_seq.re.is_star(e))
|
||||
return snode_kind::s_star;
|
||||
|
||||
if (m_seq.re.is_loop(e))
|
||||
return snode_kind::s_loop;
|
||||
|
||||
if (m_seq.re.is_union(e))
|
||||
return snode_kind::s_union;
|
||||
|
||||
if (m_seq.re.is_intersection(e))
|
||||
return snode_kind::s_intersect;
|
||||
|
||||
if (m_seq.re.is_complement(e))
|
||||
return snode_kind::s_complement;
|
||||
|
||||
if (m_seq.re.is_empty(e))
|
||||
return snode_kind::s_fail;
|
||||
|
||||
if (m_seq.re.is_full_char(e))
|
||||
return snode_kind::s_full_char;
|
||||
|
||||
if (m_seq.re.is_full_seq(e))
|
||||
return snode_kind::s_full_seq;
|
||||
|
||||
if (m_seq.re.is_to_re(e))
|
||||
return snode_kind::s_to_re;
|
||||
|
||||
if (m_seq.str.is_in_re(e))
|
||||
return snode_kind::s_in_re;
|
||||
|
||||
// uninterpreted constants of string sort are variables
|
||||
if (is_uninterp_const(e) && m_seq.is_seq(e->get_sort()))
|
||||
return snode_kind::s_var;
|
||||
|
||||
return snode_kind::s_other;
|
||||
}
|
||||
|
||||
void sgraph::compute_metadata(snode* n) {
|
||||
switch (n->m_kind) {
|
||||
case snode_kind::s_empty:
|
||||
n->m_ground = true;
|
||||
n->m_regex_free = true;
|
||||
n->m_nullable = true;
|
||||
n->m_level = 0;
|
||||
n->m_length = 0;
|
||||
break;
|
||||
|
||||
case snode_kind::s_char:
|
||||
n->m_ground = true;
|
||||
n->m_regex_free = true;
|
||||
n->m_nullable = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_var:
|
||||
n->m_ground = false;
|
||||
n->m_regex_free = true;
|
||||
n->m_nullable = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_unit:
|
||||
n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true;
|
||||
n->m_regex_free = true;
|
||||
n->m_nullable = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_concat: {
|
||||
SASSERT(n->num_args() == 2);
|
||||
snode* l = n->arg(0);
|
||||
snode* r = n->arg(1);
|
||||
n->m_ground = l->is_ground() && r->is_ground();
|
||||
n->m_regex_free = l->is_regex_free() && r->is_regex_free();
|
||||
n->m_nullable = l->is_nullable() && r->is_nullable();
|
||||
n->m_level = std::max(l->level(), r->level()) + 1;
|
||||
n->m_length = l->length() + r->length();
|
||||
++m_stats.m_num_concat;
|
||||
break;
|
||||
}
|
||||
|
||||
case snode_kind::s_power: {
|
||||
// s^n: nullable follows base, consistent with ZIPT's PowerToken
|
||||
// the exponent n is assumed to be a symbolic integer, may or may not be zero
|
||||
SASSERT(n->num_args() >= 1);
|
||||
snode* base = n->arg(0);
|
||||
n->m_ground = base->is_ground();
|
||||
n->m_regex_free = base->is_regex_free();
|
||||
n->m_nullable = base->is_nullable();
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
++m_stats.m_num_power;
|
||||
break;
|
||||
}
|
||||
|
||||
case snode_kind::s_star:
|
||||
SASSERT(n->num_args() == 1);
|
||||
n->m_ground = n->arg(0)->is_ground();
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = true;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_loop: {
|
||||
n->m_ground = n->num_args() > 0 ? n->arg(0)->is_ground() : true;
|
||||
n->m_regex_free = false;
|
||||
// nullable iff lower bound is 0: r{0,n} accepts the empty string
|
||||
unsigned lo = 1, hi = 0;
|
||||
expr* loop_body = nullptr;
|
||||
if (n->get_expr() &&
|
||||
!m_seq.re.is_loop(n->get_expr(), loop_body, lo, hi))
|
||||
m_seq.re.is_loop(n->get_expr(), loop_body, lo);
|
||||
n->m_nullable = (lo == 0);
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
case snode_kind::s_union:
|
||||
SASSERT(n->num_args() == 2);
|
||||
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = n->arg(0)->is_nullable() || n->arg(1)->is_nullable();
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_intersect:
|
||||
SASSERT(n->num_args() == 2);
|
||||
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = n->arg(0)->is_nullable() && n->arg(1)->is_nullable();
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_complement:
|
||||
SASSERT(n->num_args() == 1);
|
||||
n->m_ground = n->arg(0)->is_ground();
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = !n->arg(0)->is_nullable();
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_fail:
|
||||
n->m_ground = true;
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_full_char:
|
||||
n->m_ground = true;
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_full_seq:
|
||||
n->m_ground = true;
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = true;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_to_re:
|
||||
SASSERT(n->num_args() == 1);
|
||||
n->m_ground = n->arg(0)->is_ground();
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = n->arg(0)->is_nullable();
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_in_re:
|
||||
SASSERT(n->num_args() == 2);
|
||||
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
|
||||
n->m_regex_free = false;
|
||||
n->m_nullable = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
n->m_ground = true;
|
||||
n->m_regex_free = true;
|
||||
n->m_nullable = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static const unsigned HASH_BASE = 31;
|
||||
|
||||
// Compute a 2x2 polynomial hash matrix for associativity-respecting hashing.
|
||||
// Unsigned overflow is intentional and well-defined (mod 2^32).
|
||||
// M[0][0] tracks HASH_BASE^(num_leaves), which is always nonzero since
|
||||
// HASH_BASE is odd. M[0][1] is the actual hash value.
|
||||
void sgraph::compute_hash_matrix(snode* n) {
|
||||
if (n->is_empty()) {
|
||||
// identity matrix: concat with empty is identity
|
||||
n->m_hash_matrix[0][0] = 1;
|
||||
n->m_hash_matrix[0][1] = 0;
|
||||
n->m_hash_matrix[1][0] = 0;
|
||||
n->m_hash_matrix[1][1] = 1;
|
||||
}
|
||||
else if (n->is_concat()) {
|
||||
snode* l = n->arg(0);
|
||||
snode* r = n->arg(1);
|
||||
if (l->has_cached_hash() && r->has_cached_hash()) {
|
||||
// 2x2 matrix multiplication: M(L) * M(R)
|
||||
n->m_hash_matrix[0][0] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][0];
|
||||
n->m_hash_matrix[0][1] = l->m_hash_matrix[0][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[0][1] * r->m_hash_matrix[1][1];
|
||||
n->m_hash_matrix[1][0] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][0] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][0];
|
||||
n->m_hash_matrix[1][1] = l->m_hash_matrix[1][0] * r->m_hash_matrix[0][1] + l->m_hash_matrix[1][1] * r->m_hash_matrix[1][1];
|
||||
}
|
||||
}
|
||||
else {
|
||||
// leaf/token: [[HASH_BASE, value], [0, 1]]
|
||||
// +1 avoids zero hash values; wraps safely on unsigned overflow
|
||||
unsigned v = n->get_expr() ? n->get_expr()->get_id() + 1 : n->id() + 1;
|
||||
n->m_hash_matrix[0][0] = HASH_BASE;
|
||||
n->m_hash_matrix[0][1] = v;
|
||||
n->m_hash_matrix[1][0] = 0;
|
||||
n->m_hash_matrix[1][1] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
snode* sgraph::mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args) {
|
||||
unsigned id = m_nodes.size();
|
||||
snode* n = snode::mk(m_region, e, k, id, num_args, args);
|
||||
compute_metadata(n);
|
||||
compute_hash_matrix(n);
|
||||
m_nodes.push_back(n);
|
||||
if (e) {
|
||||
unsigned eid = e->get_id();
|
||||
m_expr2snode.reserve(eid + 1, nullptr);
|
||||
m_expr2snode[eid] = n;
|
||||
// pin expression via egraph (the egraph has an expr trail)
|
||||
mk_enode(e);
|
||||
}
|
||||
++m_stats.m_num_nodes;
|
||||
return n;
|
||||
}
|
||||
|
||||
snode* sgraph::mk(expr* e) {
|
||||
SASSERT(e);
|
||||
snode* n = find(e);
|
||||
if (n)
|
||||
return n;
|
||||
|
||||
snode_kind k = classify(e);
|
||||
|
||||
if (!is_app(e))
|
||||
return mk_snode(e, k, 0, nullptr);
|
||||
|
||||
app* a = to_app(e);
|
||||
unsigned arity = a->get_num_args();
|
||||
|
||||
// recursively register children
|
||||
// for seq/re children, create classified snodes
|
||||
// for other children (e.g. integer exponents), create s_other snodes
|
||||
snode_vector child_nodes;
|
||||
for (unsigned i = 0; i < arity; ++i) {
|
||||
expr* ch = a->get_arg(i);
|
||||
snode* cn = mk(ch);
|
||||
child_nodes.push_back(cn);
|
||||
}
|
||||
|
||||
return mk_snode(e, k, child_nodes.size(), child_nodes.data());
|
||||
}
|
||||
|
||||
snode* sgraph::find(expr* e) const {
|
||||
if (!e)
|
||||
return nullptr;
|
||||
unsigned eid = e->get_id();
|
||||
if (eid < m_expr2snode.size())
|
||||
return m_expr2snode[eid];
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enode* sgraph::mk_enode(expr* e) {
|
||||
enode* n = m_egraph.find(e);
|
||||
if (n) return n;
|
||||
enode_vector args;
|
||||
if (is_app(e)) {
|
||||
for (expr* arg : *to_app(e)) {
|
||||
enode* a = mk_enode(arg);
|
||||
args.push_back(a);
|
||||
}
|
||||
}
|
||||
return m_egraph.mk(e, 0, args.size(), args.data());
|
||||
}
|
||||
|
||||
void sgraph::push() {
|
||||
m_scopes.push_back(m_nodes.size());
|
||||
++m_num_scopes;
|
||||
m_egraph.push();
|
||||
}
|
||||
|
||||
void sgraph::pop(unsigned num_scopes) {
|
||||
if (num_scopes == 0)
|
||||
return;
|
||||
SASSERT(num_scopes <= m_num_scopes);
|
||||
unsigned new_lvl = m_num_scopes - num_scopes;
|
||||
unsigned old_sz = m_scopes[new_lvl];
|
||||
for (unsigned i = m_nodes.size(); i-- > old_sz; ) {
|
||||
snode* n = m_nodes[i];
|
||||
if (n->get_expr()) {
|
||||
unsigned eid = n->get_expr()->get_id();
|
||||
if (eid < m_expr2snode.size())
|
||||
m_expr2snode[eid] = nullptr;
|
||||
}
|
||||
}
|
||||
m_nodes.shrink(old_sz);
|
||||
m_scopes.shrink(new_lvl);
|
||||
m_num_scopes = new_lvl;
|
||||
m_egraph.pop(num_scopes);
|
||||
}
|
||||
|
||||
snode* sgraph::mk_var(symbol const& name) {
|
||||
expr_ref e(m.mk_const(name, m_str_sort), m);
|
||||
return mk(e);
|
||||
}
|
||||
|
||||
snode* sgraph::mk_char(unsigned ch) {
|
||||
expr_ref c(m_seq.str.mk_char(ch), m);
|
||||
expr_ref u(m_seq.str.mk_unit(c), m);
|
||||
return mk(u);
|
||||
}
|
||||
|
||||
snode* sgraph::mk_empty() {
|
||||
expr_ref e(m_seq.str.mk_empty(m_str_sort), m);
|
||||
return mk(e);
|
||||
}
|
||||
|
||||
snode* sgraph::mk_concat(snode* a, snode* b) {
|
||||
if (a->is_empty()) return b;
|
||||
if (b->is_empty()) return a;
|
||||
expr_ref e(m_seq.str.mk_concat(a->get_expr(), b->get_expr()), m);
|
||||
return mk(e);
|
||||
}
|
||||
|
||||
snode* sgraph::drop_first(snode* n) {
|
||||
if (n->is_empty() || n->is_token())
|
||||
return mk_empty();
|
||||
SASSERT(n->is_concat());
|
||||
snode* l = n->arg(0);
|
||||
snode* r = n->arg(1);
|
||||
if (l->is_token() || l->is_empty())
|
||||
return r;
|
||||
return mk_concat(drop_first(l), r);
|
||||
}
|
||||
|
||||
snode* sgraph::drop_last(snode* n) {
|
||||
if (n->is_empty() || n->is_token())
|
||||
return mk_empty();
|
||||
SASSERT(n->is_concat());
|
||||
snode* l = n->arg(0);
|
||||
snode* r = n->arg(1);
|
||||
if (r->is_token() || r->is_empty())
|
||||
return l;
|
||||
return mk_concat(l, drop_last(r));
|
||||
}
|
||||
|
||||
snode* sgraph::drop_left(snode* n, unsigned count) {
|
||||
for (unsigned i = 0; i < count && !n->is_empty(); ++i)
|
||||
n = drop_first(n);
|
||||
return n;
|
||||
}
|
||||
|
||||
snode* sgraph::drop_right(snode* n, unsigned count) {
|
||||
for (unsigned i = 0; i < count && !n->is_empty(); ++i)
|
||||
n = drop_last(n);
|
||||
return n;
|
||||
}
|
||||
|
||||
snode* sgraph::subst(snode* n, snode* var, snode* replacement) {
|
||||
if (n == var)
|
||||
return replacement;
|
||||
if (n->is_empty() || n->is_char())
|
||||
return n;
|
||||
if (n->is_concat()) {
|
||||
// check substitution cache (ZIPT-style optimization)
|
||||
if (n->m_subst_cache) {
|
||||
snode* cached = n->m_subst_cache->find(var->id(), replacement->id());
|
||||
if (cached)
|
||||
return cached;
|
||||
}
|
||||
snode* result = mk_concat(subst(n->arg(0), var, replacement),
|
||||
subst(n->arg(1), var, replacement));
|
||||
// cache the result
|
||||
if (!n->m_subst_cache) {
|
||||
n->m_subst_cache = alloc(snode_subst_cache);
|
||||
m_subst_caches.push_back(n->m_subst_cache);
|
||||
}
|
||||
n->m_subst_cache->insert(var->id(), replacement->id(), result);
|
||||
return result;
|
||||
}
|
||||
// for non-concat compound nodes (power, star, etc.), no substitution into children
|
||||
return n;
|
||||
}
|
||||
|
||||
snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
|
||||
expr* re_expr = re->get_expr();
|
||||
expr* elem_expr = elem->get_expr();
|
||||
if (!re_expr || !elem_expr)
|
||||
return nullptr;
|
||||
// unwrap str.unit to get the character expression
|
||||
expr* ch = nullptr;
|
||||
if (m_seq.str.is_unit(elem_expr, ch))
|
||||
elem_expr = ch;
|
||||
expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
|
||||
if (!result)
|
||||
return nullptr;
|
||||
return mk(result);
|
||||
}
|
||||
|
||||
void sgraph::collect_re_predicates(snode* re, expr_ref_vector& preds) {
|
||||
if (!re || !re->get_expr())
|
||||
return;
|
||||
expr* e = re->get_expr();
|
||||
expr* ch = nullptr, *lo = nullptr, *hi = nullptr;
|
||||
// leaf regex predicates: character ranges and single characters
|
||||
if (m_seq.re.is_range(e, lo, hi)) {
|
||||
preds.push_back(e);
|
||||
return;
|
||||
}
|
||||
if (m_seq.re.is_to_re(e))
|
||||
return;
|
||||
if (m_seq.re.is_full_char(e))
|
||||
return;
|
||||
if (m_seq.re.is_full_seq(e))
|
||||
return;
|
||||
if (m_seq.re.is_empty(e))
|
||||
return;
|
||||
// recurse into compound regex operators
|
||||
for (unsigned i = 0; i < re->num_args(); ++i)
|
||||
collect_re_predicates(re->arg(i), preds);
|
||||
}
|
||||
|
||||
void sgraph::compute_minterms(snode* re, snode_vector& minterms) {
|
||||
// extract character predicates from the regex
|
||||
expr_ref_vector preds(m);
|
||||
collect_re_predicates(re, preds);
|
||||
if (preds.empty()) {
|
||||
// no predicates means the whole alphabet is one minterm
|
||||
// represented by full_char
|
||||
expr_ref fc(m_seq.re.mk_full_char(m_str_sort), m);
|
||||
minterms.push_back(mk(fc));
|
||||
return;
|
||||
}
|
||||
// generate minterms as conjunctions/negations of predicates
|
||||
// for n predicates, there are up to 2^n minterms
|
||||
unsigned n = preds.size();
|
||||
// cap at reasonable size to prevent exponential blowup
|
||||
if (n > 20)
|
||||
n = 20;
|
||||
for (unsigned mask = 0; mask < (1u << n); ++mask) {
|
||||
expr_ref_vector conj(m);
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
if (mask & (1u << i))
|
||||
conj.push_back(preds.get(i));
|
||||
else
|
||||
conj.push_back(m_seq.re.mk_complement(preds.get(i)));
|
||||
}
|
||||
SASSERT(!conj.empty());
|
||||
// intersect all terms
|
||||
expr_ref mt(conj.get(0), m);
|
||||
for (unsigned i = 1; i < conj.size(); ++i)
|
||||
mt = m_seq.re.mk_inter(mt, conj.get(i));
|
||||
minterms.push_back(mk(mt));
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& sgraph::display(std::ostream& out) const {
|
||||
auto kind_str = [](snode_kind k) -> char const* {
|
||||
switch (k) {
|
||||
case snode_kind::s_empty: return "empty";
|
||||
case snode_kind::s_char: return "char";
|
||||
case snode_kind::s_var: return "var";
|
||||
case snode_kind::s_unit: return "unit";
|
||||
case snode_kind::s_concat: return "concat";
|
||||
case snode_kind::s_power: return "power";
|
||||
case snode_kind::s_star: return "star";
|
||||
case snode_kind::s_loop: return "loop";
|
||||
case snode_kind::s_union: return "union";
|
||||
case snode_kind::s_intersect: return "intersect";
|
||||
case snode_kind::s_complement: return "complement";
|
||||
case snode_kind::s_fail: return "fail";
|
||||
case snode_kind::s_full_char: return "full_char";
|
||||
case snode_kind::s_full_seq: return "full_seq";
|
||||
case snode_kind::s_to_re: return "to_re";
|
||||
case snode_kind::s_in_re: return "in_re";
|
||||
case snode_kind::s_other: return "other";
|
||||
}
|
||||
return "?";
|
||||
};
|
||||
for (snode* n : m_nodes) {
|
||||
out << "snode[" << n->id() << "] "
|
||||
<< kind_str(n->kind())
|
||||
<< " level=" << n->level()
|
||||
<< " len=" << n->length()
|
||||
<< " ground=" << n->is_ground()
|
||||
<< " rfree=" << n->is_regex_free()
|
||||
<< " nullable=" << n->is_nullable();
|
||||
if (n->num_args() > 0) {
|
||||
out << " args=(";
|
||||
for (unsigned i = 0; i < n->num_args(); ++i) {
|
||||
if (i > 0) out << ", ";
|
||||
out << n->arg(i)->id();
|
||||
}
|
||||
out << ")";
|
||||
}
|
||||
if (n->get_expr())
|
||||
out << " expr=" << mk_pp(n->get_expr(), m);
|
||||
out << "\n";
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
void sgraph::collect_statistics(statistics& st) const {
|
||||
st.update("seq-graph-nodes", m_stats.m_num_nodes);
|
||||
st.update("seq-graph-concat", m_stats.m_num_concat);
|
||||
st.update("seq-graph-power", m_stats.m_num_power);
|
||||
st.update("seq-graph-hash-hits", m_stats.m_num_hash_hits);
|
||||
m_egraph.collect_statistics(st);
|
||||
}
|
||||
|
||||
}
|
||||
159
src/ast/euf/euf_sgraph.h
Normal file
159
src/ast/euf/euf_sgraph.h
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_sgraph.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Sequence/string graph layer
|
||||
|
||||
Encapsulates string and regex expressions for the string solver.
|
||||
Implements the string graph layer from ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT).
|
||||
The sgraph maps Z3 sequence/regex AST expressions to snode structures
|
||||
organized as binary concatenation trees with metadata, and owns an
|
||||
egraph with a seq_plugin for congruence closure.
|
||||
|
||||
-- snode classification: empty, char, variable, unit, concat, power,
|
||||
star, loop, union, intersection, complement, fail, full_char,
|
||||
full_seq, to_re, in_re, other.
|
||||
-- Metadata computation: ground, regex_free, nullable, level, length.
|
||||
-- Expression registration via mk(expr*), lookup via find(expr*).
|
||||
-- Scope management: push/pop with backtracking.
|
||||
-- egraph ownership with seq_plugin for:
|
||||
* concat associativity via associativity-respecting hash table,
|
||||
* Kleene star merging (u.v*.v*.w = u.v*.w),
|
||||
* nullable absorption next to .* (u.*.v.w = u.*.w when v nullable),
|
||||
* str.++ identity elimination (concat(a, ε) = a),
|
||||
* re.++ identity/absorption (concat(a, epsilon) = a, concat(a, ∅) = ∅).
|
||||
-- enode registration via mk_enode(expr*).
|
||||
|
||||
ZIPT features not yet ported:
|
||||
|
||||
-- Str operations: normalisation with union-find representatives and
|
||||
cache migration, balanced tree maintenance, drop left/right with
|
||||
caching, substitution, indexed access, iteration, ToList caching,
|
||||
simplification, derivative computation, structural equality with
|
||||
associative hashing, rotation equality, expression reconstruction,
|
||||
Graphviz export.
|
||||
|
||||
-- StrToken subclasses: SymCharToken, StrAtToken, SubStrToken,
|
||||
SetToken, PostToken/PreToken.
|
||||
|
||||
-- StrToken features: Nielsen-style GetDecomposition with side
|
||||
constraints, NamedStrToken extension tracking for variable
|
||||
splitting with PowerExtension, CollectSymbols for Parikh analysis,
|
||||
MinTerms for character class analysis, token ordering, Derivable
|
||||
and BasicRegex flags.
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/region.h"
|
||||
#include "util/statistics.h"
|
||||
#include "ast/ast.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
#include "ast/euf/euf_snode.h"
|
||||
#include "ast/euf/euf_egraph.h"
|
||||
|
||||
namespace euf {
|
||||
|
||||
class seq_plugin;
|
||||
|
||||
class sgraph {
|
||||
|
||||
struct stats {
|
||||
unsigned m_num_nodes;
|
||||
unsigned m_num_concat;
|
||||
unsigned m_num_power;
|
||||
unsigned m_num_hash_hits;
|
||||
stats() { reset(); }
|
||||
void reset() { memset(this, 0, sizeof(*this)); }
|
||||
};
|
||||
|
||||
ast_manager& m;
|
||||
seq_util m_seq;
|
||||
seq_rewriter m_rewriter;
|
||||
egraph& m_egraph;
|
||||
region m_region;
|
||||
snode_vector m_nodes;
|
||||
sort_ref m_str_sort; // cached string sort
|
||||
unsigned_vector m_scopes;
|
||||
unsigned m_num_scopes = 0;
|
||||
stats m_stats;
|
||||
bool m_add_plugin; // whether sgraph created the seq_plugin
|
||||
|
||||
// tracks allocated subst caches for cleanup
|
||||
ptr_vector<snode_subst_cache> m_subst_caches;
|
||||
|
||||
// maps expression id to snode
|
||||
ptr_vector<snode> m_expr2snode;
|
||||
|
||||
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
|
||||
snode_kind classify(expr* e) const;
|
||||
void compute_metadata(snode* n);
|
||||
void compute_hash_matrix(snode* n);
|
||||
void collect_re_predicates(snode* re, expr_ref_vector& preds);
|
||||
|
||||
public:
|
||||
sgraph(ast_manager& m, egraph& eg, bool add_plugin = true);
|
||||
~sgraph();
|
||||
|
||||
ast_manager& get_manager() const { return m; }
|
||||
seq_util& get_seq_util() { return m_seq; }
|
||||
egraph& get_egraph() { return m_egraph; }
|
||||
egraph const& get_egraph() const { return m_egraph; }
|
||||
|
||||
// register an expression and return its snode
|
||||
snode* mk(expr* e);
|
||||
|
||||
// lookup an already-registered expression
|
||||
snode* find(expr* e) const;
|
||||
|
||||
// register expression in both sgraph and egraph
|
||||
enode* mk_enode(expr* e);
|
||||
|
||||
// factory methods for creating snodes with corresponding expressions
|
||||
snode* mk_var(symbol const& name);
|
||||
snode* mk_char(unsigned ch);
|
||||
snode* mk_empty();
|
||||
snode* mk_concat(snode* a, snode* b);
|
||||
|
||||
// drop operations: remove tokens from the front/back of a concat tree
|
||||
snode* drop_first(snode* n);
|
||||
snode* drop_last(snode* n);
|
||||
snode* drop_left(snode* n, unsigned count);
|
||||
snode* drop_right(snode* n, unsigned count);
|
||||
|
||||
// substitution: replace all occurrences of var in n by replacement
|
||||
snode* subst(snode* n, snode* var, snode* replacement);
|
||||
|
||||
// Brzozowski derivative of regex re with respect to element elem
|
||||
snode* brzozowski_deriv(snode* re, snode* elem);
|
||||
|
||||
// compute minterms (character class partition) from a regex
|
||||
void compute_minterms(snode* re, snode_vector& minterms);
|
||||
|
||||
// scope management for backtracking
|
||||
void push();
|
||||
void pop(unsigned num_scopes);
|
||||
|
||||
// access
|
||||
snode_vector const& nodes() const { return m_nodes; }
|
||||
unsigned num_nodes() const { return m_nodes.size(); }
|
||||
|
||||
// display
|
||||
std::ostream& display(std::ostream& out) const;
|
||||
|
||||
void collect_statistics(statistics& st) const;
|
||||
};
|
||||
|
||||
}
|
||||
201
src/ast/euf/euf_snode.h
Normal file
201
src/ast/euf/euf_snode.h
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_snode.h
|
||||
|
||||
Abstract:
|
||||
|
||||
snode layer for sequence/string graph
|
||||
|
||||
Encapsulates strings in the style of euf_enode.h.
|
||||
Maps Z3 sequence expressions to a ZIPT-style representation where
|
||||
strings are composed of tokens (characters, variables, powers, regex, etc.)
|
||||
organized as a binary tree of concatenations.
|
||||
|
||||
Author:
|
||||
|
||||
Nikolaj Bjorner (nbjorner) 2026-03-01
|
||||
Clemens Eisenhofer 2026-03-01
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/vector.h"
|
||||
#include "util/region.h"
|
||||
#include "ast/ast.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
|
||||
namespace euf {
|
||||
|
||||
class sgraph;
|
||||
class snode;
|
||||
struct snode_subst_cache;
|
||||
|
||||
typedef ptr_vector<snode> snode_vector;
|
||||
|
||||
enum class snode_kind {
|
||||
s_empty, // empty string (OP_SEQ_EMPTY or empty string constant)
|
||||
s_char, // concrete character unit (OP_SEQ_UNIT wrapping a char literal)
|
||||
s_var, // string variable (uninterpreted constant of string sort)
|
||||
s_unit, // generic unit (OP_SEQ_UNIT with non-literal character)
|
||||
s_concat, // concatenation of two snodes (OP_SEQ_CONCAT)
|
||||
s_power, // string exponentiation s^n (OP_SEQ_POWER)
|
||||
s_star, // Kleene star r* (OP_RE_STAR)
|
||||
s_loop, // bounded loop r{lo,hi} (OP_RE_LOOP)
|
||||
s_union, // union r1|r2 (OP_RE_UNION)
|
||||
s_intersect, // intersection r1&r2 (OP_RE_INTERSECT)
|
||||
s_complement, // complement ~r (OP_RE_COMPLEMENT)
|
||||
s_fail, // empty language (OP_RE_EMPTY_SET)
|
||||
s_full_char, // full character set (OP_RE_FULL_CHAR_SET)
|
||||
s_full_seq, // full sequence set r=.* (OP_RE_FULL_SEQ_SET)
|
||||
s_to_re, // string to regex (OP_SEQ_TO_RE)
|
||||
s_in_re, // regex membership (OP_SEQ_IN_RE)
|
||||
s_other, // other sequence expression not directly classified
|
||||
};
|
||||
|
||||
class snode {
|
||||
expr* m_expr = nullptr;
|
||||
snode_kind m_kind = snode_kind::s_other;
|
||||
unsigned m_id = UINT_MAX;
|
||||
unsigned m_num_args = 0;
|
||||
|
||||
// metadata flags, analogous to ZIPT's Str/StrToken properties
|
||||
bool m_ground = true; // no uninterpreted string variables
|
||||
bool m_regex_free = true; // no regex constructs
|
||||
bool m_nullable = false; // accepts the empty string
|
||||
unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons)
|
||||
unsigned m_length = 0; // token count, number of leaf tokens in the tree
|
||||
|
||||
// hash matrix for associativity-respecting hashing (2x2 polynomial hash matrix)
|
||||
// all zeros means not cached, non-zero means cached
|
||||
unsigned m_hash_matrix[2][2] = {{0,0},{0,0}};
|
||||
|
||||
// substitution cache (lazy-initialized, owned by sgraph)
|
||||
snode_subst_cache* m_subst_cache = nullptr;
|
||||
|
||||
snode* m_args[0]; // variable-length array, allocated via get_snode_size(num_args)
|
||||
|
||||
friend class sgraph;
|
||||
|
||||
static unsigned get_snode_size(unsigned num_args) {
|
||||
return sizeof(snode) + num_args * sizeof(snode*);
|
||||
}
|
||||
|
||||
static snode* mk(region& r, expr* e, snode_kind k, unsigned id, unsigned num_args, snode* const* args) {
|
||||
void* mem = r.allocate(get_snode_size(num_args));
|
||||
snode* n = new (mem) snode();
|
||||
n->m_expr = e;
|
||||
n->m_kind = k;
|
||||
n->m_id = id;
|
||||
n->m_num_args = num_args;
|
||||
for (unsigned i = 0; i < num_args; ++i)
|
||||
n->m_args[i] = args[i];
|
||||
return n;
|
||||
}
|
||||
|
||||
public:
|
||||
expr* get_expr() const { return m_expr; }
|
||||
snode_kind kind() const { return m_kind; }
|
||||
unsigned id() const { return m_id; }
|
||||
unsigned num_args() const { return m_num_args; }
|
||||
snode* arg(unsigned i) const { SASSERT(i < m_num_args); return m_args[i]; }
|
||||
|
||||
bool is_ground() const { return m_ground; }
|
||||
bool is_regex_free() const { return m_regex_free; }
|
||||
bool is_nullable() const { return m_nullable; }
|
||||
unsigned level() const { return m_level; }
|
||||
unsigned length() const { return m_length; }
|
||||
|
||||
// associativity-respecting hash: cached if the 2x2 matrix is non-zero.
|
||||
// M[0][0] = HASH_BASE^(num_leaves) which is always nonzero since HASH_BASE
|
||||
// is odd and gcd(odd, 2^32) = 1, so the check is safe.
|
||||
bool has_cached_hash() const { return m_hash_matrix[0][0] != 0; }
|
||||
unsigned assoc_hash() const { return m_hash_matrix[0][1]; }
|
||||
|
||||
bool is_empty() const { return m_kind == snode_kind::s_empty; }
|
||||
bool is_char() const { return m_kind == snode_kind::s_char; }
|
||||
bool is_var() const { return m_kind == snode_kind::s_var; }
|
||||
bool is_unit() const { return m_kind == snode_kind::s_unit; }
|
||||
bool is_concat() const { return m_kind == snode_kind::s_concat; }
|
||||
bool is_power() const { return m_kind == snode_kind::s_power; }
|
||||
bool is_star() const { return m_kind == snode_kind::s_star; }
|
||||
bool is_loop() const { return m_kind == snode_kind::s_loop; }
|
||||
bool is_union() const { return m_kind == snode_kind::s_union; }
|
||||
bool is_intersect() const { return m_kind == snode_kind::s_intersect; }
|
||||
bool is_complement() const { return m_kind == snode_kind::s_complement; }
|
||||
bool is_fail() const { return m_kind == snode_kind::s_fail; }
|
||||
bool is_full_char() const { return m_kind == snode_kind::s_full_char; }
|
||||
bool is_full_seq() const { return m_kind == snode_kind::s_full_seq; }
|
||||
bool is_to_re() const { return m_kind == snode_kind::s_to_re; }
|
||||
bool is_in_re() const { return m_kind == snode_kind::s_in_re; }
|
||||
|
||||
// is this a leaf token (analogous to ZIPT's StrToken as opposed to Str)
|
||||
bool is_token() const {
|
||||
switch (m_kind) {
|
||||
case snode_kind::s_empty:
|
||||
case snode_kind::s_concat:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// analogous to ZIPT's Str.First / Str.Last
|
||||
snode const* first() const {
|
||||
snode const* s = this;
|
||||
while (s->is_concat())
|
||||
s = s->arg(0);
|
||||
return s;
|
||||
}
|
||||
|
||||
snode const* last() const {
|
||||
snode const* s = this;
|
||||
while (s->is_concat())
|
||||
s = s->arg(1);
|
||||
return s;
|
||||
}
|
||||
|
||||
snode* first() {
|
||||
snode* s = this;
|
||||
while (s->is_concat())
|
||||
s = s->arg(0);
|
||||
return s;
|
||||
}
|
||||
|
||||
snode* last() {
|
||||
snode* s = this;
|
||||
while (s->is_concat())
|
||||
s = s->arg(1);
|
||||
return s;
|
||||
}
|
||||
|
||||
// collect all leaf tokens in left-to-right order
|
||||
void collect_tokens(snode_vector& tokens) const {
|
||||
if (is_concat()) {
|
||||
arg(0)->collect_tokens(tokens);
|
||||
arg(1)->collect_tokens(tokens);
|
||||
}
|
||||
else if (!is_empty()) {
|
||||
tokens.push_back(const_cast<snode*>(this));
|
||||
}
|
||||
}
|
||||
|
||||
// access the i-th token (0-based, left-to-right order)
|
||||
// returns nullptr if i >= length()
|
||||
snode* at(unsigned i) const {
|
||||
if (is_concat()) {
|
||||
unsigned left_len = arg(0)->length();
|
||||
if (i < left_len)
|
||||
return arg(0)->at(i);
|
||||
return arg(1)->at(i - left_len);
|
||||
}
|
||||
if (is_empty())
|
||||
return nullptr;
|
||||
return i == 0 ? const_cast<snode*>(this) : nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
|
@ -222,6 +222,7 @@ void seq_decl_plugin::init() {
|
|||
m_sigs[OP_SEQ_NTH_I] = alloc(psig, m, "seq.nth_i", 1, 2, seqAintT, A);
|
||||
m_sigs[OP_SEQ_NTH_U] = alloc(psig, m, "seq.nth_u", 1, 2, seqAintT, A);
|
||||
m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT);
|
||||
m_sigs[OP_SEQ_POWER] = alloc(psig, m, "seq.power", 1, 2, seqAintT, seqA);
|
||||
m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA);
|
||||
m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA);
|
||||
m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA);
|
||||
|
|
@ -591,6 +592,10 @@ func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, p
|
|||
add_map_sig();
|
||||
return mk_str_fun(k, arity, domain, range, k);
|
||||
|
||||
case OP_SEQ_POWER:
|
||||
match(*m_sigs[k], arity, domain, range, rng);
|
||||
return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
|
||||
|
||||
case OP_SEQ_TO_RE:
|
||||
m_has_re = true;
|
||||
return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP);
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ enum seq_op_kind {
|
|||
OP_SEQ_MAPI, // Array[Int,A,B] -> Int -> Seq[A] -> Seq[B]
|
||||
OP_SEQ_FOLDL, // Array[B,A,B] -> B -> Seq[A] -> B
|
||||
OP_SEQ_FOLDLI, // Array[Int,B,A,B] -> Int -> B -> Seq[A] -> B
|
||||
OP_SEQ_POWER, // Seq -> Int -> Seq, string exponentiation s^n
|
||||
|
||||
OP_RE_PLUS,
|
||||
OP_RE_STAR,
|
||||
|
|
@ -307,6 +308,7 @@ public:
|
|||
app* mk_mapi(expr* f, expr* i, expr* s) const { expr* es[3] = { f, i, s }; return m.mk_app(m_fid, OP_SEQ_MAPI, 3, es); }
|
||||
app* mk_foldl(expr* f, expr* b, expr* s) const { expr* es[3] = { f, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDL, 3, es); }
|
||||
app* mk_foldli(expr* f, expr* i, expr* b, expr* s) const { expr* es[4] = { f, i, b, s }; return m.mk_app(m_fid, OP_SEQ_FOLDLI, 4, es); }
|
||||
app* mk_power(expr* s, expr* n) const { expr* es[2] = { s, n }; return m.mk_app(m_fid, OP_SEQ_POWER, 2, es); }
|
||||
|
||||
app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); }
|
||||
app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); }
|
||||
|
|
@ -348,6 +350,7 @@ public:
|
|||
bool is_mapi(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_MAPI); }
|
||||
bool is_foldl(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDL); }
|
||||
bool is_foldli(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_FOLDLI); }
|
||||
bool is_power(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_POWER); }
|
||||
bool is_extract(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_EXTRACT); }
|
||||
bool is_contains(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_CONTAINS); }
|
||||
bool is_at(expr const* n) const { return is_app_of(n, m_fid, OP_SEQ_AT); }
|
||||
|
|
@ -404,6 +407,7 @@ public:
|
|||
MATCH_TERNARY(is_mapi);
|
||||
MATCH_TERNARY(is_foldl);
|
||||
MATCH_QUATARY(is_foldli);
|
||||
MATCH_BINARY(is_power);
|
||||
MATCH_BINARY(is_last_index);
|
||||
MATCH_TERNARY(is_replace);
|
||||
MATCH_TERNARY(is_replace_re);
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ add_executable(test-z3
|
|||
escaped.cpp
|
||||
euf_bv_plugin.cpp
|
||||
euf_arith_plugin.cpp
|
||||
euf_sgraph.cpp
|
||||
euf_seq_plugin.cpp
|
||||
ex.cpp
|
||||
expr_rand.cpp
|
||||
expr_substitution.cpp
|
||||
|
|
|
|||
240
src/test/euf_seq_plugin.cpp
Normal file
240
src/test/euf_seq_plugin.cpp
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
--*/
|
||||
|
||||
#include "util/util.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "ast/euf/euf_seq_plugin.h"
|
||||
#include "ast/euf/euf_egraph.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include <iostream>
|
||||
|
||||
static unsigned s_var = 0;
|
||||
|
||||
static euf::enode* get_node(euf::egraph& g, seq_util& seq, expr* e) {
|
||||
auto* n = g.find(e);
|
||||
if (n) return n;
|
||||
euf::enode_vector args;
|
||||
if (is_app(e))
|
||||
for (expr* arg : *to_app(e))
|
||||
args.push_back(get_node(g, seq, arg));
|
||||
n = g.mk(e, 0, args.size(), args.data());
|
||||
if (seq.is_seq(e) || seq.is_re(e))
|
||||
g.add_th_var(n, ++s_var, seq.get_family_id());
|
||||
return n;
|
||||
}
|
||||
|
||||
// test sgraph: basic classification and metadata
|
||||
static void test_sgraph_basic() {
|
||||
std::cout << "test_sgraph_basic\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref y(m.mk_const("y", str_sort), m);
|
||||
expr_ref empty(seq.str.mk_empty(str_sort), m);
|
||||
expr_ref xy(seq.str.mk_concat(x, y), m);
|
||||
|
||||
euf::snode* sx = sg.mk(x);
|
||||
SASSERT(sx);
|
||||
SASSERT(sx->is_var());
|
||||
SASSERT(!sx->is_ground());
|
||||
SASSERT(sx->is_regex_free());
|
||||
SASSERT(!sx->is_nullable());
|
||||
SASSERT(sx->length() == 1);
|
||||
|
||||
euf::snode* se = sg.mk(empty);
|
||||
SASSERT(se);
|
||||
SASSERT(se->is_empty());
|
||||
SASSERT(se->is_ground());
|
||||
SASSERT(se->is_nullable());
|
||||
SASSERT(se->length() == 0);
|
||||
|
||||
euf::snode* sxy = sg.mk(xy);
|
||||
SASSERT(sxy);
|
||||
SASSERT(sxy->is_concat());
|
||||
SASSERT(!sxy->is_ground());
|
||||
SASSERT(sxy->length() == 2);
|
||||
SASSERT(sxy->num_args() == 2);
|
||||
|
||||
std::cout << "sgraph:\n";
|
||||
sg.display(std::cout);
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
// test sgraph: backtracking with push/pop
|
||||
static void test_sgraph_backtrack() {
|
||||
std::cout << "test_sgraph_backtrack\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref y(m.mk_const("y", str_sort), m);
|
||||
|
||||
sg.mk(x);
|
||||
unsigned before = sg.num_nodes();
|
||||
|
||||
sg.push();
|
||||
|
||||
expr_ref xy(seq.str.mk_concat(x, y), m);
|
||||
sg.mk(xy);
|
||||
SASSERT(sg.num_nodes() > before);
|
||||
|
||||
sg.pop(1);
|
||||
// y and xy were created inside the scope, so some nodes should be removed
|
||||
// x was created before the scope, so it should persist
|
||||
SASSERT(sg.find(x));
|
||||
}
|
||||
|
||||
// test seq_plugin: concat associativity is normalized by the plugin
|
||||
static void test_seq_plugin_assoc() {
|
||||
std::cout << "test_seq_plugin_assoc\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
euf::egraph& g = sg.get_egraph();
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref a(m.mk_const("a", str_sort), m);
|
||||
expr_ref b(m.mk_const("b", str_sort), m);
|
||||
expr_ref c(m.mk_const("c", str_sort), m);
|
||||
|
||||
// register nodes in egraph
|
||||
// concat(concat(a,b),c) should be merged with concat(a,concat(b,c))
|
||||
expr_ref ab(seq.str.mk_concat(a, b), m);
|
||||
expr_ref ab_c(seq.str.mk_concat(ab, c), m);
|
||||
|
||||
euf::enode* nab_c = get_node(g, seq, ab_c);
|
||||
g.propagate();
|
||||
|
||||
// the plugin should have created a right-associated form and merged
|
||||
std::cout << g << "\n";
|
||||
}
|
||||
|
||||
// test seq_plugin: empty string elimination
|
||||
static void test_seq_plugin_empty() {
|
||||
std::cout << "test_seq_plugin_empty\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
euf::egraph& g = sg.get_egraph();
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref empty(seq.str.mk_empty(str_sort), m);
|
||||
expr_ref xe(seq.str.mk_concat(x, empty), m);
|
||||
|
||||
auto* nxe = get_node(g, seq, xe);
|
||||
auto* nx = g.find(x);
|
||||
g.propagate();
|
||||
|
||||
// concat(x, empty) should be merged with x
|
||||
SASSERT(nxe->get_root() == nx->get_root());
|
||||
std::cout << g << "\n";
|
||||
}
|
||||
|
||||
// test seq_plugin: Kleene star merging
|
||||
// The seq_plugin detects when star bodies are congruent
|
||||
// This tests the same_star_body logic at the regex level
|
||||
static void test_seq_plugin_star_merge() {
|
||||
std::cout << "test_seq_plugin_star_merge\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
euf::egraph& g = sg.get_egraph();
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
// re.star(to_re(x))
|
||||
expr_ref to_re_x(seq.re.mk_to_re(x), m);
|
||||
expr_ref star_x(seq.re.mk_star(to_re_x), m);
|
||||
// use regex concat for star * star
|
||||
expr_ref star_star(seq.re.mk_concat(star_x, star_x), m);
|
||||
|
||||
// register in sgraph
|
||||
sg.mk(star_star);
|
||||
euf::snode* s = sg.find(star_x);
|
||||
SASSERT(s && s->is_star());
|
||||
SASSERT(s->is_nullable());
|
||||
|
||||
std::cout << g << "\n";
|
||||
}
|
||||
|
||||
// test seq_plugin: nullable absorption by .*
|
||||
// concat(.*, nullable) should merge with .*
|
||||
static void test_seq_plugin_nullable_absorb() {
|
||||
std::cout << "test_seq_plugin_nullable_absorb\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
euf::egraph& g = sg.get_egraph();
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref empty(seq.str.mk_empty(str_sort), m);
|
||||
// concat(x, empty) = x (empty is nullable, exercises nullable check)
|
||||
expr_ref xe(seq.str.mk_concat(x, empty), m);
|
||||
|
||||
auto* nxe = get_node(g, seq, xe);
|
||||
auto* nx = g.find(x);
|
||||
g.propagate();
|
||||
|
||||
// concat(x, empty) should be merged with x (empty elimination)
|
||||
SASSERT(nxe->get_root() == nx->get_root());
|
||||
std::cout << g << "\n";
|
||||
}
|
||||
|
||||
// test sgraph owns egraph and syncs push/pop
|
||||
static void test_sgraph_egraph_sync() {
|
||||
std::cout << "test_sgraph_egraph_sync\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
euf::egraph& g = sg.get_egraph();
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref y(m.mk_const("y", str_sort), m);
|
||||
auto* nx = get_node(g, seq, x);
|
||||
auto* ny = get_node(g, seq, y);
|
||||
|
||||
sg.push();
|
||||
|
||||
g.merge(nx, ny, nullptr);
|
||||
g.propagate();
|
||||
SASSERT(nx->get_root() == ny->get_root());
|
||||
|
||||
sg.pop(1);
|
||||
// after pop, the merge should be undone
|
||||
SASSERT(nx->get_root() != ny->get_root());
|
||||
}
|
||||
|
||||
void tst_euf_seq_plugin() {
|
||||
s_var = 0; test_sgraph_basic();
|
||||
s_var = 0; test_sgraph_backtrack();
|
||||
s_var = 0; test_seq_plugin_assoc();
|
||||
s_var = 0; test_seq_plugin_empty();
|
||||
s_var = 0; test_seq_plugin_star_merge();
|
||||
s_var = 0; test_seq_plugin_nullable_absorb();
|
||||
s_var = 0; test_sgraph_egraph_sync();
|
||||
}
|
||||
816
src/test/euf_sgraph.cpp
Normal file
816
src/test/euf_sgraph.cpp
Normal file
|
|
@ -0,0 +1,816 @@
|
|||
/*++
|
||||
Copyright (c) 2026 Microsoft Corporation
|
||||
|
||||
Module Name:
|
||||
|
||||
euf_sgraph.cpp
|
||||
|
||||
Abstract:
|
||||
|
||||
Self-contained unit tests for the sgraph string graph layer.
|
||||
Tests snode classification, metadata computation, push/pop
|
||||
backtracking, associativity-respecting hash table, compound
|
||||
node construction, and snode navigation.
|
||||
|
||||
--*/
|
||||
|
||||
#include "util/util.h"
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "ast/reg_decl_plugins.h"
|
||||
#include "ast/ast_pp.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
// test classification and metadata for basic string nodes:
|
||||
// variables, empty strings, characters, units, and concats
|
||||
static void test_sgraph_classify() {
|
||||
std::cout << "test_sgraph_classify\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
// string variable
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
euf::snode* sx = sg.mk(x);
|
||||
SASSERT(sx && sx->is_var());
|
||||
SASSERT(!sx->is_ground());
|
||||
SASSERT(sx->is_regex_free());
|
||||
SASSERT(!sx->is_nullable());
|
||||
SASSERT(sx->level() == 1);
|
||||
SASSERT(sx->length() == 1);
|
||||
SASSERT(sx->is_token());
|
||||
|
||||
// empty string
|
||||
expr_ref empty(seq.str.mk_empty(str_sort), m);
|
||||
euf::snode* se = sg.mk(empty);
|
||||
SASSERT(se && se->is_empty());
|
||||
SASSERT(se->is_ground());
|
||||
SASSERT(se->is_nullable());
|
||||
SASSERT(se->level() == 0);
|
||||
SASSERT(se->length() == 0);
|
||||
SASSERT(!se->is_token());
|
||||
|
||||
// character unit with literal char
|
||||
expr_ref ch(seq.str.mk_char('A'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch), m);
|
||||
euf::snode* sca = sg.mk(unit_a);
|
||||
SASSERT(sca && sca->is_char());
|
||||
SASSERT(sca->is_ground());
|
||||
SASSERT(!sca->is_nullable());
|
||||
SASSERT(sca->level() == 1);
|
||||
SASSERT(sca->length() == 1);
|
||||
SASSERT(sca->is_token());
|
||||
|
||||
// concat of two variables
|
||||
expr_ref y(m.mk_const("y", str_sort), m);
|
||||
expr_ref xy(seq.str.mk_concat(x, y), m);
|
||||
euf::snode* sxy = sg.mk(xy);
|
||||
SASSERT(sxy && sxy->is_concat());
|
||||
SASSERT(!sxy->is_ground());
|
||||
SASSERT(sxy->is_regex_free());
|
||||
SASSERT(!sxy->is_nullable());
|
||||
SASSERT(sxy->level() == 2);
|
||||
SASSERT(sxy->length() == 2);
|
||||
SASSERT(sxy->num_args() == 2);
|
||||
SASSERT(!sxy->is_token());
|
||||
|
||||
sg.display(std::cout);
|
||||
}
|
||||
|
||||
// test classification for regex nodes:
|
||||
// star, union, intersection, complement, full_seq, full_char, fail, to_re, in_re
|
||||
static void test_sgraph_regex() {
|
||||
std::cout << "test_sgraph_regex\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
|
||||
// to_re
|
||||
expr_ref to_re_x(seq.re.mk_to_re(x), m);
|
||||
euf::snode* str = sg.mk(to_re_x);
|
||||
SASSERT(str && str->is_to_re());
|
||||
SASSERT(!str->is_regex_free());
|
||||
SASSERT(!str->is_nullable()); // to_re(x) nullable iff x nullable, x is var so not nullable
|
||||
SASSERT(str->num_args() == 1);
|
||||
|
||||
// star
|
||||
expr_ref star_x(seq.re.mk_star(to_re_x), m);
|
||||
euf::snode* ss = sg.mk(star_x);
|
||||
SASSERT(ss && ss->is_star());
|
||||
SASSERT(!ss->is_regex_free());
|
||||
SASSERT(ss->is_nullable()); // star is always nullable
|
||||
SASSERT(ss->num_args() == 1);
|
||||
|
||||
// full_seq (.*)
|
||||
expr_ref full_seq(seq.re.mk_full_seq(str_sort), m);
|
||||
euf::snode* sfs = sg.mk(full_seq);
|
||||
SASSERT(sfs && sfs->is_full_seq());
|
||||
SASSERT(sfs->is_ground());
|
||||
SASSERT(sfs->is_nullable());
|
||||
|
||||
// full_char (.)
|
||||
expr_ref full_char(seq.re.mk_full_char(str_sort), m);
|
||||
euf::snode* sfc = sg.mk(full_char);
|
||||
SASSERT(sfc && sfc->is_full_char());
|
||||
SASSERT(sfc->is_ground());
|
||||
SASSERT(!sfc->is_nullable());
|
||||
|
||||
// empty set, fail
|
||||
sort_ref re_sort(seq.re.mk_re(str_sort), m);
|
||||
expr_ref empty_set(seq.re.mk_empty(re_sort), m);
|
||||
euf::snode* sfail = sg.mk(empty_set);
|
||||
SASSERT(sfail && sfail->is_fail());
|
||||
SASSERT(!sfail->is_nullable());
|
||||
|
||||
// union: to_re(x) | star(to_re(x)), nullable because star is
|
||||
expr_ref re_union(seq.re.mk_union(to_re_x, star_x), m);
|
||||
euf::snode* su = sg.mk(re_union);
|
||||
SASSERT(su && su->is_union());
|
||||
SASSERT(su->is_nullable()); // star_x is nullable
|
||||
|
||||
// intersection: to_re(x) & star(to_re(x)), nullable only if both are
|
||||
expr_ref re_inter(seq.re.mk_inter(to_re_x, star_x), m);
|
||||
euf::snode* si = sg.mk(re_inter);
|
||||
SASSERT(si && si->is_intersect());
|
||||
SASSERT(!si->is_nullable()); // to_re(x) is not nullable
|
||||
|
||||
// complement of to_re(x): nullable because to_re(x) is not nullable
|
||||
expr_ref re_comp(seq.re.mk_complement(to_re_x), m);
|
||||
euf::snode* sc = sg.mk(re_comp);
|
||||
SASSERT(sc && sc->is_complement());
|
||||
SASSERT(sc->is_nullable()); // complement of non-nullable is nullable
|
||||
|
||||
// in_re
|
||||
expr_ref in_re(seq.re.mk_in_re(x, star_x), m);
|
||||
euf::snode* sir = sg.mk(in_re);
|
||||
SASSERT(sir && sir->is_in_re());
|
||||
SASSERT(!sir->is_regex_free());
|
||||
|
||||
sg.display(std::cout);
|
||||
}
|
||||
|
||||
// test re.concat is classified as s_concat (not s_other)
|
||||
// and loop nullability with lower bound 0
|
||||
static void test_sgraph_re_concat_and_loop() {
|
||||
std::cout << "test_sgraph_re_concat_and_loop\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref to_re_x(seq.re.mk_to_re(x), m);
|
||||
|
||||
// re.concat should be classified as s_concat
|
||||
expr_ref re_a(seq.re.mk_to_re(x), m);
|
||||
sort_ref re_sort(seq.re.mk_re(str_sort), m);
|
||||
expr_ref ch_a(seq.str.mk_char('a'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
expr_ref re_unit_a(seq.re.mk_to_re(unit_a), m);
|
||||
expr_ref star_x(seq.re.mk_star(to_re_x), m);
|
||||
|
||||
// re.concat(star_x, re_unit_a): should be classified as s_concat
|
||||
expr_ref re_cat(seq.re.mk_concat(star_x, re_unit_a), m);
|
||||
euf::snode* src = sg.mk(re_cat);
|
||||
SASSERT(src && src->is_concat()); // fix 1: re.concat must be s_concat
|
||||
|
||||
// loop with lower bound 0: r{0,5} should be nullable
|
||||
expr_ref loop_0(seq.re.mk_loop(star_x, 0, 5), m);
|
||||
euf::snode* sl0 = sg.mk(loop_0);
|
||||
SASSERT(sl0 && sl0->is_loop());
|
||||
SASSERT(sl0->is_nullable()); // fix 2: lo=0 => nullable
|
||||
|
||||
// loop with lower bound 1: r{1,5} should NOT be nullable
|
||||
expr_ref loop_1(seq.re.mk_loop(star_x, 1, 5), m);
|
||||
euf::snode* sl1 = sg.mk(loop_1);
|
||||
SASSERT(sl1 && sl1->is_loop());
|
||||
SASSERT(!sl1->is_nullable()); // lo=1 => not nullable (body star is nullable but lo>=1)
|
||||
|
||||
// loop with lower bound 0 (single-arg form): r{0} should be nullable
|
||||
expr_ref loop_0s(seq.re.mk_loop(star_x, 0u), m);
|
||||
euf::snode* sl0s = sg.mk(loop_0s);
|
||||
SASSERT(sl0s && sl0s->is_loop());
|
||||
SASSERT(sl0s->is_nullable()); // fix 2: lo=0 => nullable
|
||||
|
||||
sg.display(std::cout);
|
||||
}
|
||||
|
||||
// test power node classification and metadata
|
||||
static void test_sgraph_power() {
|
||||
std::cout << "test_sgraph_power\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
arith_util arith(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref n(arith.mk_int(3), m);
|
||||
expr_ref xn(seq.str.mk_power(x, n), m);
|
||||
|
||||
euf::snode* sp = sg.mk(xn);
|
||||
SASSERT(sp && sp->is_power());
|
||||
SASSERT(!sp->is_ground()); // base x is not ground
|
||||
SASSERT(sp->is_regex_free());
|
||||
SASSERT(!sp->is_nullable()); // base x is not nullable
|
||||
SASSERT(sp->num_args() >= 1);
|
||||
|
||||
sg.display(std::cout);
|
||||
}
|
||||
|
||||
// test push/pop backtracking: nodes created inside a scope
|
||||
// are removed on pop, nodes before persist
|
||||
static void test_sgraph_push_pop() {
|
||||
std::cout << "test_sgraph_push_pop\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref y(m.mk_const("y", str_sort), m);
|
||||
|
||||
// create x before any scope
|
||||
sg.mk(x);
|
||||
unsigned before = sg.num_nodes();
|
||||
SASSERT(sg.find(x));
|
||||
|
||||
sg.push();
|
||||
|
||||
// create y and concat(x,y) inside scope
|
||||
expr_ref xy(seq.str.mk_concat(x, y), m);
|
||||
sg.mk(xy);
|
||||
SASSERT(sg.num_nodes() > before);
|
||||
SASSERT(sg.find(y));
|
||||
SASSERT(sg.find(xy));
|
||||
|
||||
sg.pop(1);
|
||||
|
||||
// x persists, y and xy removed
|
||||
SASSERT(sg.find(x));
|
||||
SASSERT(!sg.find(y));
|
||||
SASSERT(!sg.find(xy));
|
||||
SASSERT(sg.num_nodes() == before);
|
||||
}
|
||||
|
||||
// test nested push/pop with multiple scopes
|
||||
static void test_sgraph_nested_scopes() {
|
||||
std::cout << "test_sgraph_nested_scopes\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref a(m.mk_const("a", str_sort), m);
|
||||
expr_ref b(m.mk_const("b", str_sort), m);
|
||||
expr_ref c(m.mk_const("c", str_sort), m);
|
||||
|
||||
sg.mk(a);
|
||||
unsigned n0 = sg.num_nodes();
|
||||
|
||||
sg.push();
|
||||
sg.mk(b);
|
||||
unsigned n1 = sg.num_nodes();
|
||||
|
||||
sg.push();
|
||||
sg.mk(c);
|
||||
unsigned n2 = sg.num_nodes();
|
||||
SASSERT(n2 > n1 && n1 > n0);
|
||||
|
||||
// pop inner scope, c goes away
|
||||
sg.pop(1);
|
||||
SASSERT(sg.num_nodes() == n1);
|
||||
SASSERT(sg.find(a));
|
||||
SASSERT(sg.find(b));
|
||||
SASSERT(!sg.find(c));
|
||||
|
||||
// pop outer scope, b goes away
|
||||
sg.pop(1);
|
||||
SASSERT(sg.num_nodes() == n0);
|
||||
SASSERT(sg.find(a));
|
||||
SASSERT(!sg.find(b));
|
||||
}
|
||||
|
||||
// test that find returns the same snode for the same expression
|
||||
static void test_sgraph_find_idempotent() {
|
||||
std::cout << "test_sgraph_find_idempotent\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
euf::snode* s1 = sg.mk(x);
|
||||
euf::snode* s2 = sg.mk(x); // calling mk again returns same node
|
||||
SASSERT(s1 == s2);
|
||||
SASSERT(s1 == sg.find(x));
|
||||
}
|
||||
|
||||
// test mk_concat: empty absorption, node construction via mk(concat_expr)
|
||||
static void test_sgraph_mk_concat() {
|
||||
std::cout << "test_sgraph_mk_concat\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref y(m.mk_const("y", str_sort), m);
|
||||
expr_ref empty(seq.str.mk_empty(str_sort), m);
|
||||
|
||||
euf::snode* sx = sg.mk(x);
|
||||
euf::snode* sy = sg.mk(y);
|
||||
euf::snode* se = sg.mk(empty);
|
||||
|
||||
// concat with empty yields the non-empty side at sgraph level
|
||||
SASSERT(se && se->is_empty());
|
||||
|
||||
// normal concat via expression
|
||||
expr_ref xy(seq.str.mk_concat(x, y), m);
|
||||
euf::snode* sxy = sg.mk(xy);
|
||||
SASSERT(sxy && sxy->is_concat());
|
||||
SASSERT(sxy->num_args() == 2);
|
||||
SASSERT(sxy->arg(0) == sx);
|
||||
SASSERT(sxy->arg(1) == sy);
|
||||
|
||||
// calling mk again with same expr returns same node
|
||||
euf::snode* sxy2 = sg.mk(xy);
|
||||
SASSERT(sxy == sxy2);
|
||||
}
|
||||
|
||||
// test power node construction via mk(power_expr)
|
||||
static void test_sgraph_mk_power() {
|
||||
std::cout << "test_sgraph_mk_power\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
arith_util arith(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref n(arith.mk_int(5), m);
|
||||
expr_ref xn(seq.str.mk_power(x, n), m);
|
||||
|
||||
euf::snode* sx = sg.mk(x);
|
||||
euf::snode* sp = sg.mk(xn);
|
||||
SASSERT(sp && sp->is_power());
|
||||
SASSERT(sp->num_args() == 2);
|
||||
SASSERT(sp->arg(0) == sx);
|
||||
|
||||
// calling mk again returns same node
|
||||
euf::snode* sp2 = sg.mk(xn);
|
||||
SASSERT(sp == sp2);
|
||||
}
|
||||
|
||||
// test snode first/last navigation on concat trees
|
||||
static void test_sgraph_first_last() {
|
||||
std::cout << "test_sgraph_first_last\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref a(m.mk_const("a", str_sort), m);
|
||||
expr_ref b(m.mk_const("b", str_sort), m);
|
||||
expr_ref c(m.mk_const("c", str_sort), m);
|
||||
|
||||
euf::snode* sa = sg.mk(a);
|
||||
euf::snode* sb = sg.mk(b);
|
||||
euf::snode* sc = sg.mk(c);
|
||||
|
||||
// concat(concat(a,b),c): first=a, last=c
|
||||
expr_ref ab(seq.str.mk_concat(a, b), m);
|
||||
expr_ref ab_c(seq.str.mk_concat(ab, c), m);
|
||||
euf::snode* sab_c = sg.mk(ab_c);
|
||||
SASSERT(sab_c->first() == sa);
|
||||
SASSERT(sab_c->last() == sc);
|
||||
|
||||
// concat(a,concat(b,c)): first=a, last=c
|
||||
expr_ref bc(seq.str.mk_concat(b, c), m);
|
||||
expr_ref a_bc(seq.str.mk_concat(a, bc), m);
|
||||
euf::snode* sa_bc = sg.mk(a_bc);
|
||||
SASSERT(sa_bc->first() == sa);
|
||||
SASSERT(sa_bc->last() == sc);
|
||||
|
||||
// single node: first and last are self
|
||||
SASSERT(sa->first() == sa);
|
||||
SASSERT(sa->last() == sa);
|
||||
}
|
||||
|
||||
// test concat metadata propagation:
|
||||
// ground, regex_free, nullable, level, length
|
||||
static void test_sgraph_concat_metadata() {
|
||||
std::cout << "test_sgraph_concat_metadata\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref empty(seq.str.mk_empty(str_sort), m);
|
||||
expr_ref ch(seq.str.mk_char('Z'), m);
|
||||
expr_ref unit_z(seq.str.mk_unit(ch), m);
|
||||
|
||||
euf::snode* sx = sg.mk(x);
|
||||
euf::snode* se = sg.mk(empty);
|
||||
euf::snode* sz = sg.mk(unit_z);
|
||||
|
||||
// concat(x, unit('Z')): not ground (x is variable), regex_free, not nullable
|
||||
expr_ref xz(seq.str.mk_concat(x, unit_z), m);
|
||||
euf::snode* sxz = sg.mk(xz);
|
||||
SASSERT(!sxz->is_ground());
|
||||
SASSERT(sxz->is_regex_free());
|
||||
SASSERT(!sxz->is_nullable());
|
||||
SASSERT(sxz->length() == 2);
|
||||
SASSERT(sxz->level() == 2);
|
||||
|
||||
// concat(empty, empty): nullable (both empty)
|
||||
expr_ref empty2(seq.str.mk_concat(empty, empty), m);
|
||||
euf::snode* see = sg.mk(empty2);
|
||||
SASSERT(see->is_nullable());
|
||||
SASSERT(see->is_ground());
|
||||
SASSERT(see->length() == 0);
|
||||
|
||||
// deep chain: concat(concat(x,x),concat(x,x)) has level 3, length 4
|
||||
expr_ref xx(seq.str.mk_concat(x, x), m);
|
||||
expr_ref xxxx(seq.str.mk_concat(xx, xx), m);
|
||||
euf::snode* sxxxx = sg.mk(xxxx);
|
||||
SASSERT(sxxxx->level() == 3);
|
||||
SASSERT(sxxxx->length() == 4);
|
||||
}
|
||||
|
||||
// test display does not crash
|
||||
static void test_sgraph_display() {
|
||||
std::cout << "test_sgraph_display\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref x(m.mk_const("x", str_sort), m);
|
||||
expr_ref y(m.mk_const("y", str_sort), m);
|
||||
expr_ref xy(seq.str.mk_concat(x, y), m);
|
||||
sg.mk(xy);
|
||||
|
||||
std::ostringstream oss;
|
||||
sg.display(oss);
|
||||
std::string out = oss.str();
|
||||
SASSERT(out.find("var") != std::string::npos);
|
||||
SASSERT(out.find("concat") != std::string::npos);
|
||||
std::cout << out;
|
||||
}
|
||||
|
||||
// test sgraph factory methods: mk_var, mk_char, mk_empty, mk_concat
|
||||
static void test_sgraph_factory() {
|
||||
std::cout << "test_sgraph_factory\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
|
||||
// mk_var
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
SASSERT(x && x->is_var());
|
||||
SASSERT(!x->is_ground());
|
||||
SASSERT(x->length() == 1);
|
||||
|
||||
// mk_char
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
SASSERT(a && a->is_char());
|
||||
SASSERT(a->is_ground());
|
||||
SASSERT(a->length() == 1);
|
||||
|
||||
// mk_empty
|
||||
euf::snode* e = sg.mk_empty();
|
||||
SASSERT(e && e->is_empty());
|
||||
SASSERT(e->is_nullable());
|
||||
SASSERT(e->length() == 0);
|
||||
|
||||
// mk_concat with empty absorption
|
||||
euf::snode* xe = sg.mk_concat(x, e);
|
||||
SASSERT(xe == x);
|
||||
euf::snode* ex = sg.mk_concat(e, x);
|
||||
SASSERT(ex == x);
|
||||
|
||||
// mk_concat of two variables
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* xy = sg.mk_concat(x, y);
|
||||
SASSERT(xy && xy->is_concat());
|
||||
SASSERT(xy->length() == 2);
|
||||
SASSERT(xy->arg(0) == x);
|
||||
SASSERT(xy->arg(1) == y);
|
||||
|
||||
// mk_concat of multiple characters
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
euf::snode* c = sg.mk_char('C');
|
||||
euf::snode* abc = sg.mk_concat(sg.mk_concat(a, b), c);
|
||||
SASSERT(abc->length() == 3);
|
||||
SASSERT(abc->is_ground());
|
||||
SASSERT(abc->first() == a);
|
||||
SASSERT(abc->last() == c);
|
||||
}
|
||||
|
||||
// test snode::at() and snode::collect_tokens()
|
||||
static void test_sgraph_indexing() {
|
||||
std::cout << "test_sgraph_indexing\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
euf::snode* c = sg.mk_char('C');
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
|
||||
// build concat(concat(a, b), concat(c, x)) => [A, B, C, x]
|
||||
euf::snode* ab = sg.mk_concat(a, b);
|
||||
euf::snode* cx = sg.mk_concat(c, x);
|
||||
euf::snode* abcx = sg.mk_concat(ab, cx);
|
||||
|
||||
SASSERT(abcx->length() == 4);
|
||||
|
||||
// test at()
|
||||
SASSERT(abcx->at(0) == a);
|
||||
SASSERT(abcx->at(1) == b);
|
||||
SASSERT(abcx->at(2) == c);
|
||||
SASSERT(abcx->at(3) == x);
|
||||
SASSERT(abcx->at(4) == nullptr); // out of bounds
|
||||
|
||||
// test collect_tokens()
|
||||
euf::snode_vector tokens;
|
||||
abcx->collect_tokens(tokens);
|
||||
SASSERT(tokens.size() == 4);
|
||||
SASSERT(tokens[0] == a);
|
||||
SASSERT(tokens[1] == b);
|
||||
SASSERT(tokens[2] == c);
|
||||
SASSERT(tokens[3] == x);
|
||||
|
||||
// single token: at(0) is self
|
||||
SASSERT(a->at(0) == a);
|
||||
SASSERT(a->at(1) == nullptr);
|
||||
|
||||
// empty: at(0) is nullptr
|
||||
euf::snode* e = sg.mk_empty();
|
||||
SASSERT(e->at(0) == nullptr);
|
||||
euf::snode_vector empty_tokens;
|
||||
e->collect_tokens(empty_tokens);
|
||||
SASSERT(empty_tokens.empty());
|
||||
}
|
||||
|
||||
// test sgraph drop operations
|
||||
static void test_sgraph_drop() {
|
||||
std::cout << "test_sgraph_drop\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
euf::snode* c = sg.mk_char('C');
|
||||
euf::snode* d = sg.mk_char('D');
|
||||
|
||||
// build concat(concat(a, b), concat(c, d)) => [A, B, C, D]
|
||||
euf::snode* ab = sg.mk_concat(a, b);
|
||||
euf::snode* cd = sg.mk_concat(c, d);
|
||||
euf::snode* abcd = sg.mk_concat(ab, cd);
|
||||
|
||||
SASSERT(abcd->length() == 4);
|
||||
|
||||
// drop_first: [A, B, C, D] => [B, C, D]
|
||||
euf::snode* bcd = sg.drop_first(abcd);
|
||||
SASSERT(bcd->length() == 3);
|
||||
SASSERT(bcd->first() == b);
|
||||
SASSERT(bcd->last() == d);
|
||||
|
||||
// drop_last: [A, B, C, D] => [A, B, C]
|
||||
euf::snode* abc = sg.drop_last(abcd);
|
||||
SASSERT(abc->length() == 3);
|
||||
SASSERT(abc->first() == a);
|
||||
SASSERT(abc->last() == c);
|
||||
|
||||
// drop_left(2): [A, B, C, D] => [C, D]
|
||||
euf::snode* cd2 = sg.drop_left(abcd, 2);
|
||||
SASSERT(cd2->length() == 2);
|
||||
SASSERT(cd2->first() == c);
|
||||
|
||||
// drop_right(2): [A, B, C, D] => [A, B]
|
||||
euf::snode* ab2 = sg.drop_right(abcd, 2);
|
||||
SASSERT(ab2->length() == 2);
|
||||
SASSERT(ab2->last() == b);
|
||||
|
||||
// drop all: [A, B, C, D] => empty
|
||||
euf::snode* empty = sg.drop_left(abcd, 4);
|
||||
SASSERT(empty->is_empty());
|
||||
|
||||
// drop from single token: [A] => empty
|
||||
euf::snode* e = sg.drop_first(a);
|
||||
SASSERT(e->is_empty());
|
||||
|
||||
// drop from empty: no change
|
||||
euf::snode* ee = sg.drop_first(sg.mk_empty());
|
||||
SASSERT(ee->is_empty());
|
||||
}
|
||||
|
||||
// test sgraph substitution
|
||||
static void test_sgraph_subst() {
|
||||
std::cout << "test_sgraph_subst\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
|
||||
// concat(x, concat(a, x)) with x -> b gives concat(b, concat(a, b))
|
||||
euf::snode* ax = sg.mk_concat(a, x);
|
||||
euf::snode* xax = sg.mk_concat(x, ax);
|
||||
SASSERT(xax->length() == 3);
|
||||
|
||||
euf::snode* result = sg.subst(xax, x, b);
|
||||
SASSERT(result->length() == 3);
|
||||
SASSERT(result->first() == b);
|
||||
SASSERT(result->last() == b);
|
||||
SASSERT(result->at(1) == a); // middle is still 'A'
|
||||
|
||||
// substitution of non-occurring variable is identity
|
||||
euf::snode* same = sg.subst(xax, y, b);
|
||||
SASSERT(same == xax);
|
||||
|
||||
// substitution of variable with empty
|
||||
euf::snode* e = sg.mk_empty();
|
||||
euf::snode* collapsed = sg.subst(xax, x, e);
|
||||
SASSERT(collapsed->length() == 1); // just 'a' remains
|
||||
SASSERT(collapsed == a);
|
||||
}
|
||||
|
||||
// test complex concatenation creation, merging and simplification
|
||||
static void test_sgraph_complex_concat() {
|
||||
std::cout << "test_sgraph_complex_concat\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
|
||||
// build a string "HELLO" = concat(H, concat(E, concat(L, concat(L, O))))
|
||||
euf::snode* h = sg.mk_char('H');
|
||||
euf::snode* e = sg.mk_char('E');
|
||||
euf::snode* l = sg.mk_char('L');
|
||||
euf::snode* o = sg.mk_char('O');
|
||||
|
||||
euf::snode* lo = sg.mk_concat(l, o);
|
||||
euf::snode* llo = sg.mk_concat(l, lo);
|
||||
euf::snode* ello = sg.mk_concat(e, llo);
|
||||
euf::snode* hello = sg.mk_concat(h, ello);
|
||||
|
||||
SASSERT(hello->length() == 5);
|
||||
SASSERT(hello->is_ground());
|
||||
SASSERT(hello->first() == h);
|
||||
SASSERT(hello->last() == o);
|
||||
|
||||
// index into "HELLO"
|
||||
SASSERT(hello->at(0) == h);
|
||||
SASSERT(hello->at(1) == e);
|
||||
SASSERT(hello->at(2) == l);
|
||||
SASSERT(hello->at(3) == l);
|
||||
SASSERT(hello->at(4) == o);
|
||||
|
||||
// drop first 2 from "HELLO" => "LLO"
|
||||
euf::snode* llo2 = sg.drop_left(hello, 2);
|
||||
SASSERT(llo2->length() == 3);
|
||||
SASSERT(llo2->first() == l);
|
||||
|
||||
// drop last 3 from "HELLO" => "HE"
|
||||
euf::snode* he = sg.drop_right(hello, 3);
|
||||
SASSERT(he->length() == 2);
|
||||
SASSERT(he->first() == h);
|
||||
SASSERT(he->last() == e);
|
||||
|
||||
// mixed variables and characters: concat(x, "AB", y)
|
||||
euf::snode* x = sg.mk_var(symbol("x"));
|
||||
euf::snode* y = sg.mk_var(symbol("y"));
|
||||
euf::snode* a = sg.mk_char('A');
|
||||
euf::snode* b = sg.mk_char('B');
|
||||
euf::snode* ab = sg.mk_concat(a, b);
|
||||
euf::snode* xab = sg.mk_concat(x, ab);
|
||||
euf::snode* xaby = sg.mk_concat(xab, y);
|
||||
|
||||
SASSERT(xaby->length() == 4);
|
||||
SASSERT(!xaby->is_ground());
|
||||
SASSERT(xaby->at(0) == x);
|
||||
SASSERT(xaby->at(1) == a);
|
||||
SASSERT(xaby->at(2) == b);
|
||||
SASSERT(xaby->at(3) == y);
|
||||
}
|
||||
|
||||
// test Brzozowski derivative computation
|
||||
static void test_sgraph_brzozowski() {
|
||||
std::cout << "test_sgraph_brzozowski\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
// derivative of re.star(to_re("a")) w.r.t. 'a'
|
||||
// d/da (a*) = a*
|
||||
expr_ref ch_a(seq.str.mk_char('a'), m);
|
||||
expr_ref unit_a(seq.str.mk_unit(ch_a), m);
|
||||
expr_ref to_re_a(seq.re.mk_to_re(unit_a), m);
|
||||
expr_ref star_a(seq.re.mk_star(to_re_a), m);
|
||||
|
||||
euf::snode* s_star_a = sg.mk(star_a);
|
||||
euf::snode* s_unit_a = sg.mk(unit_a);
|
||||
|
||||
euf::snode* deriv = sg.brzozowski_deriv(s_star_a, s_unit_a);
|
||||
SASSERT(deriv != nullptr);
|
||||
std::cout << " d/da(a*) kind: " << (int)deriv->kind() << "\n";
|
||||
|
||||
// derivative of re.empty w.r.t. 'a' should be re.empty
|
||||
sort_ref re_sort(seq.re.mk_re(str_sort), m);
|
||||
expr_ref re_empty(seq.re.mk_empty(re_sort), m);
|
||||
euf::snode* s_empty = sg.mk(re_empty);
|
||||
euf::snode* deriv_empty = sg.brzozowski_deriv(s_empty, s_unit_a);
|
||||
SASSERT(deriv_empty != nullptr);
|
||||
SASSERT(deriv_empty->is_fail()); // derivative of empty set is empty set
|
||||
std::cout << " d/da(empty) kind: " << (int)deriv_empty->kind() << "\n";
|
||||
|
||||
sg.display(std::cout);
|
||||
}
|
||||
|
||||
// test minterm computation
|
||||
static void test_sgraph_minterms() {
|
||||
std::cout << "test_sgraph_minterms\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::egraph eg(m);
|
||||
euf::sgraph sg(m, eg);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
// simple regex with no character predicates: re.all (.*)
|
||||
expr_ref re_all(seq.re.mk_full_seq(str_sort), m);
|
||||
euf::snode* s_re_all = sg.mk(re_all);
|
||||
|
||||
euf::snode_vector minterms;
|
||||
sg.compute_minterms(s_re_all, minterms);
|
||||
// no predicates => single minterm (full_char)
|
||||
SASSERT(minterms.size() == 1);
|
||||
std::cout << " re.all minterms: " << minterms.size() << "\n";
|
||||
}
|
||||
|
||||
void tst_euf_sgraph() {
|
||||
test_sgraph_classify();
|
||||
test_sgraph_regex();
|
||||
test_sgraph_re_concat_and_loop();
|
||||
test_sgraph_power();
|
||||
test_sgraph_push_pop();
|
||||
test_sgraph_nested_scopes();
|
||||
test_sgraph_find_idempotent();
|
||||
test_sgraph_mk_concat();
|
||||
test_sgraph_mk_power();
|
||||
test_sgraph_first_last();
|
||||
test_sgraph_concat_metadata();
|
||||
test_sgraph_display();
|
||||
test_sgraph_factory();
|
||||
test_sgraph_indexing();
|
||||
test_sgraph_drop();
|
||||
test_sgraph_subst();
|
||||
test_sgraph_complex_concat();
|
||||
test_sgraph_brzozowski();
|
||||
test_sgraph_minterms();
|
||||
}
|
||||
|
|
@ -281,6 +281,8 @@ int main(int argc, char ** argv) {
|
|||
TST(distribution);
|
||||
TST(euf_bv_plugin);
|
||||
TST(euf_arith_plugin);
|
||||
TST(euf_sgraph);
|
||||
TST(euf_seq_plugin);
|
||||
TST(sls_test);
|
||||
TST(scoped_vector);
|
||||
TST(sls_seq_plugin);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue