mirror of
https://github.com/Z3Prover/z3
synced 2026-03-07 13:54:53 +00:00
Changes before error encountered
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
a3baae5942
commit
8c0be6e614
4 changed files with 30 additions and 254 deletions
|
|
@ -23,47 +23,6 @@ Author:
|
|||
|
||||
namespace euf {
|
||||
|
||||
// Associativity-respecting hash: flattens concat into leaf sequence
|
||||
// concat(concat(a, b), c) and concat(a, concat(b, c)) hash identically
|
||||
// Recursively flatten a concatenation tree into a sequence of leaf nodes.
|
||||
// This produces the same leaf order regardless of tree associativity,
|
||||
// so concat(concat(a,b),c) and concat(a,concat(b,c)) yield [a,b,c].
|
||||
static void collect_leaves(snode const* n, ptr_vector<snode const>& leaves) {
|
||||
if (n->is_concat()) {
|
||||
collect_leaves(n->arg(0), leaves);
|
||||
collect_leaves(n->arg(1), leaves);
|
||||
}
|
||||
else {
|
||||
leaves.push_back(n);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned concat_hash::operator()(snode const* n) const {
|
||||
if (!n->is_concat())
|
||||
return n->id();
|
||||
ptr_vector<snode const> leaves;
|
||||
collect_leaves(n, leaves);
|
||||
unsigned h = 0;
|
||||
for (snode const* l : leaves)
|
||||
h = combine_hash(h, l->id());
|
||||
return h;
|
||||
}
|
||||
|
||||
bool concat_eq::operator()(snode const* a, snode const* b) const {
|
||||
if (a == b) return true;
|
||||
if (!a->is_concat() && !b->is_concat())
|
||||
return a->id() == b->id();
|
||||
ptr_vector<snode const> la, lb;
|
||||
collect_leaves(a, la);
|
||||
collect_leaves(b, lb);
|
||||
if (la.size() != lb.size())
|
||||
return false;
|
||||
for (unsigned i = 0; i < la.size(); ++i)
|
||||
if (la[i]->id() != lb[i]->id())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
sgraph::sgraph(ast_manager& m):
|
||||
m(m),
|
||||
m_seq(m),
|
||||
|
|
@ -315,9 +274,6 @@ namespace euf {
|
|||
m_expr2snode.reserve(eid + 1, nullptr);
|
||||
m_expr2snode[eid] = n;
|
||||
}
|
||||
// insert concats into the associativity-respecting hash table
|
||||
if (k == snode_kind::s_concat)
|
||||
m_concat_table.insert(n);
|
||||
++m_stats.m_num_nodes;
|
||||
return n;
|
||||
}
|
||||
|
|
@ -358,17 +314,6 @@ namespace euf {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
snode* sgraph::find_assoc_equal(snode* n) const {
|
||||
if (!n || !n->is_concat())
|
||||
return nullptr;
|
||||
snode* existing = nullptr;
|
||||
// find returns true when a matching entry exists,
|
||||
// check that it is a different node to report a genuine match
|
||||
if (m_concat_table.find(n, existing) && existing != n)
|
||||
return existing;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enode* sgraph::mk_enode(expr* e) {
|
||||
enode* n = m_egraph.find(e);
|
||||
if (n) return n;
|
||||
|
|
@ -396,8 +341,6 @@ namespace euf {
|
|||
unsigned old_sz = m_scopes[new_lvl];
|
||||
for (unsigned i = m_nodes.size(); i-- > old_sz; ) {
|
||||
snode* n = m_nodes[i];
|
||||
if (n->is_concat())
|
||||
m_concat_table.remove(n);
|
||||
if (n->get_expr()) {
|
||||
unsigned eid = n->get_expr()->get_id();
|
||||
if (eid < m_expr2snode.size())
|
||||
|
|
|
|||
|
|
@ -10,53 +10,38 @@ Abstract:
|
|||
Sequence/string graph layer
|
||||
|
||||
Encapsulates string expressions in the style of euf_egraph.h.
|
||||
The sgraph registers sequence expressions and classifies them
|
||||
into a ZIPT-style representation of string tokens organized
|
||||
as binary concatenation trees.
|
||||
The sgraph maps Z3 sequence/regex AST expressions to snode structures
|
||||
organized as binary concatenation trees with metadata, and owns an
|
||||
egraph with a seq_plugin for congruence closure.
|
||||
|
||||
This provides a layer that maps Z3 AST expressions (from seq_decl_plugin)
|
||||
to snode structures with metadata for ground, regex-free, nullable, etc.
|
||||
Implemented:
|
||||
-- snode classification: empty, char, variable, unit, concat, power,
|
||||
star, loop, union, intersection, complement, fail, full_char,
|
||||
full_seq, to_re, in_re, other.
|
||||
-- Metadata computation: ground, regex_free, nullable, level, length.
|
||||
-- Expression registration via mk, lookup via find.
|
||||
-- Scope management: push/pop with backtracking.
|
||||
-- egraph ownership with seq_plugin for concat associativity,
|
||||
Kleene star merging, and nullable absorption.
|
||||
-- enode registration via mk_enode.
|
||||
|
||||
ZIPT features not yet ported to snode/sgraph:
|
||||
|
||||
-- Str operations (from StrManager):
|
||||
Normalisation: union-find style representative tracking (Normalised/IsNormalised)
|
||||
with cache migration (MoveCache) for equal strings with different tree structures.
|
||||
Balanced tree maintenance: DegenerationLevel tracking with rebalancing
|
||||
(Balanced/BalancedTrans properties on TupleStr).
|
||||
Concat simplification: merging adjacent Kleene stars (u.v* + v*w = u.v*w),
|
||||
merging adjacent loops (v{l1,h1} + v{l2,h2} = v{l1+l2,h1+h2}),
|
||||
absorbing nullable tokens next to .* (u.* + vw = u.*w when v nullable).
|
||||
Drop operations: DropLeft/DropRight for removing prefix/suffix tokens,
|
||||
with caching (DropLeftCache/DropRightCache on TupleStr).
|
||||
Substitution: Subst(var, replacement) and SubstChar operations with caching.
|
||||
Indexed access: GetIndex/GetIndexFwd/GetIndexBwd for token-level random access.
|
||||
Forward/reverse iteration: GetEnumerator/GetRevEnumerator over leaf tokens.
|
||||
ToList with caching: flattened token array with ListCache on TupleStr.
|
||||
Simplification: OptSimplify that unfolds constant powers and simplifies tokens.
|
||||
Derivative computation: Derivative(CharacterSet, fwd) for regex derivative construction.
|
||||
Equality: structural equality with associative hashing (TriangleMatrix-based rolling hash).
|
||||
Rotation equality: RotationEquals for detecting cyclic permutations.
|
||||
Expression reconstruction: ToExpr for converting back to Z3 AST.
|
||||
Graphviz export: ToDot for debugging visualisation.
|
||||
|
||||
-- StrToken subclasses not yet mapped:
|
||||
SymCharToken: symbolic character variables (for regex unwinding).
|
||||
StrAtToken: str.at(s, i) as a named token with parent tracking.
|
||||
SubStrToken: str.substr(s, from, len) as a named token.
|
||||
SetToken: character set ranges (used for regex character classes).
|
||||
PostToken/PreToken: auxiliary regex position markers.
|
||||
|
||||
-- StrToken features:
|
||||
GetDecomposition: Nielsen-style prefix/postfix decomposition with
|
||||
side constraints (IntConstraint) and variable substitutions (Subst).
|
||||
NamedStrToken parent/child extension tracking (Extension1/Extension2)
|
||||
for variable splitting (x = x'x'') with PowerExtension for x / u^n u'.
|
||||
CollectSymbols: gathering non-terminals and alphabet for Parikh analysis.
|
||||
MinTerms: FirstMinTerms/LastMinTerms for character class analysis.
|
||||
Token ordering: StrTokenOrder for deterministic comparison.
|
||||
Derivable flag: tracking which tokens support regex derivatives.
|
||||
BasicRegex flag: distinguishing basic vs extended regex constructs.
|
||||
ZIPT features not yet ported:
|
||||
|
||||
-- Str operations: normalisation with union-find representatives and
|
||||
cache migration, balanced tree maintenance, drop left/right with
|
||||
caching, substitution, indexed access, iteration, ToList caching,
|
||||
simplification, derivative computation, structural equality with
|
||||
associative hashing, rotation equality, expression reconstruction,
|
||||
Graphviz export.
|
||||
|
||||
-- StrToken subclasses: SymCharToken, StrAtToken, SubStrToken,
|
||||
SetToken, PostToken/PreToken.
|
||||
|
||||
-- StrToken features: Nielsen-style GetDecomposition with side
|
||||
constraints, NamedStrToken extension tracking for variable
|
||||
splitting with PowerExtension, CollectSymbols for Parikh analysis,
|
||||
MinTerms for character class analysis, token ordering, Derivable
|
||||
and BasicRegex flags.
|
||||
|
||||
Author:
|
||||
|
||||
|
|
@ -69,7 +54,6 @@ Author:
|
|||
|
||||
#include "util/region.h"
|
||||
#include "util/statistics.h"
|
||||
#include "util/hashtable.h"
|
||||
#include "ast/ast.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/euf/euf_snode.h"
|
||||
|
|
@ -79,19 +63,6 @@ namespace euf {
|
|||
|
||||
class seq_plugin;
|
||||
|
||||
// Associativity-respecting hash for concatenations.
|
||||
// The hash function flattens concat trees so that
|
||||
// concat(concat(a, b), c) and concat(a, concat(b, c))
|
||||
// hash to the same value. This is how ZIPT ensures
|
||||
// finding equal concatenations efficiently.
|
||||
struct concat_hash {
|
||||
unsigned operator()(snode const* n) const;
|
||||
};
|
||||
|
||||
struct concat_eq {
|
||||
bool operator()(snode const* a, snode const* b) const;
|
||||
};
|
||||
|
||||
class sgraph {
|
||||
|
||||
struct stats {
|
||||
|
|
@ -116,9 +87,6 @@ namespace euf {
|
|||
// maps expression id to snode
|
||||
ptr_vector<snode> m_expr2snode;
|
||||
|
||||
// hash table for finding equal concatenations modulo associativity
|
||||
hashtable<snode*, concat_hash, concat_eq> m_concat_table;
|
||||
|
||||
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
|
||||
snode_kind classify(expr* e) const;
|
||||
void compute_metadata(snode* n);
|
||||
|
|
@ -138,9 +106,6 @@ namespace euf {
|
|||
// lookup an already-registered expression
|
||||
snode* find(expr* e) const;
|
||||
|
||||
// find an existing concat that is equal modulo associativity
|
||||
snode* find_assoc_equal(snode* n) const;
|
||||
|
||||
// register expression in both sgraph and egraph
|
||||
enode* mk_enode(expr* e);
|
||||
|
||||
|
|
|
|||
|
|
@ -94,50 +94,6 @@ static void test_sgraph_backtrack() {
|
|||
SASSERT(sg.find(x));
|
||||
}
|
||||
|
||||
// test assoc hash: concat(concat(a,b),c) hashes same as concat(a,concat(b,c))
|
||||
static void test_assoc_hash() {
|
||||
std::cout << "test_assoc_hash\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::sgraph sg(m);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref a(m.mk_const("a", str_sort), m);
|
||||
expr_ref b(m.mk_const("b", str_sort), m);
|
||||
expr_ref c(m.mk_const("c", str_sort), m);
|
||||
|
||||
euf::snode* sa = sg.mk(a);
|
||||
euf::snode* sb = sg.mk(b);
|
||||
euf::snode* sc = sg.mk(c);
|
||||
|
||||
// build concat(concat(a,b),c)
|
||||
expr_ref ab(seq.str.mk_concat(a, b), m);
|
||||
expr_ref ab_c(seq.str.mk_concat(ab, c), m);
|
||||
euf::snode* sab_c = sg.mk(ab_c);
|
||||
|
||||
// build concat(a,concat(b,c))
|
||||
expr_ref bc(seq.str.mk_concat(b, c), m);
|
||||
expr_ref a_bc(seq.str.mk_concat(a, bc), m);
|
||||
euf::snode* sa_bc = sg.mk(a_bc);
|
||||
|
||||
// they should hash to the same value via the assoc hash
|
||||
euf::concat_hash h;
|
||||
euf::concat_eq eq;
|
||||
SASSERT(h(sab_c) == h(sa_bc));
|
||||
SASSERT(eq(sab_c, sa_bc));
|
||||
|
||||
// different concat should not be equal
|
||||
expr_ref ac(seq.str.mk_concat(a, c), m);
|
||||
expr_ref ac_b(seq.str.mk_concat(ac, b), m);
|
||||
euf::snode* sac_b = sg.mk(ac_b);
|
||||
SASSERT(!eq(sab_c, sac_b));
|
||||
|
||||
// find_assoc_equal should find the first with same leaves
|
||||
euf::snode* found = sg.find_assoc_equal(sa_bc);
|
||||
SASSERT(found == sab_c);
|
||||
}
|
||||
|
||||
// test seq_plugin: concat associativity is normalized by the plugin
|
||||
static void test_seq_plugin_assoc() {
|
||||
std::cout << "test_seq_plugin_assoc\n";
|
||||
|
|
@ -270,7 +226,6 @@ static void test_sgraph_egraph_sync() {
|
|||
void tst_euf_seq_plugin() {
|
||||
s_var = 0; test_sgraph_basic();
|
||||
s_var = 0; test_sgraph_backtrack();
|
||||
s_var = 0; test_assoc_hash();
|
||||
s_var = 0; test_seq_plugin_assoc();
|
||||
s_var = 0; test_seq_plugin_empty();
|
||||
s_var = 0; test_seq_plugin_star_merge();
|
||||
|
|
|
|||
|
|
@ -330,91 +330,6 @@ static void test_sgraph_mk_power() {
|
|||
SASSERT(sp == sp2);
|
||||
}
|
||||
|
||||
// test associativity-respecting hash: concat trees with same
|
||||
// leaf order hash and compare equal regardless of tree structure
|
||||
static void test_sgraph_assoc_hash() {
|
||||
std::cout << "test_sgraph_assoc_hash\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::sgraph sg(m);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref a(m.mk_const("a", str_sort), m);
|
||||
expr_ref b(m.mk_const("b", str_sort), m);
|
||||
expr_ref c(m.mk_const("c", str_sort), m);
|
||||
|
||||
euf::snode* sa = sg.mk(a);
|
||||
euf::snode* sb = sg.mk(b);
|
||||
euf::snode* sc = sg.mk(c);
|
||||
|
||||
// concat(concat(a,b),c) — left-associated
|
||||
expr_ref ab(seq.str.mk_concat(a, b), m);
|
||||
expr_ref ab_c(seq.str.mk_concat(ab, c), m);
|
||||
euf::snode* sab_c = sg.mk(ab_c);
|
||||
|
||||
// concat(a,concat(b,c)) — right-associated
|
||||
expr_ref bc(seq.str.mk_concat(b, c), m);
|
||||
expr_ref a_bc(seq.str.mk_concat(a, bc), m);
|
||||
euf::snode* sa_bc = sg.mk(a_bc);
|
||||
|
||||
// hash and equality should agree
|
||||
euf::concat_hash h;
|
||||
euf::concat_eq eq;
|
||||
SASSERT(h(sab_c) == h(sa_bc));
|
||||
SASSERT(eq(sab_c, sa_bc));
|
||||
|
||||
// different leaf order should not be equal
|
||||
expr_ref ac(seq.str.mk_concat(a, c), m);
|
||||
expr_ref ac_b(seq.str.mk_concat(ac, b), m);
|
||||
euf::snode* sac_b = sg.mk(ac_b);
|
||||
SASSERT(!eq(sab_c, sac_b));
|
||||
|
||||
// find_assoc_equal finds existing node with same leaf sequence
|
||||
euf::snode* found = sg.find_assoc_equal(sa_bc);
|
||||
SASSERT(found == sab_c);
|
||||
}
|
||||
|
||||
// test that concat table is cleaned up on pop
|
||||
static void test_sgraph_assoc_hash_backtrack() {
|
||||
std::cout << "test_sgraph_assoc_hash_backtrack\n";
|
||||
ast_manager m;
|
||||
reg_decl_plugins(m);
|
||||
euf::sgraph sg(m);
|
||||
seq_util seq(m);
|
||||
sort_ref str_sort(seq.str.mk_string_sort(), m);
|
||||
|
||||
expr_ref a(m.mk_const("a", str_sort), m);
|
||||
expr_ref b(m.mk_const("b", str_sort), m);
|
||||
expr_ref c(m.mk_const("c", str_sort), m);
|
||||
|
||||
sg.mk(a);
|
||||
sg.mk(b);
|
||||
sg.mk(c);
|
||||
|
||||
sg.push();
|
||||
|
||||
// create left-associated concat inside scope
|
||||
expr_ref ab(seq.str.mk_concat(a, b), m);
|
||||
expr_ref ab_c(seq.str.mk_concat(ab, c), m);
|
||||
euf::snode* sab_c = sg.mk(ab_c);
|
||||
|
||||
// build right-associated variant and find the match
|
||||
expr_ref bc(seq.str.mk_concat(b, c), m);
|
||||
expr_ref a_bc(seq.str.mk_concat(a, bc), m);
|
||||
euf::snode* sa_bc = sg.mk(a_bc);
|
||||
SASSERT(sg.find_assoc_equal(sa_bc) == sab_c);
|
||||
|
||||
sg.pop(1);
|
||||
|
||||
// after pop, the concats are gone
|
||||
// recreate right-associated and check no match found
|
||||
expr_ref bc2(seq.str.mk_concat(b, c), m);
|
||||
expr_ref a_bc2(seq.str.mk_concat(a, bc2), m);
|
||||
euf::snode* sa_bc2 = sg.mk(a_bc2);
|
||||
SASSERT(sg.find_assoc_equal(sa_bc2) == nullptr);
|
||||
}
|
||||
|
||||
// test snode first/last navigation on concat trees
|
||||
static void test_sgraph_first_last() {
|
||||
std::cout << "test_sgraph_first_last\n";
|
||||
|
|
@ -525,8 +440,6 @@ void tst_euf_sgraph() {
|
|||
test_sgraph_find_idempotent();
|
||||
test_sgraph_mk_concat();
|
||||
test_sgraph_mk_power();
|
||||
test_sgraph_assoc_hash();
|
||||
test_sgraph_assoc_hash_backtrack();
|
||||
test_sgraph_first_last();
|
||||
test_sgraph_concat_metadata();
|
||||
test_sgraph_display();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue