diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 43a4e2f5c..84f5de73c 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -23,47 +23,6 @@ Author: namespace euf { - // Associativity-respecting hash: flattens concat into leaf sequence - // concat(concat(a, b), c) and concat(a, concat(b, c)) hash identically - // Recursively flatten a concatenation tree into a sequence of leaf nodes. - // This produces the same leaf order regardless of tree associativity, - // so concat(concat(a,b),c) and concat(a,concat(b,c)) yield [a,b,c]. - static void collect_leaves(snode const* n, ptr_vector& leaves) { - if (n->is_concat()) { - collect_leaves(n->arg(0), leaves); - collect_leaves(n->arg(1), leaves); - } - else { - leaves.push_back(n); - } - } - - unsigned concat_hash::operator()(snode const* n) const { - if (!n->is_concat()) - return n->id(); - ptr_vector leaves; - collect_leaves(n, leaves); - unsigned h = 0; - for (snode const* l : leaves) - h = combine_hash(h, l->id()); - return h; - } - - bool concat_eq::operator()(snode const* a, snode const* b) const { - if (a == b) return true; - if (!a->is_concat() && !b->is_concat()) - return a->id() == b->id(); - ptr_vector la, lb; - collect_leaves(a, la); - collect_leaves(b, lb); - if (la.size() != lb.size()) - return false; - for (unsigned i = 0; i < la.size(); ++i) - if (la[i]->id() != lb[i]->id()) - return false; - return true; - } - sgraph::sgraph(ast_manager& m): m(m), m_seq(m), @@ -315,9 +274,6 @@ namespace euf { m_expr2snode.reserve(eid + 1, nullptr); m_expr2snode[eid] = n; } - // insert concats into the associativity-respecting hash table - if (k == snode_kind::s_concat) - m_concat_table.insert(n); ++m_stats.m_num_nodes; return n; } @@ -358,17 +314,6 @@ namespace euf { return nullptr; } - snode* sgraph::find_assoc_equal(snode* n) const { - if (!n || !n->is_concat()) - return nullptr; - snode* existing = nullptr; - // find returns true when a matching entry exists, - // check that it is a different node to report a genuine match - if (m_concat_table.find(n, existing) && existing != n) - return existing; - return nullptr; - } - enode* sgraph::mk_enode(expr* e) { enode* n = m_egraph.find(e); if (n) return n; @@ -396,8 +341,6 @@ namespace euf { unsigned old_sz = m_scopes[new_lvl]; for (unsigned i = m_nodes.size(); i-- > old_sz; ) { snode* n = m_nodes[i]; - if (n->is_concat()) - m_concat_table.remove(n); if (n->get_expr()) { unsigned eid = n->get_expr()->get_id(); if (eid < m_expr2snode.size()) diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index acc6cd154..60c1facfe 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -10,53 +10,38 @@ Abstract: Sequence/string graph layer Encapsulates string expressions in the style of euf_egraph.h. - The sgraph registers sequence expressions and classifies them - into a ZIPT-style representation of string tokens organized - as binary concatenation trees. + The sgraph maps Z3 sequence/regex AST expressions to snode structures + organized as binary concatenation trees with metadata, and owns an + egraph with a seq_plugin for congruence closure. - This provides a layer that maps Z3 AST expressions (from seq_decl_plugin) - to snode structures with metadata for ground, regex-free, nullable, etc. + Implemented: + -- snode classification: empty, char, variable, unit, concat, power, + star, loop, union, intersection, complement, fail, full_char, + full_seq, to_re, in_re, other. + -- Metadata computation: ground, regex_free, nullable, level, length. + -- Expression registration via mk, lookup via find. + -- Scope management: push/pop with backtracking. + -- egraph ownership with seq_plugin for concat associativity, + Kleene star merging, and nullable absorption. + -- enode registration via mk_enode. - ZIPT features not yet ported to snode/sgraph: - - -- Str operations (from StrManager): - Normalisation: union-find style representative tracking (Normalised/IsNormalised) - with cache migration (MoveCache) for equal strings with different tree structures. - Balanced tree maintenance: DegenerationLevel tracking with rebalancing - (Balanced/BalancedTrans properties on TupleStr). - Concat simplification: merging adjacent Kleene stars (u.v* + v*w = u.v*w), - merging adjacent loops (v{l1,h1} + v{l2,h2} = v{l1+l2,h1+h2}), - absorbing nullable tokens next to .* (u.* + vw = u.*w when v nullable). - Drop operations: DropLeft/DropRight for removing prefix/suffix tokens, - with caching (DropLeftCache/DropRightCache on TupleStr). - Substitution: Subst(var, replacement) and SubstChar operations with caching. - Indexed access: GetIndex/GetIndexFwd/GetIndexBwd for token-level random access. - Forward/reverse iteration: GetEnumerator/GetRevEnumerator over leaf tokens. - ToList with caching: flattened token array with ListCache on TupleStr. - Simplification: OptSimplify that unfolds constant powers and simplifies tokens. - Derivative computation: Derivative(CharacterSet, fwd) for regex derivative construction. - Equality: structural equality with associative hashing (TriangleMatrix-based rolling hash). - Rotation equality: RotationEquals for detecting cyclic permutations. - Expression reconstruction: ToExpr for converting back to Z3 AST. - Graphviz export: ToDot for debugging visualisation. - - -- StrToken subclasses not yet mapped: - SymCharToken: symbolic character variables (for regex unwinding). - StrAtToken: str.at(s, i) as a named token with parent tracking. - SubStrToken: str.substr(s, from, len) as a named token. - SetToken: character set ranges (used for regex character classes). - PostToken/PreToken: auxiliary regex position markers. - - -- StrToken features: - GetDecomposition: Nielsen-style prefix/postfix decomposition with - side constraints (IntConstraint) and variable substitutions (Subst). - NamedStrToken parent/child extension tracking (Extension1/Extension2) - for variable splitting (x = x'x'') with PowerExtension for x / u^n u'. - CollectSymbols: gathering non-terminals and alphabet for Parikh analysis. - MinTerms: FirstMinTerms/LastMinTerms for character class analysis. - Token ordering: StrTokenOrder for deterministic comparison. - Derivable flag: tracking which tokens support regex derivatives. - BasicRegex flag: distinguishing basic vs extended regex constructs. + ZIPT features not yet ported: + + -- Str operations: normalisation with union-find representatives and + cache migration, balanced tree maintenance, drop left/right with + caching, substitution, indexed access, iteration, ToList caching, + simplification, derivative computation, structural equality with + associative hashing, rotation equality, expression reconstruction, + Graphviz export. + + -- StrToken subclasses: SymCharToken, StrAtToken, SubStrToken, + SetToken, PostToken/PreToken. + + -- StrToken features: Nielsen-style GetDecomposition with side + constraints, NamedStrToken extension tracking for variable + splitting with PowerExtension, CollectSymbols for Parikh analysis, + MinTerms for character class analysis, token ordering, Derivable + and BasicRegex flags. Author: @@ -69,7 +54,6 @@ Author: #include "util/region.h" #include "util/statistics.h" -#include "util/hashtable.h" #include "ast/ast.h" #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_snode.h" @@ -79,19 +63,6 @@ namespace euf { class seq_plugin; - // Associativity-respecting hash for concatenations. - // The hash function flattens concat trees so that - // concat(concat(a, b), c) and concat(a, concat(b, c)) - // hash to the same value. This is how ZIPT ensures - // finding equal concatenations efficiently. - struct concat_hash { - unsigned operator()(snode const* n) const; - }; - - struct concat_eq { - bool operator()(snode const* a, snode const* b) const; - }; - class sgraph { struct stats { @@ -116,9 +87,6 @@ namespace euf { // maps expression id to snode ptr_vector m_expr2snode; - // hash table for finding equal concatenations modulo associativity - hashtable m_concat_table; - snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); @@ -138,9 +106,6 @@ namespace euf { // lookup an already-registered expression snode* find(expr* e) const; - // find an existing concat that is equal modulo associativity - snode* find_assoc_equal(snode* n) const; - // register expression in both sgraph and egraph enode* mk_enode(expr* e); diff --git a/src/test/euf_seq_plugin.cpp b/src/test/euf_seq_plugin.cpp index 2c8089479..e933d87c4 100644 --- a/src/test/euf_seq_plugin.cpp +++ b/src/test/euf_seq_plugin.cpp @@ -94,50 +94,6 @@ static void test_sgraph_backtrack() { SASSERT(sg.find(x)); } -// test assoc hash: concat(concat(a,b),c) hashes same as concat(a,concat(b,c)) -static void test_assoc_hash() { - std::cout << "test_assoc_hash\n"; - ast_manager m; - reg_decl_plugins(m); - euf::sgraph sg(m); - seq_util seq(m); - sort_ref str_sort(seq.str.mk_string_sort(), m); - - expr_ref a(m.mk_const("a", str_sort), m); - expr_ref b(m.mk_const("b", str_sort), m); - expr_ref c(m.mk_const("c", str_sort), m); - - euf::snode* sa = sg.mk(a); - euf::snode* sb = sg.mk(b); - euf::snode* sc = sg.mk(c); - - // build concat(concat(a,b),c) - expr_ref ab(seq.str.mk_concat(a, b), m); - expr_ref ab_c(seq.str.mk_concat(ab, c), m); - euf::snode* sab_c = sg.mk(ab_c); - - // build concat(a,concat(b,c)) - expr_ref bc(seq.str.mk_concat(b, c), m); - expr_ref a_bc(seq.str.mk_concat(a, bc), m); - euf::snode* sa_bc = sg.mk(a_bc); - - // they should hash to the same value via the assoc hash - euf::concat_hash h; - euf::concat_eq eq; - SASSERT(h(sab_c) == h(sa_bc)); - SASSERT(eq(sab_c, sa_bc)); - - // different concat should not be equal - expr_ref ac(seq.str.mk_concat(a, c), m); - expr_ref ac_b(seq.str.mk_concat(ac, b), m); - euf::snode* sac_b = sg.mk(ac_b); - SASSERT(!eq(sab_c, sac_b)); - - // find_assoc_equal should find the first with same leaves - euf::snode* found = sg.find_assoc_equal(sa_bc); - SASSERT(found == sab_c); -} - // test seq_plugin: concat associativity is normalized by the plugin static void test_seq_plugin_assoc() { std::cout << "test_seq_plugin_assoc\n"; @@ -270,7 +226,6 @@ static void test_sgraph_egraph_sync() { void tst_euf_seq_plugin() { s_var = 0; test_sgraph_basic(); s_var = 0; test_sgraph_backtrack(); - s_var = 0; test_assoc_hash(); s_var = 0; test_seq_plugin_assoc(); s_var = 0; test_seq_plugin_empty(); s_var = 0; test_seq_plugin_star_merge(); diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp index 3a6774799..7f7760e4e 100644 --- a/src/test/euf_sgraph.cpp +++ b/src/test/euf_sgraph.cpp @@ -330,91 +330,6 @@ static void test_sgraph_mk_power() { SASSERT(sp == sp2); } -// test associativity-respecting hash: concat trees with same -// leaf order hash and compare equal regardless of tree structure -static void test_sgraph_assoc_hash() { - std::cout << "test_sgraph_assoc_hash\n"; - ast_manager m; - reg_decl_plugins(m); - euf::sgraph sg(m); - seq_util seq(m); - sort_ref str_sort(seq.str.mk_string_sort(), m); - - expr_ref a(m.mk_const("a", str_sort), m); - expr_ref b(m.mk_const("b", str_sort), m); - expr_ref c(m.mk_const("c", str_sort), m); - - euf::snode* sa = sg.mk(a); - euf::snode* sb = sg.mk(b); - euf::snode* sc = sg.mk(c); - - // concat(concat(a,b),c) — left-associated - expr_ref ab(seq.str.mk_concat(a, b), m); - expr_ref ab_c(seq.str.mk_concat(ab, c), m); - euf::snode* sab_c = sg.mk(ab_c); - - // concat(a,concat(b,c)) — right-associated - expr_ref bc(seq.str.mk_concat(b, c), m); - expr_ref a_bc(seq.str.mk_concat(a, bc), m); - euf::snode* sa_bc = sg.mk(a_bc); - - // hash and equality should agree - euf::concat_hash h; - euf::concat_eq eq; - SASSERT(h(sab_c) == h(sa_bc)); - SASSERT(eq(sab_c, sa_bc)); - - // different leaf order should not be equal - expr_ref ac(seq.str.mk_concat(a, c), m); - expr_ref ac_b(seq.str.mk_concat(ac, b), m); - euf::snode* sac_b = sg.mk(ac_b); - SASSERT(!eq(sab_c, sac_b)); - - // find_assoc_equal finds existing node with same leaf sequence - euf::snode* found = sg.find_assoc_equal(sa_bc); - SASSERT(found == sab_c); -} - -// test that concat table is cleaned up on pop -static void test_sgraph_assoc_hash_backtrack() { - std::cout << "test_sgraph_assoc_hash_backtrack\n"; - ast_manager m; - reg_decl_plugins(m); - euf::sgraph sg(m); - seq_util seq(m); - sort_ref str_sort(seq.str.mk_string_sort(), m); - - expr_ref a(m.mk_const("a", str_sort), m); - expr_ref b(m.mk_const("b", str_sort), m); - expr_ref c(m.mk_const("c", str_sort), m); - - sg.mk(a); - sg.mk(b); - sg.mk(c); - - sg.push(); - - // create left-associated concat inside scope - expr_ref ab(seq.str.mk_concat(a, b), m); - expr_ref ab_c(seq.str.mk_concat(ab, c), m); - euf::snode* sab_c = sg.mk(ab_c); - - // build right-associated variant and find the match - expr_ref bc(seq.str.mk_concat(b, c), m); - expr_ref a_bc(seq.str.mk_concat(a, bc), m); - euf::snode* sa_bc = sg.mk(a_bc); - SASSERT(sg.find_assoc_equal(sa_bc) == sab_c); - - sg.pop(1); - - // after pop, the concats are gone - // recreate right-associated and check no match found - expr_ref bc2(seq.str.mk_concat(b, c), m); - expr_ref a_bc2(seq.str.mk_concat(a, bc2), m); - euf::snode* sa_bc2 = sg.mk(a_bc2); - SASSERT(sg.find_assoc_equal(sa_bc2) == nullptr); -} - // test snode first/last navigation on concat trees static void test_sgraph_first_last() { std::cout << "test_sgraph_first_last\n"; @@ -525,8 +440,6 @@ void tst_euf_sgraph() { test_sgraph_find_idempotent(); test_sgraph_mk_concat(); test_sgraph_mk_power(); - test_sgraph_assoc_hash(); - test_sgraph_assoc_hash_backtrack(); test_sgraph_first_last(); test_sgraph_concat_metadata(); test_sgraph_display();