3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-10-25 00:44:37 +00:00

Merge pull request #5302 from rocallahan/commutative-hash

Improve commutative hashing.
This commit is contained in:
Emil J 2025-08-20 10:43:00 +02:00 committed by GitHub
commit ba8af7ad8f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 100 additions and 7 deletions

View file

@ -12,6 +12,7 @@
#ifndef HASHLIB_H #ifndef HASHLIB_H
#define HASHLIB_H #define HASHLIB_H
#include <array>
#include <stdexcept> #include <stdexcept>
#include <algorithm> #include <algorithm>
#include <optional> #include <optional>
@ -127,6 +128,7 @@ private:
*this = hash_ops<T>::hash_into(t, *this); *this = hash_ops<T>::hash_into(t, *this);
} }
// Order-insensitive mixing step: XORs the already-computed hash value `t`
// straight into `state`.
// NOTE(review): this page is a side-by-side diff, so unchanged lines appear
// twice on one line. The unpaired `[[deprecated]]` line looks like the
// attribute this commit adds - confirm against the repository.
[[deprecated]]
void commutative_eat(hash_t t) { void commutative_eat(hash_t t) {
state ^= t; state ^= t;
} }
@ -356,6 +358,29 @@ template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict;
template<typename K, typename OPS = hash_ops<K>> class pool; template<typename K, typename OPS = hash_ops<K>> class pool;
template<typename K, typename OPS = hash_ops<K>> class mfp; template<typename K, typename OPS = hash_ops<K>> class mfp;
// Computes the hash value of an unordered set of elements.
// See https://www.preprints.org/manuscript/201710.0192/v1/download.
// This is the Sum(4) algorithm from that paper, which has good collision resistance,
// much better than Sum(1) or Xor(1) (and somewhat better than Xor(4)).
class commutative_hash {
public:
commutative_hash() {
buckets.fill(0);
}
void eat(Hasher h) {
Hasher::hash_t v = h.yield();
size_t index = v & (buckets.size() - 1);
buckets[index] += v;
}
[[nodiscard]] Hasher hash_into(Hasher h) const {
for (auto b : buckets)
h.eat(b);
return h;
}
private:
std::array<Hasher::hash_t, 4> buckets;
};
template<typename K, typename T, typename OPS> template<typename K, typename T, typename OPS>
class dict { class dict {
struct entry_t struct entry_t
@ -801,14 +826,14 @@ public:
} }
// Order-independent hash of all entries of a key/value container (presumably
// dict::hash_into, since each entry exposes `udata.first` and `udata.second` -
// confirm against the repository).
// NOTE(review): this is a side-by-side diff rendering. A line holding two
// identical statements is unchanged context. Where the two halves differ, the
// first half appears to be the removed code (`h.commutative_eat(...)`,
// `h.eat(entries.size())`, `return h;`) and the second half the added code
// (`comm.eat(entry_hash)`, `return comm.hash_into(h)`).
// In the apparent new version each entry gets its own Hasher that eats the key
// and then the value; that per-entry Hasher is folded into a commutative_hash,
// which is finally fed into `h`.
[[nodiscard]] Hasher hash_into(Hasher h) const { [[nodiscard]] Hasher hash_into(Hasher h) const {
commutative_hash comm;
for (auto &it : entries) { for (auto &it : entries) {
Hasher entry_hash; Hasher entry_hash;
entry_hash.eat(it.udata.first); entry_hash.eat(it.udata.first);
entry_hash.eat(it.udata.second); entry_hash.eat(it.udata.second);
h.commutative_eat(entry_hash.yield()); comm.eat(entry_hash);
} }
h.eat(entries.size()); return comm.hash_into(h);
return h;
} }
// Forwards the capacity request `n` to the underlying `entries` container.
// (Unchanged context line; it appears twice because of the side-by-side diff.)
void reserve(size_t n) { entries.reserve(n); } void reserve(size_t n) { entries.reserve(n); }
@ -1184,11 +1209,11 @@ public:
} }
// Order-independent hash of all entries of a set-like container (presumably
// pool::hash_into, since each entry's `udata` is hashed as a single key -
// confirm against the repository).
// NOTE(review): this is a side-by-side diff rendering. A line holding two
// identical statements is unchanged context. Where the two halves differ, the
// first half appears to be the removed code (`h.commutative_eat(...)`,
// `h.eat(entries.size())`, `return h;`) and the second half the added code
// (`comm.eat(ops.hash(it.udata))`, `return comm.hash_into(h)`).
[[nodiscard]] Hasher hash_into(Hasher h) const { [[nodiscard]] Hasher hash_into(Hasher h) const {
commutative_hash comm;
for (auto &it : entries) { for (auto &it : entries) {
h.commutative_eat(ops.hash(it.udata).yield()); comm.eat(ops.hash(it.udata));
} }
h.eat(entries.size()); return comm.hash_into(h);
return h;
} }
// Forwards the capacity request `n` to the underlying `entries` container.
// (Unchanged context line; it appears twice because of the side-by-side diff.)
void reserve(size_t n) { entries.reserve(n); } void reserve(size_t n) { entries.reserve(n); }

View file

@ -20,6 +20,7 @@
#ifndef YOSYS_COMMON_H #ifndef YOSYS_COMMON_H
#define YOSYS_COMMON_H #define YOSYS_COMMON_H
#include <array>
#include <map> #include <map>
#include <set> #include <set>
#include <tuple> #include <tuple>

View file

@ -0,0 +1,67 @@
#include <gtest/gtest.h>
#include "kernel/yosys_common.h"
#include <unordered_set>
YOSYS_NAMESPACE_BEGIN
// Test helper: returns a fresh Hasher that has consumed exactly one int, `x`.
static Hasher hash(int x)
{
	Hasher hasher;
	hasher.eat(x);
	return hasher;
}
// Feeding the same two element hashes in opposite orders must produce the
// same final hash value.
TEST(CommutativeTest, basic)
{
	hashlib::commutative_hash comm1;
	hashlib::commutative_hash comm2;

	// comm1 sees 1 then 2; comm2 sees 2 then 1.
	comm1.eat(hash(1));
	comm2.eat(hash(2));
	comm1.eat(hash(2));
	comm2.eat(hash(1));

	EXPECT_EQ(comm1.hash_into(Hasher()).yield(), comm2.hash_into(Hasher()).yield());
}
// Hashes every two-element pool {i, j} with 0 <= i < j < 1000 and counts how
// many of those hashes repeat a value already produced by an earlier pair.
// The count is printed and must stay below 10'000.
TEST(PoolHashTest, collisions)
{
	constexpr int element_count = 1000;
	std::unordered_set<Hasher::hash_t> hashes;
	uint64_t collisions = 0;

	for (int lo = 0; lo < element_count; ++lo) {
		for (int hi = lo + 1; hi < element_count; ++hi) {
			pool<int> pair_pool;
			pair_pool.insert(lo);
			pair_pool.insert(hi);

			const auto digest = pair_pool.hash_into(Hasher()).yield();
			// insert() reports whether the value was new; a repeat is a collision.
			const bool is_new = hashes.insert(digest).second;
			if (!is_new)
				++collisions;
		}
	}

	std::cout << "pool<int> collisions: " << collisions << std::endl;
	EXPECT_LT(collisions, 10'000);
}
// Treats each integer in [0, 1000 * 1000) as a bitmask, builds the pool that
// contains the positions of its set bits, and counts how many pool hashes
// repeat a value already seen. The count is printed and must stay below 100.
TEST(PoolHashTest, subset_collisions)
{
	constexpr int mask_limit = 1000 * 1000;
	std::unordered_set<Hasher::hash_t> hashes;
	uint64_t collisions = 0;

	for (int mask = 0; mask < mask_limit; ++mask) {
		pool<int> subset;
		// Walk the mask from the least significant bit upward, stopping once
		// no set bits remain.
		int bit = 0;
		for (int remaining = mask; remaining != 0; remaining >>= 1, ++bit) {
			if (remaining & 1)
				subset.insert(bit);
		}

		const auto digest = subset.hash_into(Hasher()).yield();
		const bool is_new = hashes.insert(digest).second;
		if (!is_new)
			++collisions;
	}

	std::cout << "pool<int> subset collisions: " << collisions << std::endl;
	EXPECT_LT(collisions, 100);
}
YOSYS_NAMESPACE_END