mirror of
https://github.com/YosysHQ/yosys
synced 2025-08-22 19:17:55 +00:00
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions. https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic. Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions. The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values. We get simple collisions like (0,1) colliding with (2,3). With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values. Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper mentioned above.
This commit is contained in:
parent
b0d709f6cf
commit
3a5742ffd2
3 changed files with 78 additions and 7 deletions
|
@ -12,6 +12,7 @@
|
||||||
#ifndef HASHLIB_H
|
#ifndef HASHLIB_H
|
||||||
#define HASHLIB_H
|
#define HASHLIB_H
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
@ -100,7 +101,7 @@ private:
|
||||||
uint32_t hash = ((a << 5) + a) ^ b;
|
uint32_t hash = ((a << 5) + a) ^ b;
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
void hash32(uint32_t i) {
|
void hash32(uint32_t i) {
|
||||||
state = djb2_xor(i, state);
|
state = djb2_xor(i, state);
|
||||||
state = mkhash_xorshift(fudge ^ state);
|
state = mkhash_xorshift(fudge ^ state);
|
||||||
|
@ -127,6 +128,7 @@ private:
|
||||||
*this = hash_ops<T>::hash_into(t, *this);
|
*this = hash_ops<T>::hash_into(t, *this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[deprecated]]
|
||||||
void commutative_eat(hash_t t) {
|
void commutative_eat(hash_t t) {
|
||||||
state ^= t;
|
state ^= t;
|
||||||
}
|
}
|
||||||
|
@ -356,6 +358,29 @@ template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict;
|
||||||
template<typename K, typename OPS = hash_ops<K>> class pool;
|
template<typename K, typename OPS = hash_ops<K>> class pool;
|
||||||
template<typename K, typename OPS = hash_ops<K>> class mfp;
|
template<typename K, typename OPS = hash_ops<K>> class mfp;
|
||||||
|
|
||||||
|
// Computes the hash value of an unordered set of elements.
|
||||||
|
// See https://www.preprints.org/manuscript/201710.0192/v1/download.
|
||||||
|
// This is the Sum(4) algorithm from that paper, which has good collision resistance,
|
||||||
|
// much better than Sum(1) or Xor(1) (and somewhat better than Xor(4)).
|
||||||
|
class commutative_hash {
|
||||||
|
public:
|
||||||
|
commutative_hash() {
|
||||||
|
buckets.fill(0);
|
||||||
|
}
|
||||||
|
void eat(Hasher h) {
|
||||||
|
Hasher::hash_t v = h.yield();
|
||||||
|
size_t index = v & (buckets.size() - 1);
|
||||||
|
buckets[index] += v;
|
||||||
|
}
|
||||||
|
[[nodiscard]] Hasher hash_into(Hasher h) const {
|
||||||
|
for (auto b : buckets)
|
||||||
|
h.eat(b);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::array<Hasher::hash_t, 4> buckets;
|
||||||
|
};
|
||||||
|
|
||||||
template<typename K, typename T, typename OPS>
|
template<typename K, typename T, typename OPS>
|
||||||
class dict {
|
class dict {
|
||||||
struct entry_t
|
struct entry_t
|
||||||
|
@ -801,14 +826,14 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] Hasher hash_into(Hasher h) const {
|
[[nodiscard]] Hasher hash_into(Hasher h) const {
|
||||||
|
commutative_hash comm;
|
||||||
for (auto &it : entries) {
|
for (auto &it : entries) {
|
||||||
Hasher entry_hash;
|
Hasher entry_hash;
|
||||||
entry_hash.eat(it.udata.first);
|
entry_hash.eat(it.udata.first);
|
||||||
entry_hash.eat(it.udata.second);
|
entry_hash.eat(it.udata.second);
|
||||||
h.commutative_eat(entry_hash.yield());
|
comm.eat(entry_hash);
|
||||||
}
|
}
|
||||||
h.eat(entries.size());
|
return comm.hash_into(h);
|
||||||
return h;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void reserve(size_t n) { entries.reserve(n); }
|
void reserve(size_t n) { entries.reserve(n); }
|
||||||
|
@ -1184,11 +1209,11 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] Hasher hash_into(Hasher h) const {
|
[[nodiscard]] Hasher hash_into(Hasher h) const {
|
||||||
|
commutative_hash comm;
|
||||||
for (auto &it : entries) {
|
for (auto &it : entries) {
|
||||||
h.commutative_eat(ops.hash(it.udata).yield());
|
comm.eat(ops.hash(it.udata));
|
||||||
}
|
}
|
||||||
h.eat(entries.size());
|
return comm.hash_into(h);
|
||||||
return h;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void reserve(size_t n) { entries.reserve(n); }
|
void reserve(size_t n) { entries.reserve(n); }
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#ifndef YOSYS_COMMON_H
|
#ifndef YOSYS_COMMON_H
|
||||||
#define YOSYS_COMMON_H
|
#define YOSYS_COMMON_H
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
45
tests/unit/kernel/hashTest.cc
Normal file
45
tests/unit/kernel/hashTest.cc
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include "kernel/yosys_common.h"
|
||||||
|
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
|
YOSYS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
// Test helper: returns a fresh Hasher that has consumed the single integer `x`.
static Hasher hash(int x)
{
	Hasher hasher;
	hasher.eat(x);
	return hasher;
}
|
||||||
|
|
||||||
|
// Feeding the same two element hashes in opposite orders must produce the
// same final hash value.
TEST(CommutativeTest, basic)
{
	hashlib::commutative_hash forward;
	forward.eat(hash(1));
	forward.eat(hash(2));

	hashlib::commutative_hash backward;
	backward.eat(hash(2));
	backward.eat(hash(1));

	const auto forward_value = forward.hash_into(Hasher()).yield();
	const auto backward_value = backward.hash_into(Hasher()).yield();
	EXPECT_EQ(forward_value, backward_value);
}
|
||||||
|
|
||||||
|
// Hashes every two-element pool<int> {lo, hi} with 0 <= lo < hi < 10000 and
// counts how many of those hashes repeat an earlier one. The count is printed
// and must stay below one million.
TEST(PoolHashTest, collisions)
{
	std::unordered_set<Hasher::hash_t> seen;
	uint64_t collisions = 0;

	for (int lo = 0; lo < 10000; ++lo) {
		for (int hi = lo + 1; hi < 10000; ++hi) {
			pool<int> two_elems;
			two_elems.insert(lo);
			two_elems.insert(hi);

			// insert() reports via .second whether the value was new.
			const bool first_seen = seen.insert(two_elems.hash_into(Hasher()).yield()).second;
			if (!first_seen)
				++collisions;
		}
	}

	std::cout << "pool<int> collisions: " << collisions << std::endl;
	EXPECT_LT(collisions, 1000000);
}
|
||||||
|
|
||||||
|
YOSYS_NAMESPACE_END
|
Loading…
Add table
Add a link
Reference in a new issue