diff --git a/kernel/hashlib.h b/kernel/hashlib.h index 7a5650fa3..b43b7302e 100644 --- a/kernel/hashlib.h +++ b/kernel/hashlib.h @@ -12,6 +12,7 @@ #ifndef HASHLIB_H #define HASHLIB_H +#include #include #include #include @@ -100,7 +101,7 @@ private: uint32_t hash = ((a << 5) + a) ^ b; return hash; } - public: +public: void hash32(uint32_t i) { state = djb2_xor(i, state); state = mkhash_xorshift(fudge ^ state); @@ -127,6 +128,7 @@ private: *this = hash_ops::hash_into(t, *this); } + [[deprecated]] void commutative_eat(hash_t t) { state ^= t; } @@ -356,6 +358,29 @@ template> class idict; template> class pool; template> class mfp; +// Computes the hash value of an unordered set of elements. +// See https://www.preprints.org/manuscript/201710.0192/v1/download. +// This is the Sum(4) algorithm from that paper, which has good collision resistance, +// much better than Sum(1) or Xor(1) (and somewhat better than Xor(4)). +class commutative_hash { +public: + commutative_hash() { + buckets.fill(0); + } + void eat(Hasher h) { + Hasher::hash_t v = h.yield(); + size_t index = v & (buckets.size() - 1); + buckets[index] += v; + } + [[nodiscard]] Hasher hash_into(Hasher h) const { + for (auto b : buckets) + h.eat(b); + return h; + } +private: + std::array buckets; +}; + template class dict { struct entry_t @@ -801,14 +826,14 @@ public: } [[nodiscard]] Hasher hash_into(Hasher h) const { + commutative_hash comm; for (auto &it : entries) { Hasher entry_hash; entry_hash.eat(it.udata.first); entry_hash.eat(it.udata.second); - h.commutative_eat(entry_hash.yield()); + comm.eat(entry_hash); } - h.eat(entries.size()); - return h; + return comm.hash_into(h); } void reserve(size_t n) { entries.reserve(n); } @@ -1184,11 +1209,11 @@ public: } [[nodiscard]] Hasher hash_into(Hasher h) const { + commutative_hash comm; for (auto &it : entries) { - h.commutative_eat(ops.hash(it.udata).yield()); + comm.eat(ops.hash(it.udata)); } - h.eat(entries.size()); - return h; + return comm.hash_into(h); } void reserve(size_t n) { entries.reserve(n); } diff --git a/kernel/yosys_common.h b/kernel/yosys_common.h index ecc8ce623..bc92e7869 100644 --- a/kernel/yosys_common.h +++ b/kernel/yosys_common.h @@ -20,6 +20,7 @@ #ifndef YOSYS_COMMON_H #define YOSYS_COMMON_H +#include #include #include #include diff --git a/tests/unit/kernel/hashTest.cc b/tests/unit/kernel/hashTest.cc new file mode 100644 index 000000000..319d064a7 --- /dev/null +++ b/tests/unit/kernel/hashTest.cc @@ -0,0 +1,67 @@ +#include +#include "kernel/yosys_common.h" + +#include + +YOSYS_NAMESPACE_BEGIN + +static Hasher hash(int x) +{ + Hasher h; + h.eat(x); + return h; +} + +TEST(CommutativeTest, basic) +{ + hashlib::commutative_hash comm1; + comm1.eat(hash(1)); + comm1.eat(hash(2)); + hashlib::commutative_hash comm2; + comm2.eat(hash(2)); + comm2.eat(hash(1)); + EXPECT_EQ(comm1.hash_into(Hasher()).yield(), comm2.hash_into(Hasher()).yield()); +} + +TEST(PoolHashTest, collisions) +{ + uint64_t collisions = 0; + std::unordered_set hashes; + for (int i = 0; i < 1000; ++i) { + for (int j = i + 1; j < 1000; ++j) { + pool p1; + p1.insert(i); + p1.insert(j); + auto h = p1.hash_into(Hasher()).yield(); + if (!hashes.insert(h).second) { + ++collisions; + } + } + } + std::cout << "pool collisions: " << collisions << std::endl; + EXPECT_LT(collisions, 10'000); +} + +TEST(PoolHashTest, subset_collisions) +{ + uint64_t collisions = 0; + std::unordered_set hashes; + for (int i = 0; i < 1000 * 1000; ++i) { + + pool p1; + for (int b = 0; i >> b; ++b) { + if ((i >> b) & 1) { + p1.insert(b); + } + } + auto h = p1.hash_into(Hasher()).yield(); + if (!hashes.insert(h).second) { + ++collisions; + } + + } + std::cout << "pool subset collisions: " << collisions << std::endl; + EXPECT_LT(collisions, 100); +} + +YOSYS_NAMESPACE_END