3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-06-06 06:03:23 +00:00

hashlib: redo interface for flexibility

This commit is contained in:
Emil J. Tywoniak 2024-10-01 15:12:03 +02:00
parent 7a362f1f74
commit d071489ab1
35 changed files with 542 additions and 386 deletions

View file

@ -17,27 +17,47 @@
#include <string>
#include <variant>
#include <vector>
#include <type_traits>
#include <stdint.h>
namespace hashlib {
/**
* HASHING
*
* The Hasher knows how to hash 32 and 64-bit integers. That's it.
* In the future, it could be expanded to do vectors with SIMD.
*
* The Hasher doesn't know how to hash common standard containers
* and compositions. However, hashlib provides centralized wrappers.
*
* Hashlib doesn't know how to hash silly Yosys-specific types.
* Hashlib doesn't depend on Yosys and can be used standalone.
* Please don't use hashlib standalone for new projects.
*
* The hash_ops type is now always left to its default value, derived
* from templated functions through SFINAE. Providing custom ops is
* still supported.
*
* HASH TABLES
*
* We implement associative data structures with separate chaining.
* Linked lists use integers into the indirection hashtable array
* instead of pointers.
*/
// TODO describe how comparison hashes are special
// TODO draw the line between generic and hash function specific code
const int hashtable_size_trigger = 2;
const int hashtable_size_factor = 3;
// The XOR version of DJB2
inline unsigned int mkhash(unsigned int a, unsigned int b) {
return ((a << 5) + a) ^ b;
}
#define DJB2_BROKEN_SIZE
// traditionally 5381 is used as starting value for the djb2 hash
const unsigned int mkhash_init = 5381;
#ifdef DJB2_BROKEN_SIZE
// The ADD version of DJB2
// (use this version for cache locality in b)
inline unsigned int mkhash_add(unsigned int a, unsigned int b) {
return ((a << 5) + a) + b;
}
template<typename T>
struct hash_ops;
inline unsigned int mkhash_xorshift(unsigned int a) {
if (sizeof(a) == 4) {
@ -53,62 +73,76 @@ inline unsigned int mkhash_xorshift(unsigned int a) {
return a;
}
template<typename T> struct hash_ops {
class Hasher {
public: //TODO
using hash_t = uint32_t;
Hasher() {
// traditionally 5381 is used as starting value for the djb2 hash
state = 5381;
}
private:
uint32_t state;
// The XOR version of DJB2
[[nodiscard]]
static uint32_t mkhash(uint32_t a, uint32_t b) {
return ((a << 5) + a) ^ b;
}
public:
void hash32(uint32_t i) {
state = mkhash(i, state);
return;
}
void hash64(uint64_t i) {
state = mkhash((uint32_t)(i % (1ULL << 32ULL)), state);
state = mkhash((uint32_t)(i >> 32ULL), state);
return;
}
hash_t yield() {
return (hash_t)state;
}
template<typename T>
void acc(T t) {
*this = hash_ops<T>::hash_acc(t, *this);
}
void commutative_acc(uint32_t t) {
state ^= t;
}
};
#endif
template<typename T>
struct hash_ops {
static inline bool cmp(const T &a, const T &b) {
return a == b;
}
static inline unsigned int hash(const T &a) {
return a.hash();
}
};
struct hash_int_ops {
template<typename T>
static inline bool cmp(T a, T b) {
return a == b;
}
};
template<> struct hash_ops<bool> : hash_int_ops
{
static inline unsigned int hash(bool a) {
return a ? 1 : 0;
}
};
template<> struct hash_ops<int32_t> : hash_int_ops
{
static inline unsigned int hash(int32_t a) {
return a;
}
};
template<> struct hash_ops<int64_t> : hash_int_ops
{
static inline unsigned int hash(int64_t a) {
return mkhash((unsigned int)(a), (unsigned int)(a >> 32));
}
};
template<> struct hash_ops<uint32_t> : hash_int_ops
{
static inline unsigned int hash(uint32_t a) {
return a;
}
};
template<> struct hash_ops<uint64_t> : hash_int_ops
{
static inline unsigned int hash(uint64_t a) {
return mkhash((unsigned int)(a), (unsigned int)(a >> 32));
}
};
template<> struct hash_ops<std::string> {
static inline bool cmp(const std::string &a, const std::string &b) {
return a == b;
}
static inline unsigned int hash(const std::string &a) {
unsigned int v = 0;
for (auto c : a)
v = mkhash(v, c);
return v;
static inline Hasher hash_acc(const T &a, Hasher h) {
if constexpr (std::is_same_v<T, bool>) {
h.hash32(a ? 1 : 0);
return h;
} else if constexpr (std::is_integral_v<T>) {
static_assert(sizeof(T) <= sizeof(uint64_t));
if (sizeof(T) == sizeof(uint64_t))
h.hash64(a);
else
h.hash32(a);
return h;
} else if constexpr (std::is_enum_v<T>) {
using u_type = std::underlying_type_t<T>;
return hash_ops<u_type>::hash_acc((u_type) a, h);
} else if constexpr (std::is_pointer_v<T>) {
return hash_ops<uintptr_t>::hash_acc((uintptr_t) a, h);
} else if constexpr (std::is_same_v<T, std::string>) {
for (auto c : a)
h.hash32(c);
return h;
} else {
return a.hash_acc(h);
}
}
};
@ -116,8 +150,10 @@ template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> {
static inline bool cmp(std::pair<P, Q> a, std::pair<P, Q> b) {
return a == b;
}
static inline unsigned int hash(std::pair<P, Q> a) {
return mkhash(hash_ops<P>::hash(a.first), hash_ops<Q>::hash(a.second));
static inline Hasher hash_acc(std::pair<P, Q> a, Hasher h) {
h = hash_ops<P>::hash_acc(a.first, h);
h = hash_ops<Q>::hash_acc(a.second, h);
return h;
}
};
@ -126,13 +162,15 @@ template<typename... T> struct hash_ops<std::tuple<T...>> {
return a == b;
}
template<size_t I = 0>
static inline typename std::enable_if<I == sizeof...(T), unsigned int>::type hash(std::tuple<T...>) {
return mkhash_init;
static inline typename std::enable_if<I == sizeof...(T), Hasher>::type hash_acc(std::tuple<T...>, Hasher h) {
return h;
}
template<size_t I = 0>
static inline typename std::enable_if<I != sizeof...(T), unsigned int>::type hash(std::tuple<T...> a) {
static inline typename std::enable_if<I != sizeof...(T), Hasher>::type hash_acc(std::tuple<T...> a, Hasher h) {
typedef hash_ops<typename std::tuple_element<I, std::tuple<T...>>::type> element_ops_t;
return mkhash(hash<I+1>(a), element_ops_t::hash(std::get<I>(a)));
h = hash_acc<I+1>(a, h);
h = element_ops_t::hash_acc(std::get<I>(a), h);
return h;
}
};
@ -140,10 +178,10 @@ template<typename T> struct hash_ops<std::vector<T>> {
static inline bool cmp(std::vector<T> a, std::vector<T> b) {
return a == b;
}
static inline unsigned int hash(std::vector<T> a) {
unsigned int h = mkhash_init;
static inline Hasher hash_acc(std::vector<T> a, Hasher h) {
h.acc(a.size());
for (auto k : a)
h = mkhash(h, hash_ops<T>::hash(k));
h.acc(k);
return h;
}
};
@ -155,20 +193,21 @@ struct hash_cstr_ops {
return false;
return true;
}
static inline unsigned int hash(const char *a) {
unsigned int hash = mkhash_init;
static inline Hasher hash_acc(const char *a, Hasher h) {
while (*a)
hash = mkhash(hash, *(a++));
return hash;
h.hash32(*(a++));
return h;
}
};
template <> struct hash_ops<char*> : hash_cstr_ops {};
struct hash_ptr_ops {
static inline bool cmp(const void *a, const void *b) {
return a == b;
}
static inline unsigned int hash(const void *a) {
return (uintptr_t)a;
static inline Hasher hash_acc(const void *a, Hasher h) {
return hash_ops<uintptr_t>::hash_acc((uintptr_t)a, h);
}
};
@ -177,22 +216,36 @@ struct hash_obj_ops {
return a == b;
}
template<typename T>
static inline unsigned int hash(const T *a) {
return a ? a->hash() : 0;
static inline Hasher hash_acc(const T *a, Hasher h) {
return a ? a->hash_acc(h) : h;
}
};
/**
* If you find yourself using this function, think hard
* about if it's the right thing to do. Mixing finalized
* hashes together with XORs or worse can destroy
* desirable qualities of the hash function
*/
template<typename T>
inline unsigned int mkhash(const T &v) {
return hash_ops<T>().hash(v);
Hasher::hash_t run_hash(const T& obj) {
Hasher h;
h.acc(obj);
return h.yield();
}
// #ifdef OTHER_HASH...
// [[deprecated]]
// inline unsigned int mkhash_add(unsigned int a, unsigned int b) {
// return mkhash(a, b);
// }
template<> struct hash_ops<std::monostate> {
static inline bool cmp(std::monostate a, std::monostate b) {
return a == b;
}
static inline unsigned int hash(std::monostate) {
return mkhash_init;
static inline Hasher hash_acc(std::monostate, Hasher h) {
return h;
}
};
@ -200,9 +253,10 @@ template<typename... T> struct hash_ops<std::variant<T...>> {
static inline bool cmp(std::variant<T...> a, std::variant<T...> b) {
return a == b;
}
static inline unsigned int hash(std::variant<T...> a) {
unsigned int h = std::visit([](const auto &v) { return mkhash(v); }, a);
return mkhash(a.index(), h);
static inline Hasher hash_acc(std::variant<T...> a, Hasher h) {
std::visit([& h](const auto &v) { h.acc(v); }, a);
h.acc(a.index());
return h;
}
};
@ -210,11 +264,12 @@ template<typename T> struct hash_ops<std::optional<T>> {
static inline bool cmp(std::optional<T> a, std::optional<T> b) {
return a == b;
}
static inline unsigned int hash(std::optional<T> a) {
static inline Hasher hash_acc(std::optional<T> a, Hasher h) {
if(a.has_value())
return mkhash(*a);
h.acc(*a);
else
return 0;
h.acc(0);
return h;
}
};
@ -246,14 +301,13 @@ inline int hashtable_size(int min_size)
throw std::length_error("hash table exceeded maximum size.");
}
template<typename K, typename T, typename OPS = hash_ops<K>> class dict;
template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict;
template<typename K, typename OPS = hash_ops<K>> class pool;
template<typename K, typename OPS = hash_ops<K>> class mfp;
template<typename K, typename T> class dict;
template<typename K, int offset = 0> class idict;
template<typename K> class pool;
template<typename K> class mfp;
template<typename K, typename T, typename OPS>
class dict
{
template<typename K, typename T>
class dict {
struct entry_t
{
std::pair<K, T> udata;
@ -267,7 +321,7 @@ class dict
std::vector<int> hashtable;
std::vector<entry_t> entries;
OPS ops;
hash_ops<K> ops;
#ifdef NDEBUG
static inline void do_assert(bool) { }
@ -279,9 +333,9 @@ class dict
int do_hash(const K &key) const
{
unsigned int hash = 0;
Hasher::hash_t hash = 0;
if (!hashtable.empty())
hash = ops.hash(key) % (unsigned int)(hashtable.size());
hash = run_hash<K>(key) % (unsigned int)(hashtable.size());
return hash;
}
@ -683,11 +737,13 @@ public:
return !operator==(other);
}
unsigned int hash() const {
unsigned int h = mkhash_init;
for (auto &entry : entries) {
h ^= hash_ops<K>::hash(entry.udata.first);
h ^= hash_ops<T>::hash(entry.udata.second);
Hasher hash_acc(Hasher h) const {
h.acc(entries.size());
for (auto &it : entries) {
Hasher entry_hash;
entry_hash.acc(it.udata.first);
entry_hash.acc(it.udata.second);
h.commutative_acc(entry_hash.yield());
}
return h;
}
@ -706,10 +762,10 @@ public:
const_iterator end() const { return const_iterator(nullptr, -1); }
};
template<typename K, typename OPS>
template<typename K>
class pool
{
template<typename, int, typename> friend class idict;
template<typename, int> friend class idict;
protected:
struct entry_t
@ -724,7 +780,7 @@ protected:
std::vector<int> hashtable;
std::vector<entry_t> entries;
OPS ops;
hash_ops<K> ops;
#ifdef NDEBUG
static inline void do_assert(bool) { }
@ -736,9 +792,9 @@ protected:
int do_hash(const K &key) const
{
unsigned int hash = 0;
Hasher::hash_t hash = 0;
if (!hashtable.empty())
hash = ops.hash(key) % (unsigned int)(hashtable.size());
hash = run_hash<K>(key) % (unsigned int)(hashtable.size());
return hash;
}
@ -1051,11 +1107,12 @@ public:
return !operator==(other);
}
unsigned int hash() const {
unsigned int hashval = mkhash_init;
for (auto &it : entries)
hashval ^= ops.hash(it.udata);
return hashval;
Hasher hash_acc(Hasher h) const {
h.acc(entries.size());
for (auto &it : entries) {
h.commutative_acc(run_hash(it.udata));
}
return h;
}
void reserve(size_t n) { entries.reserve(n); }
@ -1072,10 +1129,10 @@ public:
const_iterator end() const { return const_iterator(nullptr, -1); }
};
template<typename K, int offset, typename OPS>
template<typename K, int offset>
class idict
{
pool<K, OPS> database;
pool<K> database;
public:
class const_iterator
@ -1169,14 +1226,14 @@ public:
* mfp stands for "merge, find, promote"
* i-prefixed methods operate on indices in parents
*/
template<typename K, typename OPS>
template<typename K>
class mfp
{
mutable idict<K, 0, OPS> database;
mutable idict<K, 0> database;
mutable std::vector<int> parents;
public:
typedef typename idict<K, 0, OPS>::const_iterator const_iterator;
typedef typename idict<K, 0>::const_iterator const_iterator;
constexpr mfp()
{