mirror of
https://github.com/YosysHQ/yosys
synced 2025-06-06 06:03:23 +00:00
hashlib: redo interface for flexibility
This commit is contained in:
parent
7a362f1f74
commit
d071489ab1
35 changed files with 542 additions and 386 deletions
303
kernel/hashlib.h
303
kernel/hashlib.h
|
@ -17,27 +17,47 @@
|
|||
#include <string>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include <type_traits>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace hashlib {
|
||||
|
||||
/**
|
||||
* HASHING
|
||||
*
|
||||
* The Hasher knows how to hash 32 and 64-bit integers. That's it.
|
||||
* In the future, it could be expanded to do vectors with SIMD.
|
||||
*
|
||||
* The Hasher doesn't know how to hash common standard containers
|
||||
* and compositions. However, hashlib provides centralized wrappers.
|
||||
*
|
||||
* Hashlib doesn't know how to hash silly Yosys-specific types.
|
||||
* Hashlib doesn't depend on Yosys and can be used standalone.
|
||||
* Please don't use hashlib standalone for new projects.
|
||||
*
|
||||
* The hash_ops type is now always left to its default value, derived
|
||||
* from templated functions through SFINAE. Providing custom ops is
|
||||
* still supported.
|
||||
*
|
||||
* HASH TABLES
|
||||
*
|
||||
* We implement associative data structures with separate chaining.
|
||||
* Linked lists use integers into the indirection hashtable array
|
||||
* instead of pointers.
|
||||
*/
|
||||
|
||||
// TODO describe how comparison hashes are special
|
||||
// TODO draw the line between generic and hash function specific code
|
||||
|
||||
const int hashtable_size_trigger = 2;
|
||||
const int hashtable_size_factor = 3;
|
||||
|
||||
// The XOR version of DJB2
|
||||
inline unsigned int mkhash(unsigned int a, unsigned int b) {
|
||||
return ((a << 5) + a) ^ b;
|
||||
}
|
||||
#define DJB2_BROKEN_SIZE
|
||||
|
||||
// traditionally 5381 is used as starting value for the djb2 hash
|
||||
const unsigned int mkhash_init = 5381;
|
||||
#ifdef DJB2_BROKEN_SIZE
|
||||
|
||||
// The ADD version of DJB2
|
||||
// (use this version for cache locality in b)
|
||||
inline unsigned int mkhash_add(unsigned int a, unsigned int b) {
|
||||
return ((a << 5) + a) + b;
|
||||
}
|
||||
template<typename T>
|
||||
struct hash_ops;
|
||||
|
||||
inline unsigned int mkhash_xorshift(unsigned int a) {
|
||||
if (sizeof(a) == 4) {
|
||||
|
@ -53,62 +73,76 @@ inline unsigned int mkhash_xorshift(unsigned int a) {
|
|||
return a;
|
||||
}
|
||||
|
||||
template<typename T> struct hash_ops {
|
||||
class Hasher {
|
||||
public: //TODO
|
||||
using hash_t = uint32_t;
|
||||
|
||||
Hasher() {
|
||||
// traditionally 5381 is used as starting value for the djb2 hash
|
||||
state = 5381;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t state;
|
||||
// The XOR version of DJB2
|
||||
[[nodiscard]]
|
||||
static uint32_t mkhash(uint32_t a, uint32_t b) {
|
||||
return ((a << 5) + a) ^ b;
|
||||
}
|
||||
public:
|
||||
void hash32(uint32_t i) {
|
||||
state = mkhash(i, state);
|
||||
return;
|
||||
}
|
||||
void hash64(uint64_t i) {
|
||||
state = mkhash((uint32_t)(i % (1ULL << 32ULL)), state);
|
||||
state = mkhash((uint32_t)(i >> 32ULL), state);
|
||||
return;
|
||||
}
|
||||
hash_t yield() {
|
||||
return (hash_t)state;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void acc(T t) {
|
||||
*this = hash_ops<T>::hash_acc(t, *this);
|
||||
}
|
||||
|
||||
void commutative_acc(uint32_t t) {
|
||||
state ^= t;
|
||||
}
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
struct hash_ops {
|
||||
static inline bool cmp(const T &a, const T &b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(const T &a) {
|
||||
return a.hash();
|
||||
}
|
||||
};
|
||||
|
||||
struct hash_int_ops {
|
||||
template<typename T>
|
||||
static inline bool cmp(T a, T b) {
|
||||
return a == b;
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct hash_ops<bool> : hash_int_ops
|
||||
{
|
||||
static inline unsigned int hash(bool a) {
|
||||
return a ? 1 : 0;
|
||||
}
|
||||
};
|
||||
template<> struct hash_ops<int32_t> : hash_int_ops
|
||||
{
|
||||
static inline unsigned int hash(int32_t a) {
|
||||
return a;
|
||||
}
|
||||
};
|
||||
template<> struct hash_ops<int64_t> : hash_int_ops
|
||||
{
|
||||
static inline unsigned int hash(int64_t a) {
|
||||
return mkhash((unsigned int)(a), (unsigned int)(a >> 32));
|
||||
}
|
||||
};
|
||||
template<> struct hash_ops<uint32_t> : hash_int_ops
|
||||
{
|
||||
static inline unsigned int hash(uint32_t a) {
|
||||
return a;
|
||||
}
|
||||
};
|
||||
template<> struct hash_ops<uint64_t> : hash_int_ops
|
||||
{
|
||||
static inline unsigned int hash(uint64_t a) {
|
||||
return mkhash((unsigned int)(a), (unsigned int)(a >> 32));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct hash_ops<std::string> {
|
||||
static inline bool cmp(const std::string &a, const std::string &b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(const std::string &a) {
|
||||
unsigned int v = 0;
|
||||
for (auto c : a)
|
||||
v = mkhash(v, c);
|
||||
return v;
|
||||
static inline Hasher hash_acc(const T &a, Hasher h) {
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
h.hash32(a ? 1 : 0);
|
||||
return h;
|
||||
} else if constexpr (std::is_integral_v<T>) {
|
||||
static_assert(sizeof(T) <= sizeof(uint64_t));
|
||||
if (sizeof(T) == sizeof(uint64_t))
|
||||
h.hash64(a);
|
||||
else
|
||||
h.hash32(a);
|
||||
return h;
|
||||
} else if constexpr (std::is_enum_v<T>) {
|
||||
using u_type = std::underlying_type_t<T>;
|
||||
return hash_ops<u_type>::hash_acc((u_type) a, h);
|
||||
} else if constexpr (std::is_pointer_v<T>) {
|
||||
return hash_ops<uintptr_t>::hash_acc((uintptr_t) a, h);
|
||||
} else if constexpr (std::is_same_v<T, std::string>) {
|
||||
for (auto c : a)
|
||||
h.hash32(c);
|
||||
return h;
|
||||
} else {
|
||||
return a.hash_acc(h);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -116,8 +150,10 @@ template<typename P, typename Q> struct hash_ops<std::pair<P, Q>> {
|
|||
static inline bool cmp(std::pair<P, Q> a, std::pair<P, Q> b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(std::pair<P, Q> a) {
|
||||
return mkhash(hash_ops<P>::hash(a.first), hash_ops<Q>::hash(a.second));
|
||||
static inline Hasher hash_acc(std::pair<P, Q> a, Hasher h) {
|
||||
h = hash_ops<P>::hash_acc(a.first, h);
|
||||
h = hash_ops<Q>::hash_acc(a.second, h);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -126,13 +162,15 @@ template<typename... T> struct hash_ops<std::tuple<T...>> {
|
|||
return a == b;
|
||||
}
|
||||
template<size_t I = 0>
|
||||
static inline typename std::enable_if<I == sizeof...(T), unsigned int>::type hash(std::tuple<T...>) {
|
||||
return mkhash_init;
|
||||
static inline typename std::enable_if<I == sizeof...(T), Hasher>::type hash_acc(std::tuple<T...>, Hasher h) {
|
||||
return h;
|
||||
}
|
||||
template<size_t I = 0>
|
||||
static inline typename std::enable_if<I != sizeof...(T), unsigned int>::type hash(std::tuple<T...> a) {
|
||||
static inline typename std::enable_if<I != sizeof...(T), Hasher>::type hash_acc(std::tuple<T...> a, Hasher h) {
|
||||
typedef hash_ops<typename std::tuple_element<I, std::tuple<T...>>::type> element_ops_t;
|
||||
return mkhash(hash<I+1>(a), element_ops_t::hash(std::get<I>(a)));
|
||||
h = hash_acc<I+1>(a, h);
|
||||
h = element_ops_t::hash_acc(std::get<I>(a), h);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -140,10 +178,10 @@ template<typename T> struct hash_ops<std::vector<T>> {
|
|||
static inline bool cmp(std::vector<T> a, std::vector<T> b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(std::vector<T> a) {
|
||||
unsigned int h = mkhash_init;
|
||||
static inline Hasher hash_acc(std::vector<T> a, Hasher h) {
|
||||
h.acc(a.size());
|
||||
for (auto k : a)
|
||||
h = mkhash(h, hash_ops<T>::hash(k));
|
||||
h.acc(k);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
@ -155,20 +193,21 @@ struct hash_cstr_ops {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
static inline unsigned int hash(const char *a) {
|
||||
unsigned int hash = mkhash_init;
|
||||
static inline Hasher hash_acc(const char *a, Hasher h) {
|
||||
while (*a)
|
||||
hash = mkhash(hash, *(a++));
|
||||
return hash;
|
||||
h.hash32(*(a++));
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct hash_ops<char*> : hash_cstr_ops {};
|
||||
|
||||
struct hash_ptr_ops {
|
||||
static inline bool cmp(const void *a, const void *b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(const void *a) {
|
||||
return (uintptr_t)a;
|
||||
static inline Hasher hash_acc(const void *a, Hasher h) {
|
||||
return hash_ops<uintptr_t>::hash_acc((uintptr_t)a, h);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -177,22 +216,36 @@ struct hash_obj_ops {
|
|||
return a == b;
|
||||
}
|
||||
template<typename T>
|
||||
static inline unsigned int hash(const T *a) {
|
||||
return a ? a->hash() : 0;
|
||||
static inline Hasher hash_acc(const T *a, Hasher h) {
|
||||
return a ? a->hash_acc(h) : h;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* If you find yourself using this function, think hard
|
||||
* about if it's the right thing to do. Mixing finalized
|
||||
* hashes together with XORs or worse can destroy
|
||||
* desirable qualities of the hash function
|
||||
*/
|
||||
template<typename T>
|
||||
inline unsigned int mkhash(const T &v) {
|
||||
return hash_ops<T>().hash(v);
|
||||
Hasher::hash_t run_hash(const T& obj) {
|
||||
Hasher h;
|
||||
h.acc(obj);
|
||||
return h.yield();
|
||||
}
|
||||
|
||||
// #ifdef OTHER_HASH...
|
||||
|
||||
// [[deprecated]]
|
||||
// inline unsigned int mkhash_add(unsigned int a, unsigned int b) {
|
||||
// return mkhash(a, b);
|
||||
// }
|
||||
|
||||
template<> struct hash_ops<std::monostate> {
|
||||
static inline bool cmp(std::monostate a, std::monostate b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(std::monostate) {
|
||||
return mkhash_init;
|
||||
static inline Hasher hash_acc(std::monostate, Hasher h) {
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -200,9 +253,10 @@ template<typename... T> struct hash_ops<std::variant<T...>> {
|
|||
static inline bool cmp(std::variant<T...> a, std::variant<T...> b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(std::variant<T...> a) {
|
||||
unsigned int h = std::visit([](const auto &v) { return mkhash(v); }, a);
|
||||
return mkhash(a.index(), h);
|
||||
static inline Hasher hash_acc(std::variant<T...> a, Hasher h) {
|
||||
std::visit([& h](const auto &v) { h.acc(v); }, a);
|
||||
h.acc(a.index());
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -210,11 +264,12 @@ template<typename T> struct hash_ops<std::optional<T>> {
|
|||
static inline bool cmp(std::optional<T> a, std::optional<T> b) {
|
||||
return a == b;
|
||||
}
|
||||
static inline unsigned int hash(std::optional<T> a) {
|
||||
static inline Hasher hash_acc(std::optional<T> a, Hasher h) {
|
||||
if(a.has_value())
|
||||
return mkhash(*a);
|
||||
h.acc(*a);
|
||||
else
|
||||
return 0;
|
||||
h.acc(0);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -246,14 +301,13 @@ inline int hashtable_size(int min_size)
|
|||
throw std::length_error("hash table exceeded maximum size.");
|
||||
}
|
||||
|
||||
template<typename K, typename T, typename OPS = hash_ops<K>> class dict;
|
||||
template<typename K, int offset = 0, typename OPS = hash_ops<K>> class idict;
|
||||
template<typename K, typename OPS = hash_ops<K>> class pool;
|
||||
template<typename K, typename OPS = hash_ops<K>> class mfp;
|
||||
template<typename K, typename T> class dict;
|
||||
template<typename K, int offset = 0> class idict;
|
||||
template<typename K> class pool;
|
||||
template<typename K> class mfp;
|
||||
|
||||
template<typename K, typename T, typename OPS>
|
||||
class dict
|
||||
{
|
||||
template<typename K, typename T>
|
||||
class dict {
|
||||
struct entry_t
|
||||
{
|
||||
std::pair<K, T> udata;
|
||||
|
@ -267,7 +321,7 @@ class dict
|
|||
|
||||
std::vector<int> hashtable;
|
||||
std::vector<entry_t> entries;
|
||||
OPS ops;
|
||||
hash_ops<K> ops;
|
||||
|
||||
#ifdef NDEBUG
|
||||
static inline void do_assert(bool) { }
|
||||
|
@ -279,9 +333,9 @@ class dict
|
|||
|
||||
int do_hash(const K &key) const
|
||||
{
|
||||
unsigned int hash = 0;
|
||||
Hasher::hash_t hash = 0;
|
||||
if (!hashtable.empty())
|
||||
hash = ops.hash(key) % (unsigned int)(hashtable.size());
|
||||
hash = run_hash<K>(key) % (unsigned int)(hashtable.size());
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
@ -683,11 +737,13 @@ public:
|
|||
return !operator==(other);
|
||||
}
|
||||
|
||||
unsigned int hash() const {
|
||||
unsigned int h = mkhash_init;
|
||||
for (auto &entry : entries) {
|
||||
h ^= hash_ops<K>::hash(entry.udata.first);
|
||||
h ^= hash_ops<T>::hash(entry.udata.second);
|
||||
Hasher hash_acc(Hasher h) const {
|
||||
h.acc(entries.size());
|
||||
for (auto &it : entries) {
|
||||
Hasher entry_hash;
|
||||
entry_hash.acc(it.udata.first);
|
||||
entry_hash.acc(it.udata.second);
|
||||
h.commutative_acc(entry_hash.yield());
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
@ -706,10 +762,10 @@ public:
|
|||
const_iterator end() const { return const_iterator(nullptr, -1); }
|
||||
};
|
||||
|
||||
template<typename K, typename OPS>
|
||||
template<typename K>
|
||||
class pool
|
||||
{
|
||||
template<typename, int, typename> friend class idict;
|
||||
template<typename, int> friend class idict;
|
||||
|
||||
protected:
|
||||
struct entry_t
|
||||
|
@ -724,7 +780,7 @@ protected:
|
|||
|
||||
std::vector<int> hashtable;
|
||||
std::vector<entry_t> entries;
|
||||
OPS ops;
|
||||
hash_ops<K> ops;
|
||||
|
||||
#ifdef NDEBUG
|
||||
static inline void do_assert(bool) { }
|
||||
|
@ -736,9 +792,9 @@ protected:
|
|||
|
||||
int do_hash(const K &key) const
|
||||
{
|
||||
unsigned int hash = 0;
|
||||
Hasher::hash_t hash = 0;
|
||||
if (!hashtable.empty())
|
||||
hash = ops.hash(key) % (unsigned int)(hashtable.size());
|
||||
hash = run_hash<K>(key) % (unsigned int)(hashtable.size());
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
@ -1051,11 +1107,12 @@ public:
|
|||
return !operator==(other);
|
||||
}
|
||||
|
||||
unsigned int hash() const {
|
||||
unsigned int hashval = mkhash_init;
|
||||
for (auto &it : entries)
|
||||
hashval ^= ops.hash(it.udata);
|
||||
return hashval;
|
||||
Hasher hash_acc(Hasher h) const {
|
||||
h.acc(entries.size());
|
||||
for (auto &it : entries) {
|
||||
h.commutative_acc(run_hash(it.udata));
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
void reserve(size_t n) { entries.reserve(n); }
|
||||
|
@ -1072,10 +1129,10 @@ public:
|
|||
const_iterator end() const { return const_iterator(nullptr, -1); }
|
||||
};
|
||||
|
||||
template<typename K, int offset, typename OPS>
|
||||
template<typename K, int offset>
|
||||
class idict
|
||||
{
|
||||
pool<K, OPS> database;
|
||||
pool<K> database;
|
||||
|
||||
public:
|
||||
class const_iterator
|
||||
|
@ -1169,14 +1226,14 @@ public:
|
|||
* mfp stands for "merge, find, promote"
|
||||
* i-prefixed methods operate on indices in parents
|
||||
*/
|
||||
template<typename K, typename OPS>
|
||||
template<typename K>
|
||||
class mfp
|
||||
{
|
||||
mutable idict<K, 0, OPS> database;
|
||||
mutable idict<K, 0> database;
|
||||
mutable std::vector<int> parents;
|
||||
|
||||
public:
|
||||
typedef typename idict<K, 0, OPS>::const_iterator const_iterator;
|
||||
typedef typename idict<K, 0>::const_iterator const_iterator;
|
||||
|
||||
constexpr mfp()
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue