
Add ShardedHashSet

We'll use this later in the PR.
Robert O'Callahan 2026-01-28 18:59:35 +00:00
parent b06e903906
commit d0276169d3

@@ -391,6 +391,177 @@ private:
	Buckets buckets;
};

template <typename V>
struct DefaultCollisionHandler {
	void operator()(typename V::Accumulated &, typename V::Accumulated &) const {}
};

// A hashtable that can be efficiently built in parallel and then looked up concurrently.
// `V` is the type of elements that will be added to the hashtable. It must have a
// member type `Accumulated` representing the combination of multiple `V` elements. This
// can be the same as `V`, but for example `V` could contain a `Wire*` and `V::Accumulated`
// could contain a `pool<Wire*>`. `KeyEquality` is a class containing an `operator()` that
// returns true if two `V::Accumulated` elements have equal keys.
// `CollisionHandler` is used to reduce two `V::Accumulated` values into a single value.
//
// To use this, first construct a `Builder` and fill it in (in parallel), then construct
// a `ShardedHashSet` from the `Builder`.
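//
// Example element types (an illustrative sketch, not part of this header; `WireEntry`,
// `WireEntryEquality` and `MergeWires` are hypothetical names). Note that all elements
// with equal keys must be given the same hash value, since the hash selects both the
// builder bucket and the shard:
//
//	struct WireEntry {
//		Wire *wire;
//		SigSpec key;
//		// All wires that share one key.
//		struct Accumulated {
//			Accumulated(WireEntry e) : key(std::move(e.key)) { wires.insert(e.wire); }
//			SigSpec key;
//			pool<Wire*> wires;
//		};
//	};
//	struct WireEntryEquality {
//		bool operator()(const WireEntry::Accumulated &a, const WireEntry::Accumulated &b) const {
//			return a.key == b.key;
//		}
//	};
//	struct MergeWires {
//		void operator()(WireEntry::Accumulated &existing, WireEntry::Accumulated &incoming) const {
//			for (Wire *w : incoming.wires)
//				existing.wires.insert(w);
//		}
//	};
//	using WireSet = ShardedHashSet<WireEntry, WireEntryEquality, MergeWires>;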
template <typename V, typename KeyEquality, typename CollisionHandler = DefaultCollisionHandler<V>>
class ShardedHashSet {
public:
	// A combination of a `V` and its hash value.
	struct Value {
		Value(V value, unsigned int hash) : value(std::move(value)), hash(hash) {}
		Value(Value &&) = default;
		Value(const Value &) = delete;
		Value &operator=(const Value &) = delete;

		V value;
		unsigned int hash;
	};

	// A combination of a `V::Accumulated` and its hash value.
	struct AccumulatedValue {
		AccumulatedValue(typename V::Accumulated value, unsigned int hash) : value(std::move(value)), hash(hash) {}
		AccumulatedValue(AccumulatedValue &&) = default;
#if defined(_MSC_VER)
		// Work around MSVC: instead of deleting the copy operations, define them to
		// fail loudly at runtime if they are ever called.
		AccumulatedValue(const AccumulatedValue &) {
			log_error("Copy constructor called on AccumulatedValue");
		}
		AccumulatedValue &operator=(const AccumulatedValue &) {
			log_error("Copy assignment called on AccumulatedValue");
			return *this;
		}
#else
		AccumulatedValue(const AccumulatedValue &) = delete;
		AccumulatedValue &operator=(const AccumulatedValue &) = delete;
#endif

		typename V::Accumulated value;
		unsigned int hash;
	};

	// A class containing an `operator()` that returns true if two `AccumulatedValue`
	// elements have equal keys.
	// Required to insert `AccumulatedValue`s into an `std::unordered_set`.
	struct AccumulatedValueEquality {
		KeyEquality inner;
		AccumulatedValueEquality(const KeyEquality &inner) : inner(inner) {}
		bool operator()(const AccumulatedValue &v1, const AccumulatedValue &v2) const {
			return inner(v1.value, v2.value);
		}
	};

	// A class containing an `operator()` that returns the hash value of an `AccumulatedValue`.
	// Required to insert `AccumulatedValue`s into an `std::unordered_set`.
	struct AccumulatedValueHashOp {
		size_t operator()(const AccumulatedValue &v) const {
			return static_cast<size_t>(v.hash);
		}
	};

	using Shard = std::unordered_set<AccumulatedValue, AccumulatedValueHashOp, AccumulatedValueEquality>;

	// First construct one of these. Then populate it in parallel by calling `insert()` from many threads.
	// Then do another parallel phase calling `process()` from many threads.
	class Builder {
	public:
		Builder(const ParallelDispatchThreadPool &thread_pool, KeyEquality equality = KeyEquality(), CollisionHandler collision_handler = CollisionHandler())
			: collision_handler(std::move(collision_handler)) {
			init(thread_pool.num_threads(), std::move(equality));
		}
		Builder(const ParallelDispatchThreadPool::Subpool &thread_pool, KeyEquality equality = KeyEquality(), CollisionHandler collision_handler = CollisionHandler())
			: collision_handler(std::move(collision_handler)) {
			init(thread_pool.num_threads(), std::move(equality));
		}

		// First call `insert` to insert all elements. All inserts must finish
		// before calling any `process()`.
		void insert(const ThreadIndex &thread, Value v) {
			// You might think that for the single-threaded case, we can optimize by
			// inserting directly into the `std::unordered_set` here. But that slows things down
			// a lot and I never got around to figuring out why.
			std::vector<std::vector<Value>> &buckets = all_buckets[thread.thread_num];
			size_t bucket = static_cast<size_t>(v.hash) % buckets.size();
			buckets[bucket].emplace_back(std::move(v));
		}

		// Then call `process` for each thread. All `process()`s must finish before using
		// the `Builder` to construct a `ShardedHashSet`.
		void process(const ThreadIndex &thread) {
			int size = 0;
			for (std::vector<std::vector<Value>> &buckets : all_buckets)
				size += GetSize(buckets[thread.thread_num]);
			Shard &shard = shards[thread.thread_num];
			shard.reserve(size);
			for (std::vector<std::vector<Value>> &buckets : all_buckets) {
				for (Value &value : buckets[thread.thread_num])
					accumulate(value, shard);
				// Free as much memory as we can during the parallel phase.
				std::vector<Value>().swap(buckets[thread.thread_num]);
			}
		}
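
		// Example of the two phases, continuing the `WireEntry` sketch above (illustrative;
		// `run_on_all_threads` stands in for whatever dispatch mechanism hands each worker
		// its `ThreadIndex`, `inputs` is a hypothetical per-thread std::vector<WireEntry>,
		// and `hash_of` is a hypothetical hash of the key):
		//
		//	WireSet::Builder builder(thread_pool);
		//	run_on_all_threads([&](const ThreadIndex &thread) {
		//		for (WireEntry &e : inputs[thread.thread_num]) {
		//			unsigned int h = hash_of(e.key);
		//			builder.insert(thread, WireSet::Value(std::move(e), h));
		//		}
		//	});
		//	// The second phase must only start once every insert() has finished.
		//	run_on_all_threads([&](const ThreadIndex &thread) {
		//		builder.process(thread);
		//	});
		//	WireSet wires(builder); // all process() calls have finished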

	private:
		friend class ShardedHashSet<V, KeyEquality, CollisionHandler>;

		void accumulate(Value &value, Shard &shard) {
			// With C++20 we could make this more efficient using heterogeneous lookup.
			AccumulatedValue accumulated_value{std::move(value.value), value.hash};
			auto [it, inserted] = shard.insert(std::move(accumulated_value));
			if (!inserted)
				collision_handler(const_cast<typename V::Accumulated &>(it->value), accumulated_value.value);
		}
		void init(int num_threads, KeyEquality equality) {
			all_buckets.resize(num_threads);
			for (std::vector<std::vector<Value>> &buckets : all_buckets)
				buckets.resize(num_threads);
			for (int i = 0; i < num_threads; ++i)
				shards.emplace_back(0, AccumulatedValueHashOp(), AccumulatedValueEquality(equality));
		}

		const CollisionHandler collision_handler;
		std::vector<std::vector<std::vector<Value>>> all_buckets;
		std::vector<Shard> shards;
	};

	// Then finally construct the hashtable:
	ShardedHashSet(Builder &builder) : shards(std::move(builder.shards)) {
		// Check that all necessary `process()` calls were made.
		for (std::vector<std::vector<Value>> &buckets : builder.all_buckets)
			for (std::vector<Value> &bucket : buckets)
				log_assert(bucket.empty());
		// Free memory.
		std::vector<std::vector<std::vector<Value>>>().swap(builder.all_buckets);
	}
	ShardedHashSet(ShardedHashSet &&other) = default;
	ShardedHashSet() {}
	ShardedHashSet &operator=(ShardedHashSet &&other) = default;

	// Look up by `AccumulatedValue`. If we switch to C++20 then we could use
	// heterogeneous lookup to support looking up by `Value` here. Returns nullptr
	// if the key is not found.
	const typename V::Accumulated *find(const AccumulatedValue &v) const {
		size_t num_shards = shards.size();
		if (num_shards == 0)
			return nullptr;
		size_t shard = static_cast<size_t>(v.hash) % num_shards;
		auto it = shards[shard].find(v);
		if (it == shards[shard].end())
			return nullptr;
		return &it->value;
	}
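
	// Example lookup, continuing the `WireEntry` sketch above (illustrative; `wires` is a
	// built WireSet, `some_wire` and `some_key` are hypothetical probe values, and
	// `hash_of` is the same hypothetical hash used while building):
	//
	//	WireSet::AccumulatedValue probe(WireEntry{some_wire, some_key}, hash_of(some_key));
	//	if (const WireEntry::Accumulated *group = wires.find(probe))
	//		log("%d wires share this key\n", GetSize(group->wires));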

	// Insert an element into the table. The caller is responsible for ensuring this does not
	// happen concurrently with any other method calls.
	void insert(AccumulatedValue v) {
		size_t num_shards = shards.size();
		if (num_shards == 0)
			return;
		size_t shard = static_cast<size_t>(v.hash) % num_shards;
		shards[shard].insert(std::move(v));
	}

	// Call this for each shard to implement parallel destruction. For very large `ShardedHashSet`s,
	// deleting all elements of all shards on a single thread can be a performance bottleneck.
	void clear(const ThreadIndex &shard) {
		AccumulatedValueEquality equality = shards[0].key_eq();
		shards[shard.thread_num] = Shard(0, AccumulatedValueHashOp(), equality);
	}
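
	// Example of parallel teardown (illustrative; `run_on_all_threads` is the same
	// hypothetical dispatch helper as above, run on the pool the set was built with):
	//
	//	run_on_all_threads([&](const ThreadIndex &thread) {
	//		wires.clear(thread);
	//	});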

private:
	std::vector<Shard> shards;
};

YOSYS_NAMESPACE_END
#endif // YOSYS_THREADING_H