change Interner to use a sharded hash table

This commit is contained in:
Jacob Lifshay 2026-02-02 15:49:26 -08:00
parent 39810043ea
commit 4ac1bcbc0a
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ

View file

@ -10,16 +10,31 @@ use hashbrown::HashTable;
use std::{
borrow::Cow,
hash::{BuildHasher, Hash},
sync::Mutex,
sync::RwLock,
};
struct InternerState<T: ?Sized + 'static + Send + Sync> {
struct InternerShard<T: ?Sized + 'static + Send + Sync> {
table: HashTable<&'static T>,
hasher: DefaultBuildHasher,
}
/// Base-2 logarithm of the shard count; shard indexes are always below `1 << LOG2_SHARD_COUNT`.
const LOG2_SHARD_COUNT: u32 = 6;

/// Maps a hash to the index of the shard it belongs to.
///
/// The index is a `LOG2_SHARD_COUNT`-bit window of `hash`, so the result is
/// always less than `1 << LOG2_SHARD_COUNT`.
fn shard_index_from_hash(hash: u64) -> usize {
    // Width, in bits, of the tag hashbrown takes from a hash.
    const TAG_WIDTH: u32 = 7;
    // Lowest bit of the window. The window is placed just under the tag bits
    // in an attempt to use hash bits hashbrown isn't using, and it is measured
    // from `usize::BITS` rather than 64 because some hash functions only
    // return `usize` bits of hash.
    const WINDOW_START: u32 = usize::BITS.saturating_sub(TAG_WIDTH + LOG2_SHARD_COUNT);
    // Keeps only the low `LOG2_SHARD_COUNT` bits once the window is shifted down.
    const WINDOW_MASK: u64 = (1 << LOG2_SHARD_COUNT) - 1;

    ((hash >> WINDOW_START) & WINDOW_MASK) as usize
}
pub(crate) struct Interner<T: ?Sized + 'static + Send + Sync> {
state: Mutex<InternerState<T>>,
shards: [RwLock<InternerShard<T>>; 1 << LOG2_SHARD_COUNT],
hasher: DefaultBuildHasher,
}
impl<T: ?Sized + 'static + Send + Sync> Interner<T> {
@ -39,10 +54,12 @@ impl<T: ?Sized + 'static + Send + Sync> Interner<T> {
impl<T: ?Sized + 'static + Send + Sync> Default for Interner<T> {
fn default() -> Self {
Self {
state: Mutex::new(InternerState {
table: HashTable::new(),
hasher: Default::default(),
}),
shards: [const {
RwLock::new(InternerShard {
table: HashTable::new(),
})
}; _],
hasher: Default::default(),
}
}
}
@ -53,14 +70,28 @@ impl<T: ?Sized + 'static + Send + Sync + Hash + Eq + ToOwned> Interner<T> {
alloc: F,
value: Cow<'_, T>,
) -> Interned<T> {
let mut state = self.state.lock().unwrap();
let InternerState { table, hasher } = &mut *state;
let inner = *table
.entry(
hasher.hash_one(&*value),
|k| **k == *value,
|k| hasher.hash_one(&**k),
)
let hash = self.hasher.hash_one(&*value);
let shard_index = shard_index_from_hash(hash);
let shard = &self.shards[shard_index];
let shard_read = shard.read().unwrap();
let Some(&inner) = shard_read.table.find(hash, |k| **k == *value) else {
drop(shard_read);
return self.intern_cold(alloc, value, hash, shard);
};
Interned { inner }
}
#[cold]
fn intern_cold<F: FnOnce(Cow<'_, T>) -> &'static T>(
&self,
alloc: F,
value: Cow<'_, T>,
hash: u64,
shard: &RwLock<InternerShard<T>>,
) -> Interned<T> {
let mut shard = shard.write().unwrap();
let inner = *shard
.table
.entry(hash, |k| **k == *value, |k| self.hasher.hash_one(&**k))
.or_insert_with(|| alloc(value))
.get();
Interned { inner }