change Interner to use a sharded hash table

This commit is contained in:
Jacob Lifshay 2026-02-02 15:49:26 -08:00
parent 39810043ea
commit 4ac1bcbc0a
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ

View file

@ -10,16 +10,31 @@ use hashbrown::HashTable;
use std::{ use std::{
borrow::Cow, borrow::Cow,
hash::{BuildHasher, Hash}, hash::{BuildHasher, Hash},
sync::Mutex, sync::RwLock,
}; };
struct InternerState<T: ?Sized + 'static + Send + Sync> { struct InternerShard<T: ?Sized + 'static + Send + Sync> {
table: HashTable<&'static T>, table: HashTable<&'static T>,
hasher: DefaultBuildHasher, }
/// Base-2 logarithm of the number of shards: shard indices produced by
/// `shard_index_from_hash` are reduced modulo `1 << LOG2_SHARD_COUNT` (64).
const LOG2_SHARD_COUNT: u32 = 6;
/// Maps a 64-bit hash to a shard index in `0..(1 << LOG2_SHARD_COUNT)`.
///
/// The index is taken from the `LOG2_SHARD_COUNT` bits of `hash` starting at
/// bit `SHARD_INDEX_START`, which is `usize::BITS - 7 - 6`: bit 51 when
/// `usize` is 64 bits wide, bit 19 when it is 32 bits wide. That places the
/// selected bits directly below the top 7 bits of the `usize`-wide portion of
/// the hash.
///
/// NOTE(review): the choice of 7 bits relies on hashbrown's tag width and on
/// the tag being taken from the top of the `usize`-wide hash, as the comments
/// below state -- confirm against the hashbrown version in use.
fn shard_index_from_hash(hash: u64) -> usize {
// number of bits used for hashbrown's Tag
const HASH_BROWN_TAG_BITS: u32 = 7;
// try to extract bits of the hash that hashbrown isn't using,
// while accounting for some hash functions only returning `usize` bits.
// `saturating_sub` clamps the start bit to 0 instead of underflowing if
// `usize::BITS` were ever smaller than the bits being subtracted.
const SHARD_INDEX_START: u32 = usize::BITS
.saturating_sub(HASH_BROWN_TAG_BITS)
.saturating_sub(LOG2_SHARD_COUNT);
// discard every bit below the start of the shard-index field
let mut shard_index = hash >> SHARD_INDEX_START;
// the modulus is a power of two, so this keeps only the low
// `LOG2_SHARD_COUNT` bits and drops the higher (tag) bits
shard_index %= 1 << LOG2_SHARD_COUNT;
// the value is now below `1 << LOG2_SHARD_COUNT`, so the cast cannot truncate
shard_index as usize
} }
pub(crate) struct Interner<T: ?Sized + 'static + Send + Sync> { pub(crate) struct Interner<T: ?Sized + 'static + Send + Sync> {
state: Mutex<InternerState<T>>, shards: [RwLock<InternerShard<T>>; 1 << LOG2_SHARD_COUNT],
hasher: DefaultBuildHasher,
} }
impl<T: ?Sized + 'static + Send + Sync> Interner<T> { impl<T: ?Sized + 'static + Send + Sync> Interner<T> {
@ -39,10 +54,12 @@ impl<T: ?Sized + 'static + Send + Sync> Interner<T> {
impl<T: ?Sized + 'static + Send + Sync> Default for Interner<T> { impl<T: ?Sized + 'static + Send + Sync> Default for Interner<T> {
fn default() -> Self { fn default() -> Self {
Self { Self {
state: Mutex::new(InternerState { shards: [const {
table: HashTable::new(), RwLock::new(InternerShard {
hasher: Default::default(), table: HashTable::new(),
}), })
}; _],
hasher: Default::default(),
} }
} }
} }
@ -53,14 +70,28 @@ impl<T: ?Sized + 'static + Send + Sync + Hash + Eq + ToOwned> Interner<T> {
alloc: F, alloc: F,
value: Cow<'_, T>, value: Cow<'_, T>,
) -> Interned<T> { ) -> Interned<T> {
let mut state = self.state.lock().unwrap(); let hash = self.hasher.hash_one(&*value);
let InternerState { table, hasher } = &mut *state; let shard_index = shard_index_from_hash(hash);
let inner = *table let shard = &self.shards[shard_index];
.entry( let shard_read = shard.read().unwrap();
hasher.hash_one(&*value), let Some(&inner) = shard_read.table.find(hash, |k| **k == *value) else {
|k| **k == *value, drop(shard_read);
|k| hasher.hash_one(&**k), return self.intern_cold(alloc, value, hash, shard);
) };
Interned { inner }
}
#[cold]
fn intern_cold<F: FnOnce(Cow<'_, T>) -> &'static T>(
&self,
alloc: F,
value: Cow<'_, T>,
hash: u64,
shard: &RwLock<InternerShard<T>>,
) -> Interned<T> {
let mut shard = shard.write().unwrap();
let inner = *shard
.table
.entry(hash, |k| **k == *value, |k| self.hasher.hash_one(&**k))
.or_insert_with(|| alloc(value)) .or_insert_with(|| alloc(value))
.get(); .get();
Interned { inner } Interned { inner }