From 4ac1bcbc0a9e4d3225608f4319c269f8ffaf1dd6 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 2 Feb 2026 15:49:26 -0800 Subject: [PATCH] change Interner to use a sharded hash table --- crates/fayalite/src/intern/interner.rs | 63 +++++++++++++++++++------- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/crates/fayalite/src/intern/interner.rs b/crates/fayalite/src/intern/interner.rs index 45899af..4e35636 100644 --- a/crates/fayalite/src/intern/interner.rs +++ b/crates/fayalite/src/intern/interner.rs @@ -10,16 +10,31 @@ use hashbrown::HashTable; use std::{ borrow::Cow, hash::{BuildHasher, Hash}, - sync::Mutex, + sync::RwLock, }; -struct InternerState { +struct InternerShard { table: HashTable<&'static T>, - hasher: DefaultBuildHasher, +} + +const LOG2_SHARD_COUNT: u32 = 6; + +fn shard_index_from_hash(hash: u64) -> usize { + // number of bits used for hashbrown's Tag + const HASH_BROWN_TAG_BITS: u32 = 7; + // try to extract bits of the hash that hashbrown isn't using, + // while accounting for some hash functions only returning `usize` bits. + const SHARD_INDEX_START: u32 = usize::BITS + .saturating_sub(HASH_BROWN_TAG_BITS) + .saturating_sub(LOG2_SHARD_COUNT); + let mut shard_index = hash >> SHARD_INDEX_START; + shard_index %= 1 << LOG2_SHARD_COUNT; + shard_index as usize } pub(crate) struct Interner { - state: Mutex>, + shards: [RwLock>; 1 << LOG2_SHARD_COUNT], + hasher: DefaultBuildHasher, } impl Interner { @@ -39,10 +54,12 @@ impl Interner { impl Default for Interner { fn default() -> Self { Self { - state: Mutex::new(InternerState { - table: HashTable::new(), - hasher: Default::default(), - }), + shards: [const { + RwLock::new(InternerShard { + table: HashTable::new(), + }) + }; _], + hasher: Default::default(), } } } @@ -53,14 +70,28 @@ impl Interner { alloc: F, value: Cow<'_, T>, ) -> Interned { - let mut state = self.state.lock().unwrap(); - let InternerState { table, hasher } = &mut *state; - let inner = *table - .entry( - hasher.hash_one(&*value), - |k| **k == *value, - |k| hasher.hash_one(&**k), - ) + let hash = self.hasher.hash_one(&*value); + let shard_index = shard_index_from_hash(hash); + let shard = &self.shards[shard_index]; + let shard_read = shard.read().unwrap(); + let Some(&inner) = shard_read.table.find(hash, |k| **k == *value) else { + drop(shard_read); + return self.intern_cold(alloc, value, hash, shard); + }; + Interned { inner } + } + #[cold] + fn intern_cold) -> &'static T>( + &self, + alloc: F, + value: Cow<'_, T>, + hash: u64, + shard: &RwLock>, + ) -> Interned { + let mut shard = shard.write().unwrap(); + let inner = *shard + .table + .entry(hash, |k| **k == *value, |k| self.hasher.hash_one(&**k)) .or_insert_with(|| alloc(value)) .get(); Interned { inner }