speed up interning #61

Merged
programmerjake merged 5 commits from programmerjake/fayalite:speed-up-interning into master 2026-02-03 01:58:52 +00:00
7 changed files with 277 additions and 85 deletions

5
Cargo.lock generated
View file

@ -319,6 +319,7 @@ dependencies = [
"jobslot",
"num-bigint",
"num-traits",
"once_cell",
"ordered-float",
"petgraph",
"serde",
@ -521,9 +522,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.19.0"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "ordered-float"

View file

@ -30,6 +30,7 @@ indexmap = { version = "2.5.0", features = ["serde"] }
jobslot = "0.2.23"
num-bigint = "0.4.6"
num-traits = "0.2.16"
once_cell = "1.21.3"
ordered-float = { version = "5.1.0", features = ["serde"] }
petgraph = "0.8.1"
prettyplease = "0.2.20"

View file

@ -26,6 +26,7 @@ hashbrown.workspace = true
jobslot.workspace = true
num-bigint.workspace = true
num-traits.workspace = true
once_cell.workspace = true
ordered-float.workspace = true
petgraph.workspace = true
serde_json.workspace = true

View file

@ -10,7 +10,7 @@ use crate::{
value_category::ValueCategoryValue,
},
hdl,
intern::{Intern, Interned, Memoize},
intern::{Intern, Interned, Memoize, OnceInterned},
sim::value::{SimValue, ToSimValueWithType},
source_location::SourceLocation,
ty::{
@ -65,14 +65,21 @@ pub type DynSize = ConstUsize<DYN_SIZE>;
trait KnownSizeBaseSealed {}
impl<const N: usize> KnownSizeBaseSealed for [(); N] {}
impl<const N: usize> KnownSizeBaseSealed for ConstUsize<N> {}
#[expect(private_bounds)]
pub trait KnownSizeBase: KnownSizeBaseSealed {}
pub trait KnownSizeBase: KnownSizeBaseSealed + GetInternedIntCaches {}
macro_rules! impl_known_size_base {
($($size:literal),* $(,)?) => {
$(impl KnownSizeBase for [(); $size] {})*
$(impl KnownSizeBase for ConstUsize<$size> {})*
$(impl GetInternedIntCaches for ConstUsize<$size> {
#[inline(always)]
fn get_interned_int_caches() -> &'static InternedIntCaches<Self> {
static CACHES: InternedIntCaches<ConstUsize<$size>> = InternedIntCaches::new();
&CACHES
}
})*
};
}
@ -113,12 +120,34 @@ impl_known_size_base! {
0x200,
}
trait GetInternedIntCaches {
fn get_interned_int_caches() -> &'static InternedIntCaches<Self>
where
Self: KnownSize;
}
struct InternedIntCaches<Width: KnownSize> {
uint: OnceInterned<UIntType<Width>>,
sint: OnceInterned<SIntType<Width>>,
}
impl<Width: KnownSize> InternedIntCaches<Width> {
const fn new() -> Self {
Self {
uint: OnceInterned::new(),
sint: OnceInterned::new(),
}
}
}
#[expect(private_bounds)]
pub trait KnownSize:
GenericConstUsize
+ sealed::SizeTypeSealed
+ sealed::SizeSealed
+ Default
+ FillInDefaultedGenerics<Type = Self>
+ GetInternedIntCaches
{
const SIZE: Self;
type ArrayMatch<Element: Type>: AsRef<[Expr<Element>]>
@ -148,7 +177,7 @@ pub trait KnownSize:
impl<const N: usize> KnownSize for ConstUsize<N>
where
[(); N]: KnownSizeBase,
ConstUsize<N>: KnownSizeBase,
{
const SIZE: Self = Self;
type ArrayMatch<Element: Type> = [Expr<Element>; N];
@ -221,6 +250,10 @@ pub trait Size:
fn from_usize(v: usize) -> Self::SizeType {
Self::try_from_usize(v).expect("wrong size")
}
#[doc(hidden)]
fn interned_uint(size_type: Self::SizeType) -> Interned<UIntType<Self>>;
#[doc(hidden)]
fn interned_sint(size_type: Self::SizeType) -> Interned<SIntType<Self>>;
}
impl sealed::SizeTypeSealed for usize {}
@ -229,6 +262,8 @@ impl SizeType for usize {
type Size = DynSize;
}
const MAX_CACHED_INT_WIDTH: usize = 1 << 10;
impl Size for DynSize {
type ArrayMatch<Element: Type> = Box<[Expr<Element>]>;
type ArraySimValue<Element: Type> = Box<[SimValue<Element>]>;
@ -242,6 +277,36 @@ impl Size for DynSize {
fn try_from_usize(v: usize) -> Option<Self::SizeType> {
Some(v)
}
#[doc(hidden)]
fn interned_uint(size_type: Self::SizeType) -> Interned<UIntType<Self>> {
static CACHED: [OnceInterned<UInt>; MAX_CACHED_INT_WIDTH] =
[const { OnceInterned::new() }; _];
#[cold]
fn intern_cold(width: usize) -> Interned<UInt> {
Intern::intern_sized(UInt::new(width))
}
if let Some(cached) = CACHED.get(size_type) {
cached.get_or_init(|| intern_cold(size_type))
} else {
intern_cold(size_type)
}
}
#[doc(hidden)]
fn interned_sint(size_type: Self::SizeType) -> Interned<SIntType<Self>> {
static CACHED: [OnceInterned<SInt>; MAX_CACHED_INT_WIDTH] =
[const { OnceInterned::new() }; _];
#[cold]
fn intern_cold(width: usize) -> Interned<SInt> {
Intern::intern_sized(SInt::new(width))
}
if let Some(cached) = CACHED.get(size_type) {
cached.get_or_init(|| intern_cold(size_type))
} else {
intern_cold(size_type)
}
}
}
impl<const VALUE: usize> sealed::SizeSealed for ConstUsize<VALUE> {}
@ -267,6 +332,20 @@ impl<T: KnownSize> Size for T {
fn try_from_usize(v: usize) -> Option<Self::SizeType> {
if v == T::VALUE { Some(T::SIZE) } else { None }
}
#[doc(hidden)]
fn interned_uint(_size_type: Self::SizeType) -> Interned<UIntType<Self>> {
T::get_interned_int_caches()
.uint
.get_or_init(|| UIntType::new_static().intern_sized())
}
#[doc(hidden)]
fn interned_sint(_size_type: Self::SizeType) -> Interned<SIntType<Self>> {
T::get_interned_int_caches()
.sint
.get_or_init(|| SIntType::new_static().intern_sized())
}
}
#[derive(Clone, PartialEq, Eq, Debug)]
@ -586,7 +665,7 @@ macro_rules! impl_valueless_op_forward {
}
macro_rules! impl_int {
($pretty_name:ident, $name:ident, $generic_name:ident, $value:ident, $SIGNED:literal) => {
($pretty_name:ident, $name:ident, $generic_name:ident, $value:ident, $SIGNED:literal, $interned_int:ident) => {
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct $name<Width: Size = DynSize> {
@ -1003,7 +1082,7 @@ macro_rules! impl_int {
type Output = $name<Width::Size>;
fn index(&self, width: Width) -> &Self::Output {
Interned::into_inner(Intern::intern_sized($name::new(width)))
Interned::into_inner(Width::Size::$interned_int(width))
}
}
@ -1184,8 +1263,22 @@ macro_rules! impl_int {
};
}
impl_int!(UInt, UIntType, UIntWithoutGenerics, UIntValue, false);
impl_int!(SInt, SIntType, SIntWithoutGenerics, SIntValue, true);
impl_int!(
UInt,
UIntType,
UIntWithoutGenerics,
UIntValue,
false,
interned_uint
);
impl_int!(
SInt,
SIntType,
SIntWithoutGenerics,
SIntValue,
true,
interned_sint
);
impl UInt {
/// gets the smallest `UInt` that fits `v` losslessly

View file

@ -4,6 +4,7 @@
use crate::{intern::type_map::TypeIdMap, util::DefaultBuildHasher};
use bitvec::{ptr::BitPtr, slice::BitSlice, vec::BitVec};
use hashbrown::HashTable;
use once_cell::race::OnceRef;
use serde::{Deserialize, Serialize};
use std::{
any::{Any, TypeId},
@ -16,9 +17,10 @@ use std::{
marker::PhantomData,
ops::Deref,
path::{Path, PathBuf},
sync::{Mutex, RwLock},
sync::RwLock,
};
mod interner;
mod type_map;
pub trait LazyInternedTrait<T: ?Sized + Send + Sync + 'static>: Send + Sync + Any {
@ -593,71 +595,6 @@ impl<T: ?Sized + 'static + Send + Sync + ToOwned> From<Interned<T>> for Cow<'_,
}
}
struct InternerState<T: ?Sized + 'static + Send + Sync> {
table: HashTable<&'static T>,
hasher: DefaultBuildHasher,
}
pub struct Interner<T: ?Sized + 'static + Send + Sync> {
state: Mutex<InternerState<T>>,
}
impl<T: ?Sized + 'static + Send + Sync> Interner<T> {
fn get() -> &'static Interner<T> {
static TYPE_ID_MAP: TypeIdMap = TypeIdMap::new();
TYPE_ID_MAP.get_or_insert_default()
}
}
impl<T: ?Sized + 'static + Send + Sync> Default for Interner<T> {
fn default() -> Self {
Self {
state: Mutex::new(InternerState {
table: HashTable::new(),
hasher: Default::default(),
}),
}
}
}
impl<T: ?Sized + 'static + Send + Sync + Hash + Eq + ToOwned> Interner<T> {
fn intern<F: FnOnce(Cow<'_, T>) -> &'static T>(
&self,
alloc: F,
value: Cow<'_, T>,
) -> Interned<T> {
let mut state = self.state.lock().unwrap();
let InternerState { table, hasher } = &mut *state;
let inner = *table
.entry(
hasher.hash_one(&*value),
|k| **k == *value,
|k| hasher.hash_one(&**k),
)
.or_insert_with(|| alloc(value))
.get();
Interned { inner }
}
}
impl<T: Clone + 'static + Send + Sync + Hash + Eq> Interner<T> {
fn intern_sized(&self, value: Cow<'_, T>) -> Interned<T> {
self.intern(|value| Box::leak(Box::new(value.into_owned())), value)
}
}
impl<T: Clone + 'static + Send + Sync + Hash + Eq> Interner<[T]> {
fn intern_slice(&self, value: Cow<'_, [T]>) -> Interned<[T]> {
self.intern(|value| value.into_owned().leak(), value)
}
}
impl Interner<BitSlice> {
fn intern_bit_slice(&self, value: Cow<'_, BitSlice>) -> Interned<BitSlice> {
self.intern(|value| value.into_owned().leak(), value)
}
}
pub struct Interned<T: ?Sized + 'static + Send + Sync> {
inner: &'static T,
}
@ -977,7 +914,7 @@ impl<T: Clone + Send + Sync + 'static + Hash + Eq> Intern for T {
where
Self: ToOwned,
{
Interner::get().intern_sized(this)
interner::Interner::get().intern_sized(this)
}
}
@ -997,7 +934,7 @@ impl<T: Clone + Send + Sync + 'static + Hash + Eq> Intern for [T] {
where
Self: ToOwned,
{
Interner::get().intern_slice(this)
interner::Interner::get().intern_slice(this)
}
}
@ -1017,7 +954,7 @@ impl Intern for BitSlice {
where
Self: ToOwned,
{
Interner::get().intern_bit_slice(this)
interner::Interner::get().intern_bit_slice(this)
}
}
@ -1035,10 +972,17 @@ pub trait MemoizeGeneric: 'static + Send + Sync + Hash + Eq + Copy {
fn inner(self, input: Self::InputRef<'_>) -> Self::Output;
fn get_cow(self, input: Self::InputCow<'_>) -> Self::Output {
static TYPE_ID_MAP: TypeIdMap = TypeIdMap::new();
thread_local! {
static TYPE_ID_MAP_CACHE: TypeIdMap = const { TypeIdMap::new() };
}
let map: &RwLock<(
DefaultBuildHasher,
HashTable<(Self, Self::InputOwned, Self::Output)>,
)> = TYPE_ID_MAP.get_or_insert_default();
)> = TYPE_ID_MAP_CACHE.with(|cache| {
cache.get_or_insert_with(|| {
TYPE_ID_MAP.get_or_insert_with(|| Box::leak(Default::default()))
})
});
fn hash_eq_key<'a, 'b, T: MemoizeGeneric>(
this: &'a T,
input: T::InputRef<'b>,
@ -1140,3 +1084,35 @@ pub trait Memoize: 'static + Send + Sync + Hash + Eq + Copy {
self.get_cow(Cow::Borrowed(input))
}
}
/// like `once_cell::race::OnceBox` but for `Interned<T>` instead of `Box<T>`
pub struct OnceInterned<T: 'static + Send + Sync>(OnceRef<'static, T>);
impl<T: 'static + Send + Sync + fmt::Debug> fmt::Debug for OnceInterned<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("OnceInterned").field(&self.get()).finish()
}
}
impl<T: 'static + Send + Sync> Default for OnceInterned<T> {
fn default() -> Self {
Self::new()
}
}
impl<T: 'static + Send + Sync> OnceInterned<T> {
pub const fn new() -> Self {
Self(OnceRef::new())
}
pub fn set(&self, v: Interned<T>) -> Result<(), ()> {
self.0.set(v.inner)
}
pub fn get(&self) -> Option<Interned<T>> {
self.0.get().map(|inner| Interned { inner })
}
pub fn get_or_init<F: FnOnce() -> Interned<T>>(&self, f: F) -> Interned<T> {
Interned {
inner: self.0.get_or_init(|| f().inner),
}
}
}

View file

@ -0,0 +1,117 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
intern::{Interned, type_map::TypeIdMap},
util::DefaultBuildHasher,
};
use bitvec::slice::BitSlice;
use hashbrown::HashTable;
use std::{
borrow::Cow,
hash::{BuildHasher, Hash},
sync::RwLock,
};
struct InternerShard<T: ?Sized + 'static + Send + Sync> {
table: HashTable<&'static T>,
}
const LOG2_SHARD_COUNT: u32 = 6;
fn shard_index_from_hash(hash: u64) -> usize {
// number of bits used for hashbrown's Tag
const HASH_BROWN_TAG_BITS: u32 = 7;
// try to extract bits of the hash that hashbrown isn't using,
// while accounting for some hash functions only returning `usize` bits.
const SHARD_INDEX_START: u32 = usize::BITS
.saturating_sub(HASH_BROWN_TAG_BITS)
.saturating_sub(LOG2_SHARD_COUNT);
let mut shard_index = hash >> SHARD_INDEX_START;
shard_index %= 1 << LOG2_SHARD_COUNT;
shard_index as usize
}
pub(crate) struct Interner<T: ?Sized + 'static + Send + Sync> {
shards: [RwLock<InternerShard<T>>; 1 << LOG2_SHARD_COUNT],
hasher: DefaultBuildHasher,
}
impl<T: ?Sized + 'static + Send + Sync> Interner<T> {
pub(crate) fn get() -> &'static Interner<T> {
static TYPE_ID_MAP: TypeIdMap = TypeIdMap::new();
thread_local! {
static TYPE_ID_MAP_CACHE: TypeIdMap = const { TypeIdMap::new() };
}
TYPE_ID_MAP_CACHE.with(|cache| {
cache.get_or_insert_with(|| {
TYPE_ID_MAP.get_or_insert_with(|| Box::leak(Default::default()))
})
})
}
}
impl<T: ?Sized + 'static + Send + Sync> Default for Interner<T> {
fn default() -> Self {
Self {
shards: [const {
RwLock::new(InternerShard {
table: HashTable::new(),
})
}; _],
hasher: Default::default(),
}
}
}
impl<T: ?Sized + 'static + Send + Sync + Hash + Eq + ToOwned> Interner<T> {
fn intern<F: FnOnce(Cow<'_, T>) -> &'static T>(
&self,
alloc: F,
value: Cow<'_, T>,
) -> Interned<T> {
let hash = self.hasher.hash_one(&*value);
let shard_index = shard_index_from_hash(hash);
let shard = &self.shards[shard_index];
let shard_read = shard.read().unwrap();
let Some(&inner) = shard_read.table.find(hash, |k| **k == *value) else {
drop(shard_read);
return self.intern_cold(alloc, value, hash, shard);
};
Interned { inner }
}
#[cold]
fn intern_cold<F: FnOnce(Cow<'_, T>) -> &'static T>(
&self,
alloc: F,
value: Cow<'_, T>,
hash: u64,
shard: &RwLock<InternerShard<T>>,
) -> Interned<T> {
let mut shard = shard.write().unwrap();
let inner = *shard
.table
.entry(hash, |k| **k == *value, |k| self.hasher.hash_one(&**k))
.or_insert_with(|| alloc(value))
.get();
Interned { inner }
}
}
impl<T: Clone + 'static + Send + Sync + Hash + Eq> Interner<T> {
pub(crate) fn intern_sized(&self, value: Cow<'_, T>) -> Interned<T> {
self.intern(|value| Box::leak(Box::new(value.into_owned())), value)
}
}
impl<T: Clone + 'static + Send + Sync + Hash + Eq> Interner<[T]> {
pub(crate) fn intern_slice(&self, value: Cow<'_, [T]>) -> Interned<[T]> {
self.intern(|value| value.into_owned().leak(), value)
}
}
impl Interner<BitSlice> {
pub(crate) fn intern_bit_slice(&self, value: Cow<'_, BitSlice>) -> Interned<BitSlice> {
self.intern(|value| value.into_owned().leak(), value)
}
}

View file

@ -87,20 +87,23 @@ impl TypeIdMap {
fn insert_slow(
&self,
type_id: TypeId,
make: fn() -> Box<dyn Any + Sync + Send>,
make: impl FnOnce() -> &'static (dyn Any + Sync + Send),
) -> &'static (dyn Any + Sync + Send) {
let value = Box::leak(make());
let value = make();
let mut write_guard = self.0.write().unwrap();
*write_guard.entry(type_id).or_insert(value)
}
pub(crate) fn get_or_insert_default<T: Sized + Any + Send + Sync + Default>(&self) -> &T {
pub(crate) fn get_or_insert_with<T: Sized + Any + Send + Sync>(
&self,
make: impl FnOnce() -> &'static T,
) -> &'static T {
let type_id = TypeId::of::<T>();
let read_guard = self.0.read().unwrap();
let retval = read_guard.get(&type_id).map(|v| *v);
drop(read_guard);
let retval = match retval {
Some(retval) => retval,
None => self.insert_slow(type_id, move || Box::new(T::default())),
None => self.insert_slow(type_id, move || make()),
};
retval.downcast_ref().expect("known to have correct TypeId")
}