diff --git a/crates/fayalite/Cargo.toml b/crates/fayalite/Cargo.toml index 2652792..f176698 100644 --- a/crates/fayalite/Cargo.toml +++ b/crates/fayalite/Cargo.toml @@ -40,6 +40,7 @@ fayalite-visit-gen.workspace = true [features] unstable-doc = [] +unstable-test-hasher = [] [package.metadata.docs.rs] features = ["unstable-doc"] diff --git a/crates/fayalite/src/util.rs b/crates/fayalite/src/util.rs index 8d90135..ebc3f6d 100644 --- a/crates/fayalite/src/util.rs +++ b/crates/fayalite/src/util.rs @@ -8,8 +8,12 @@ mod const_usize; mod misc; mod scoped_ref; pub(crate) mod streaming_read_utf8; +mod test_hasher; // allow easily switching the hasher crate-wide for testing +#[cfg(feature = "unstable-test-hasher")] +pub type DefaultBuildHasher = test_hasher::DefaultBuildHasher; +#[cfg(not(feature = "unstable-test-hasher"))] pub(crate) type DefaultBuildHasher = hashbrown::hash_map::DefaultHashBuilder; pub(crate) type HashMap = hashbrown::HashMap; diff --git a/crates/fayalite/src/util/test_hasher.rs b/crates/fayalite/src/util/test_hasher.rs new file mode 100644 index 0000000..2a0cdd4 --- /dev/null +++ b/crates/fayalite/src/util/test_hasher.rs @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information +#![cfg(feature = "unstable-test-hasher")] + +use std::{ + fmt::Write as _, + hash::{BuildHasher, Hash, Hasher}, + io::Write as _, + marker::PhantomData, + sync::LazyLock, +}; + +type BoxDynHasher = Box; +type BoxDynBuildHasher = Box; +type BoxDynMakeBuildHasher = Box BoxDynBuildHasher + Send + Sync>; + +trait TryGetDynBuildHasher: Copy { + type Type; + fn try_get_make_build_hasher(self) -> Option; +} + +impl TryGetDynBuildHasher for PhantomData { + type Type = T; + fn try_get_make_build_hasher(self) -> Option { + None + } +} + +impl + Send + Sync + 'static + Clone> + TryGetDynBuildHasher for &'_ PhantomData +{ + type Type = T; + fn try_get_make_build_hasher(self) -> Option { + Some(Box::new(|| Box::>::default())) + } +} + +#[derive(Default, Clone)] +struct DynBuildHasher(T); + +trait DynBuildHasherTrait: BuildHasher { + fn clone_dyn_build_hasher(&self) -> BoxDynBuildHasher; +} + +impl> BuildHasher for DynBuildHasher { + type Hasher = BoxDynHasher; + + fn build_hasher(&self) -> Self::Hasher { + Box::new(self.0.build_hasher()) + } + + fn hash_one(&self, x: T) -> u64 { + self.0.hash_one(x) + } +} + +impl DynBuildHasherTrait for DynBuildHasher +where + Self: Clone + BuildHasher + Send + Sync + 'static, +{ + fn clone_dyn_build_hasher(&self) -> BoxDynBuildHasher { + Box::new(self.clone()) + } +} + +pub struct DefaultBuildHasher(BoxDynBuildHasher); + +impl Clone for DefaultBuildHasher { + fn clone(&self) -> Self { + DefaultBuildHasher(self.0.clone_dyn_build_hasher()) + } +} + +const ENV_VAR_NAME: &'static str = "FAYALITE_TEST_HASHER"; + +struct EnvVarValue { + key: &'static str, + try_get_make_build_hasher: fn() -> Option, + description: &'static str, +} + +macro_rules! env_var_value { + ( + key: $key:literal, + build_hasher: $build_hasher:ty, + description: $description:literal, + ) => { + EnvVarValue { + key: $key, + try_get_make_build_hasher: || { + // use rust method resolution to detect if $build_hasher is usable + // (e.g. hashbrown's hasher won't be usable without the right feature enabled) + (&PhantomData::>).try_get_make_build_hasher() + }, + description: $description, + } + }; +} + +#[derive(Default)] +struct AlwaysZeroHasher; + +impl Hasher for AlwaysZeroHasher { + fn write(&mut self, _bytes: &[u8]) {} + fn finish(&self) -> u64 { + 0 + } +} + +const ENV_VAR_VALUES: &'static [EnvVarValue] = &[ + env_var_value! { + key: "std", + build_hasher: std::hash::RandomState, + description: "use std::hash::RandomState", + }, + env_var_value! { + key: "hashbrown", + build_hasher: hashbrown::hash_map::DefaultHashBuilder, + description: "use hashbrown's DefaultHashBuilder", + }, + env_var_value! { + key: "always_zero", + build_hasher: std::hash::BuildHasherDefault, + description: "use a hasher that always returns 0 for all hashes,\n \ + this is useful for checking that PartialEq impls are correct", + }, +]; + +fn report_bad_env_var(msg: impl std::fmt::Display) -> ! { + let mut msg = format!("{ENV_VAR_NAME}: {msg}\n"); + for &EnvVarValue { + key, + try_get_make_build_hasher, + description, + } in ENV_VAR_VALUES + { + let availability = match try_get_make_build_hasher() { + Some(_) => "available", + None => "unavailable", + }; + writeln!(msg, "{key}: ({availability})\n {description}").expect("can't fail"); + } + std::io::stderr() + .write_all(msg.as_bytes()) + .expect("should be able to write to stderr"); + std::process::abort(); +} + +impl Default for DefaultBuildHasher { + fn default() -> Self { + static DEFAULT_FN: LazyLock = LazyLock::new(|| { + let var = std::env::var_os(ENV_VAR_NAME); + let var = var.as_deref().unwrap_or("std".as_ref()); + for &EnvVarValue { + key, + try_get_make_build_hasher, + description: _, + } in ENV_VAR_VALUES + { + if var.as_encoded_bytes().eq_ignore_ascii_case(key.as_bytes()) { + return try_get_make_build_hasher().unwrap_or_else(|| { + report_bad_env_var(format_args!( + "unavailable hasher: {key} (is the appropriate feature enabled?)" + )); + }); + } + } + report_bad_env_var(format_args!("unrecognized hasher: {var:?}")); + }); + Self(DEFAULT_FN()) + } +} + +pub struct DefaultHasher(BoxDynHasher); + +impl BuildHasher for DefaultBuildHasher { + type Hasher = DefaultHasher; + + fn build_hasher(&self) -> Self::Hasher { + DefaultHasher(self.0.build_hasher()) + } +} + +impl Hasher for DefaultHasher { + fn finish(&self) -> u64 { + self.0.finish() + } + + fn write(&mut self, bytes: &[u8]) { + self.0.write(bytes) + } + + fn write_u8(&mut self, i: u8) { + self.0.write_u8(i) + } + + fn write_u16(&mut self, i: u16) { + self.0.write_u16(i) + } + + fn write_u32(&mut self, i: u32) { + self.0.write_u32(i) + } + + fn write_u64(&mut self, i: u64) { + self.0.write_u64(i) + } + + fn write_u128(&mut self, i: u128) { + self.0.write_u128(i) + } + + fn write_usize(&mut self, i: usize) { + self.0.write_usize(i) + } + + fn write_i8(&mut self, i: i8) { + self.0.write_i8(i) + } + + fn write_i16(&mut self, i: i16) { + self.0.write_i16(i) + } + + fn write_i32(&mut self, i: i32) { + self.0.write_i32(i) + } + + fn write_i64(&mut self, i: i64) { + self.0.write_i64(i) + } + + fn write_i128(&mut self, i: i128) { + self.0.write_i128(i) + } + + fn write_isize(&mut self, i: isize) { + self.0.write_isize(i) + } +}