From 759c5de5b4c17d7d9783f85f43bc44688b801be9 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Wed, 4 Feb 2026 17:55:14 -0800 Subject: [PATCH] WIP adding fetch::l1_i_cache --- crates/cpu/src/config.rs | 34 ++++ crates/cpu/src/fetch.rs | 266 ++++++++++++++++++++++++++++ crates/cpu/src/lib.rs | 1 + crates/cpu/src/next_pc.rs | 8 +- crates/cpu/tests/expected/fetch.vcd | 0 crates/cpu/tests/fetch.rs | 185 +++++++++++++++++++ 6 files changed, 490 insertions(+), 4 deletions(-) create mode 100644 crates/cpu/src/fetch.rs create mode 100644 crates/cpu/tests/expected/fetch.vcd create mode 100644 crates/cpu/tests/fetch.rs diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index cf2fd08..e2145d0 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -37,6 +37,8 @@ pub struct CpuConfig { pub max_branches_per_fetch: NonZeroUsize, pub max_fetches_in_flight: NonZeroUsize, pub log2_fetch_width_in_bytes: u8, + pub log2_cache_line_size_in_bytes: u8, + pub l1_i_cache_line_count: NonZeroUsize, /// default value for [`UnitConfig::max_in_flight`] pub default_unit_max_in_flight: NonZeroUsize, pub rob_size: NonZeroUsize, @@ -63,6 +65,13 @@ impl CpuConfig { v }; pub const DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES: u8 = 3; + pub const DEFAULT_LOG2_CACHE_LINE_SIZE_IN_BYTES: u8 = 6; + pub const DEFAULT_L1_I_CACHE_LINE_COUNT: NonZeroUsize = { + let Some(v) = NonZeroUsize::new(256) else { + unreachable!(); + }; + v + }; pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = { let Some(v) = NonZeroUsize::new(8) else { unreachable!(); @@ -77,6 +86,8 @@ impl CpuConfig { max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH, max_fetches_in_flight: Self::DEFAULT_MAX_FETCHES_IN_FLIGHT, log2_fetch_width_in_bytes: Self::DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES, + log2_cache_line_size_in_bytes: Self::DEFAULT_LOG2_CACHE_LINE_SIZE_IN_BYTES, + l1_i_cache_line_count: Self::DEFAULT_L1_I_CACHE_LINE_COUNT, default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT, rob_size, } @@ -141,6 +152,17 @@ impl CpuConfig { .checked_shl(self.log2_fetch_width_in_bytes.into()) .expect("log2_fetch_width_in_bytes is too big") } + pub fn cache_line_size_in_bytes(&self) -> usize { + 1usize + .checked_shl(self.log2_cache_line_size_in_bytes.into()) + .expect("log2_cache_line_size_in_bytes is too big") + } + pub fn l1_i_cache_size_in_bytes(&self) -> usize { + self.l1_i_cache_line_count + .get() + .checked_mul(self.cache_line_size_in_bytes()) + .expect("L1 I-Cache is too big") + } } #[hdl(get(|c| c.fetch_width.get()))] @@ -161,6 +183,18 @@ pub type CpuConfigLog2FetchWidthInBytes> = DynSize #[hdl(get(|c| c.fetch_width_in_bytes()))] pub type CpuConfigFetchWidthInBytes> = DynSize; +#[hdl(get(|c| c.log2_cache_line_size_in_bytes.into()))] +pub type CpuConfigLog2CacheLineSizeInBytes> = DynSize; + +#[hdl(get(|c| c.cache_line_size_in_bytes()))] +pub type CpuConfigCacheLineSizeInBytes> = DynSize; + +#[hdl(get(|c| c.l1_i_cache_line_count.get()))] +pub type CpuConfigL1ICacheLineCount> = DynSize; + +#[hdl(get(|c| c.l1_i_cache_size_in_bytes()))] +pub type CpuConfigL1ICacheSizeInBytes> = DynSize; + #[hdl(get(|c| c.rob_size.get()))] pub type CpuConfigRobSize> = DynSize; diff --git a/crates/cpu/src/fetch.rs b/crates/cpu/src/fetch.rs new file mode 100644 index 0000000..571a438 --- /dev/null +++ b/crates/cpu/src/fetch.rs @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use crate::{ + config::{ + CpuConfig, CpuConfigCacheLineSizeInBytes, CpuConfigFetchWidthInBytes, + CpuConfigL1ICacheLineCount, PhantomConstCpuConfig, + }, + next_pc::{NextPcToFetchInterface, ResetStatus, ResetSteps, SimValueDefault}, +}; +use fayalite::{ + memory::{ReadWriteStruct, memory_addr_width, splat_mask}, + prelude::*, + util::ready_valid::ReadyValid, +}; + +#[hdl] +pub enum MemoryOperationKind { + Read, + Write, +} + +#[hdl(no_static)] +pub struct MemoryOperationStart + PhantomConstCpuConfig> { + pub kind: MemoryOperationKind, + pub addr: UInt<64>, + pub write_data: ArrayType, CpuConfigFetchWidthInBytes>, + pub config: C, +} + +#[hdl] +pub enum MemoryOperationErrorKind { + Generic, +} + +#[hdl] +pub enum MemoryOperationFinishKind { + Success(MemoryOperationKind), + Error(MemoryOperationErrorKind), +} + +#[hdl(no_static)] +pub struct MemoryOperationFinish + PhantomConstCpuConfig> { + pub kind: MemoryOperationFinishKind, + pub read_data: ArrayType, CpuConfigFetchWidthInBytes>, + pub config: C, +} + +#[hdl(no_static)] +pub struct MemoryInterface + PhantomConstCpuConfig> { + pub start: ReadyValid>, + #[hdl(flip)] + pub finish: ReadyValid>, + pub config: C, +} + +#[hdl(no_static)] +struct CacheLine + PhantomConstCpuConfig> { + data: ArrayType, CpuConfigCacheLineSizeInBytes>, + addr: HdlOption>, + config: C, +} + +#[hdl(no_static)] +struct L1ICacheState + PhantomConstCpuConfig> { + config: C, +} + +impl SimValueDefault for L1ICacheState { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { config } = self; + #[hdl(sim)] + Self { config } + } +} + +impl ResetSteps for L1ICacheState { + #[hdl] + fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { config: _ } = this; + ResetStatus::Done + } +} + +#[hdl_module(extern)] +fn l1_i_cache_impl(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let memory_interface: MemoryInterface> = + m.output(MemoryInterface[config]); + #[hdl] + let from_next_pc: NextPcToFetchInterface> = + m.input(NextPcToFetchInterface[config]); + // i_cache_port.clk is externally overridden with cd.clk + #[hdl] + let i_cache_port: ReadWriteStruct>, DynSize> = m.output( + ReadWriteStruct[CacheLine[config]][memory_addr_width(CpuConfigL1ICacheLineCount[config])], + ); + #[hdl] + let state_for_debug: L1ICacheState> = m.output(L1ICacheState[config]); + m.register_clock_for_past(cd.clk); + #[hdl] + async fn run( + mut sim: ExternModuleSimulationState, + cd: Expr, + memory_interface: Expr>>, + from_next_pc: Expr>>, + i_cache_port: Expr>, DynSize>>, + state_expr: Expr>>, + ) { + let mut state = sim.read(state_expr).await; + let config = state.config.ty(); + let l1_i_cache_line_count = CpuConfigL1ICacheLineCount[config]; + let cache_line_ty = CacheLine[config]; + for step in 0usize.. { + sim.write(state_expr, state).await; + sim.wait_for_clock_edge(cd.clk).await; + state = sim.read_past(state_expr, cd.clk).await; + sim.write(i_cache_port.en, false).await; + let mut reset_status = ResetSteps::reset_step(&mut state, step); + if step < l1_i_cache_line_count { + reset_status = ResetStatus::Working; + #[hdl] + let ReadWriteStruct::<_, _> { + addr, + en, + clk: _, // externally overridden with cd.clk + rdata: _, + wmode, + wdata, + wmask, + } = i_cache_port; + sim.write(addr, step.cast_to(addr.ty())).await; + sim.write(en, true).await; + sim.write(wmode, true).await; + sim.write( + wdata, + #[hdl(sim)] + CacheLine::<_> { + data: repeat(0u8, cache_line_ty.data.len()), + addr: #[hdl(sim)] + HdlNone(), + config, + }, + ) + .await; + sim.write(wmask, splat_mask(cache_line_ty, true.to_expr())) + .await; + } + match reset_status { + ResetStatus::Done => break, + ResetStatus::Working => {} + } + } + todo!(); + } + m.extern_module_simulation_fn( + ( + cd, + memory_interface, + from_next_pc, + i_cache_port, + state_for_debug, + ), + |(cd, memory_interface, from_next_pc, i_cache_port, state_for_debug), mut sim| async move { + let config = memory_interface.ty().config; + let cache_line_size_in_bytes = CpuConfigCacheLineSizeInBytes[config]; + let cache_line_ty = CacheLine[config]; + sim.write(i_cache_port.clk, false).await; // externally overridden with cd.clk, so just write a constant here + sim.resettable( + cd, + |mut sim: ExternModuleSimulationState| async move { + sim.write(memory_interface.start.ready, false).await; + sim.write(memory_interface.finish.ready, false).await; + sim.write( + from_next_pc.next_fetch_block_ids, + from_next_pc.ty().next_fetch_block_ids.HdlNone(), + ) + .await; + sim.write(from_next_pc.fetch.ready, false).await; + sim.write(from_next_pc.cancel.ready, false).await; + sim.write(i_cache_port.addr, 0u8.cast_to(i_cache_port.addr.ty())) + .await; + sim.write(i_cache_port.en, false).await; + sim.write(i_cache_port.wmode, false).await; + sim.write( + i_cache_port.wdata, + #[hdl(sim)] + CacheLine::<_> { + data: repeat(0u8, cache_line_size_in_bytes), + addr: HdlNone(), + config, + }, + ) + .await; + sim.write( + i_cache_port.wmask, + splat_mask(cache_line_ty, false.to_expr()), + ) + .await; + sim.write( + state_for_debug, + #[hdl(sim)] + L1ICacheState::<_> { config }, + ) + .await; + }, + |sim, ()| { + run( + sim, + cd, + memory_interface, + from_next_pc, + i_cache_port, + state_for_debug, + ) + }, + ) + .await; + }, + ); +} + +#[hdl_module] +pub fn l1_i_cache(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let memory_interface: MemoryInterface> = + m.output(MemoryInterface[config]); + #[hdl] + let from_next_pc: NextPcToFetchInterface> = + m.input(NextPcToFetchInterface[config]); + let cache_line_ty = CacheLine[config]; + let cache_line_count = CpuConfigL1ICacheLineCount[config]; + #[hdl] + let mut i_cache = memory_array(ArrayType[cache_line_ty][cache_line_count]); + let i_cache_port = i_cache.new_rw_port(); + #[hdl] + let l1_i_cache_impl = instance(l1_i_cache_impl(config)); + connect(l1_i_cache_impl.cd, cd); + connect(memory_interface, l1_i_cache_impl.memory_interface); + connect(l1_i_cache_impl.from_next_pc, from_next_pc); + connect(i_cache_port, l1_i_cache_impl.i_cache_port); + connect(i_cache_port.clk, cd.clk); +} + +#[hdl_module] +pub fn fetch(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let memory_interface: MemoryInterface> = + m.output(MemoryInterface[config]); + #[hdl] + let from_next_pc: NextPcToFetchInterface> = + m.input(NextPcToFetchInterface[config]); + #[hdl] + let l1_i_cache = instance(l1_i_cache(config)); + connect(l1_i_cache.cd, cd); + connect(memory_interface, l1_i_cache.memory_interface); + connect(l1_i_cache.from_next_pc, from_next_pc); +} diff --git a/crates/cpu/src/lib.rs b/crates/cpu/src/lib.rs index 7992ec5..62936de 100644 --- a/crates/cpu/src/lib.rs +++ b/crates/cpu/src/lib.rs @@ -2,6 +2,7 @@ // See Notices.txt for copyright information pub mod config; pub mod decoder; +pub mod fetch; pub mod instruction; pub mod next_pc; pub mod powerisa_instructions_xml; diff --git a/crates/cpu/src/next_pc.rs b/crates/cpu/src/next_pc.rs index db22f7e..379dbfd 100644 --- a/crates/cpu/src/next_pc.rs +++ b/crates/cpu/src/next_pc.rs @@ -2719,13 +2719,13 @@ impl SimValueDefault for BranchPredictionState { #[derive(Copy, Clone, Debug)] #[must_use] -enum ResetStatus { +pub(crate) enum ResetStatus { Done, Working, } impl ResetStatus { - fn and(self, other: Self) -> Self { + pub(crate) fn and(self, other: Self) -> Self { match (self, other) { (ResetStatus::Done, ResetStatus::Done) => ResetStatus::Done, (ResetStatus::Done | ResetStatus::Working, ResetStatus::Working) @@ -2734,7 +2734,7 @@ impl ResetStatus { } } -trait SimValueDefault: Type { +pub(crate) trait SimValueDefault: Type { fn sim_value_default(self) -> SimValue; } @@ -2828,7 +2828,7 @@ impl SimValueDefault for WipDecodedInsn { } } -trait ResetSteps: Type { +pub(crate) trait ResetSteps: Type { fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus; } diff --git a/crates/cpu/tests/expected/fetch.vcd b/crates/cpu/tests/expected/fetch.vcd new file mode 100644 index 0000000..e69de29 diff --git a/crates/cpu/tests/fetch.rs b/crates/cpu/tests/fetch.rs new file mode 100644 index 0000000..a4a2238 --- /dev/null +++ b/crates/cpu/tests/fetch.rs @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use cpu::{ + config::{CpuConfig, UnitConfig}, + fetch::{MemoryInterface, fetch}, + next_pc::NextPcToFetchInterface, + unit::UnitKind, + util::array_vec::ArrayVec, +}; +use fayalite::{ + prelude::*, + sim::vcd::VcdWriterDecls, + util::{DebugAsDisplay, RcWriter}, +}; +use std::{cell::Cell, collections::VecDeque, num::NonZeroUsize}; + +const MEMORY_QUEUE_SIZE: usize = 32; + +#[hdl] +struct MemoryQueueEntry { + addr: UInt<64>, + cycles_left: UInt<8>, +} + +impl MemoryQueueEntry { + #[hdl] + fn default_sim(self) -> SimValue { + #[hdl(sim)] + Self { + addr: 0u64, + cycles_left: 0u8, + } + } +} + +#[hdl_module(extern)] +fn mock_memory(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let memory_interface: MemoryInterface> = + m.input(MemoryInterface[config]); + #[hdl] + let queue_debug: ArrayVec> = m.output(); + m.register_clock_for_past(cd.clk); + m.extern_module_simulation_fn( + (cd, memory_interface, queue_debug), + |(cd, memory_interface, queue_debug), mut sim| async move { + // intentionally have a different sequence each time we're reset + let delay_sequence_index = Cell::new(0); + sim.resettable( + cd, + async |mut sim| { + sim.write(memory_interface.start.ready, false).await; + sim.write(memory_interface.finish.ready, false).await; + sim.write( + queue_debug, + queue_debug.ty().new_sim(MemoryQueueEntry.default_sim()), + ) + .await; + }, + |sim, ()| { + run_fn( + cd, + memory_interface, + queue_debug, + &delay_sequence_index, + sim, + ) + }, + ) + .await; + }, + ); + #[hdl] + async fn run_fn( + cd: Expr, + memory_interface: Expr>>, + queue_debug: Expr>>, + delay_sequence_index: &Cell, + mut sim: ExternModuleSimulationState, + ) { + let config = memory_interface.config.ty(); + let mut queue: VecDeque> = VecDeque::new(); + loop { + let mut sim_queue = queue_debug.ty().new_sim(MemoryQueueEntry.default_sim()); + for entry in &queue { + ArrayVec::try_push_sim(&mut sim_queue, entry) + .ok() + .expect("queue is known to be small enough"); + } + sim.write(queue_debug, sim_queue).await; + todo!(); + sim.wait_for_clock_edge(cd.clk).await; + println!( + "Dump mock memory queue: {:#?}", + Vec::from_iter( + queue + .iter() + .map(|v| { DebugAsDisplay(format!("addr={:#x}", v.addr.as_int())) }) + ) + ); + } + } +} + +#[hdl_module] +fn dut(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let from_next_pc: NextPcToFetchInterface> = + m.input(NextPcToFetchInterface[config]); + #[hdl] + let fetch = instance(fetch(config)); + #[hdl] + let fetch { + cd: fetch_cd, + memory_interface: fetch_memory_interface, + from_next_pc: fetch_from_next_pc, + } = fetch; + connect(fetch_cd, cd); + connect(fetch_from_next_pc, from_next_pc); + #[hdl] + let mock_memory = instance(mock_memory(config)); + #[hdl] + let mock_memory { + cd: mock_memory_cd, + memory_interface: mock_memory_interface, + queue_debug: _, + } = mock_memory; + connect(mock_memory_cd, cd); + connect(mock_memory_interface, fetch_memory_interface); +} + +#[hdl] +#[test] +fn test_fetch() { + let _n = SourceLocation::normalize_files_for_tests(); + let mut config = CpuConfig::new( + vec![ + UnitConfig::new(UnitKind::AluBranch), + UnitConfig::new(UnitKind::AluBranch), + ], + NonZeroUsize::new(20).unwrap(), + ); + config.fetch_width = NonZeroUsize::new(2).unwrap(); + config.log2_fetch_width_in_bytes = 4; + let m = dut(PhantomConst::new_sized(config)); + let mut sim = Simulation::new(m); + let writer = RcWriter::default(); + sim.add_trace_writer(VcdWriterDecls::new(writer.clone())); + struct DumpVcdOnDrop { + writer: Option, + } + impl Drop for DumpVcdOnDrop { + fn drop(&mut self) { + if let Some(mut writer) = self.writer.take() { + let vcd = String::from_utf8(writer.take()).unwrap(); + println!("####### VCD:\n{vcd}\n#######"); + } + } + } + let mut writer = DumpVcdOnDrop { + writer: Some(writer), + }; + sim.write_clock(sim.io().cd.clk, false); + sim.write_reset(sim.io().cd.rst, true); + for cycle in 0..2000 { + todo!("drive m.from_next_pc"); + sim.advance_time(SimDuration::from_nanos(500)); + println!("clock tick: {cycle}"); + sim.write_clock(sim.io().cd.clk, true); + sim.advance_time(SimDuration::from_nanos(500)); + sim.write_clock(sim.io().cd.clk, false); + sim.write_reset(sim.io().cd.rst, false); + } + // FIXME: vcd is just whatever fetch does now, which isn't known to be correct + let vcd = String::from_utf8(writer.writer.take().unwrap().take()).unwrap(); + println!("####### VCD:\n{vcd}\n#######"); + if vcd != include_str!("expected/fetch.vcd") { + panic!(); + } +}