WIP adding fetch::l1_i_cache

This commit is contained in:
Jacob Lifshay 2026-02-04 17:55:14 -08:00
parent c62d33048c
commit c6a77f3d9c
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
7 changed files with 46549 additions and 4 deletions

View file

@ -37,6 +37,8 @@ pub struct CpuConfig {
pub max_branches_per_fetch: NonZeroUsize,
pub max_fetches_in_flight: NonZeroUsize,
pub log2_fetch_width_in_bytes: u8,
pub log2_cache_line_size_in_bytes: u8,
pub log2_l1_i_cache_line_count: u8,
/// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize,
@ -63,6 +65,8 @@ impl CpuConfig {
v
};
pub const DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES: u8 = 3;
pub const DEFAULT_LOG2_CACHE_LINE_SIZE_IN_BYTES: u8 = 6;
pub const DEFAULT_LOG2_L1_I_CACHE_LINE_COUNT: u8 = 8;
pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(8) else {
unreachable!();
@ -77,6 +81,8 @@ impl CpuConfig {
max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH,
max_fetches_in_flight: Self::DEFAULT_MAX_FETCHES_IN_FLIGHT,
log2_fetch_width_in_bytes: Self::DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES,
log2_cache_line_size_in_bytes: Self::DEFAULT_LOG2_CACHE_LINE_SIZE_IN_BYTES,
log2_l1_i_cache_line_count: Self::DEFAULT_LOG2_L1_I_CACHE_LINE_COUNT,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size,
}
@ -141,6 +147,37 @@ impl CpuConfig {
.checked_shl(self.log2_fetch_width_in_bytes.into())
.expect("log2_fetch_width_in_bytes is too big")
}
/// Size in bytes of one cache line: `2 ** log2_cache_line_size_in_bytes`.
///
/// # Panics
///
/// Panics if the shift amount does not fit in a `usize`.
pub fn cache_line_size_in_bytes(&self) -> usize {
    let shift = u32::from(self.log2_cache_line_size_in_bytes);
    1usize
        .checked_shl(shift)
        .expect("log2_cache_line_size_in_bytes is too big")
}
/// `log2` of how many fetch-width blocks fit in one cache line
/// (`log2_cache_line_size_in_bytes - log2_fetch_width_in_bytes`).
///
/// # Panics
///
/// Panics if the cache line is smaller than the fetch width.
pub fn log2_fetches_per_cache_line(&self) -> usize {
    self.log2_cache_line_size_in_bytes
        .checked_sub(self.log2_fetch_width_in_bytes)
        .expect("cache line size in bytes must not be smaller than fetch width in bytes")
        .into()
}
/// Number of fetch-width blocks per cache line:
/// `2 ** log2_fetches_per_cache_line()`.
///
/// # Panics
///
/// Panics if the result does not fit in a `usize`.
pub fn fetches_per_cache_line(&self) -> usize {
    self.log2_fetches_per_cache_line()
        // checked conversion to the `u32` shift amount, then checked shift,
        // so an oversized log2 panics instead of wrapping
        .try_into()
        .ok()
        .and_then(|v| 1usize.checked_shl(v))
        .expect("log2_fetches_per_cache_line is too big")
}
/// Number of lines in the L1 I-cache: `2 ** log2_l1_i_cache_line_count`.
///
/// # Panics
///
/// Panics if the shift amount does not fit in a `usize`.
pub fn l1_i_cache_line_count(&self) -> usize {
    let shift = u32::from(self.log2_l1_i_cache_line_count);
    1usize
        .checked_shl(shift)
        .expect("log2_l1_i_cache_line_count is too big")
}
/// `log2` of the total L1 I-cache size in bytes: line-count log2 plus
/// line-size log2 (sum of two `u8`s widened to `usize`, so it can't overflow).
pub fn log2_l1_i_cache_size_in_bytes(&self) -> usize {
    self.log2_l1_i_cache_line_count as usize + self.log2_cache_line_size_in_bytes as usize
}
/// Total L1 I-cache size in bytes: `2 ** log2_l1_i_cache_size_in_bytes()`.
///
/// # Panics
///
/// Panics if the cache size does not fit in a `usize`.
pub fn l1_i_cache_size_in_bytes(&self) -> usize {
    // Use a checked conversion of the shift amount instead of an `as _`
    // cast, so an out-of-range value panics instead of silently truncating;
    // this also matches the style used by `fetches_per_cache_line`.
    self.log2_l1_i_cache_size_in_bytes()
        .try_into()
        .ok()
        .and_then(|v| 1usize.checked_shl(v))
        .expect("L1 I-Cache is too big")
}
}
#[hdl(get(|c| c.fetch_width.get()))]
@ -161,6 +198,30 @@ pub type CpuConfigLog2FetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize
#[hdl(get(|c| c.fetch_width_in_bytes()))]
pub type CpuConfigFetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for [`CpuConfig::log2_fetches_per_cache_line`]
#[hdl(get(|c| c.log2_fetches_per_cache_line()))]
pub type CpuConfigLog2FetchesPerCacheLine<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for [`CpuConfig::fetches_per_cache_line`]
#[hdl(get(|c| c.fetches_per_cache_line()))]
pub type CpuConfigFetchesPerCacheLine<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for the `log2_cache_line_size_in_bytes` field
#[hdl(get(|c| c.log2_cache_line_size_in_bytes.into()))]
pub type CpuConfigLog2CacheLineSizeInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for [`CpuConfig::cache_line_size_in_bytes`]
#[hdl(get(|c| c.cache_line_size_in_bytes()))]
pub type CpuConfigCacheLineSizeInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for the `log2_l1_i_cache_line_count` field
#[hdl(get(|c| c.log2_l1_i_cache_line_count.into()))]
pub type CpuConfigLog2L1ICacheLineCount<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for [`CpuConfig::l1_i_cache_line_count`]
#[hdl(get(|c| c.l1_i_cache_line_count()))]
pub type CpuConfigL1ICacheLineCount<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for [`CpuConfig::log2_l1_i_cache_size_in_bytes`]
#[hdl(get(|c| c.log2_l1_i_cache_size_in_bytes()))]
pub type CpuConfigLog2L1ICacheSizeInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for [`CpuConfig::l1_i_cache_size_in_bytes`]
#[hdl(get(|c| c.l1_i_cache_size_in_bytes()))]
pub type CpuConfigL1ICacheSizeInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
/// type-level accessor for the `rob_size` field
#[hdl(get(|c| c.rob_size.get()))]
pub type CpuConfigRobSize<C: PhantomConstGet<CpuConfig>> = DynSize;

670
crates/cpu/src/fetch.rs Normal file
View file

@ -0,0 +1,670 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
config::{
CpuConfig, CpuConfigCacheLineSizeInBytes, CpuConfigFetchWidthInBytes,
CpuConfigFetchesPerCacheLine, CpuConfigL1ICacheLineCount, CpuConfigLog2FetchWidthInBytes,
CpuConfigLog2FetchesPerCacheLine, CpuConfigLog2L1ICacheLineCount,
CpuConfigMaxFetchesInFlight, PhantomConstCpuConfig,
},
next_pc::{
FETCH_BLOCK_ID_WIDTH, NextPcToFetchInterface, NextPcToFetchInterfaceInner, ResetStatus,
ResetSteps, SimValueDefault,
},
util::array_vec::ArrayVec,
};
use fayalite::{
int::UIntInRangeType,
memory::{ReadWriteStruct, memory_addr_width, splat_mask},
prelude::*,
util::ready_valid::ReadyValid,
};
use std::{collections::VecDeque, ops::Range};
/// Whether a memory operation is a read or a write.
#[hdl]
pub enum MemoryOperationKind {
    Read,
    Write,
}
/// Payload sent to memory to start one fetch-width-sized operation.
#[hdl(no_static)]
pub struct MemoryOperationStart<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    pub kind: MemoryOperationKind,
    pub addr: UInt<64>,
    // write payload, one byte per fetch-width byte lane
    pub write_data: ArrayType<UInt<8>, CpuConfigFetchWidthInBytes<C>>,
    pub config: C,
}
/// Error classification for a failed memory operation.
#[hdl]
pub enum MemoryOperationErrorKind {
    // only a generic error kind exists so far
    Generic,
}
/// Result classification for a finished memory operation.
#[hdl]
pub enum MemoryOperationFinishKind {
    Success(MemoryOperationKind),
    Error(MemoryOperationErrorKind),
}
/// Payload returned by memory when an operation finishes.
#[hdl(no_static)]
pub struct MemoryOperationFinish<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    pub kind: MemoryOperationFinishKind,
    // read payload; presumably only meaningful for successful reads -- TODO confirm
    pub read_data: ArrayType<UInt<8>, CpuConfigFetchWidthInBytes<C>>,
    pub config: C,
}
/// Ready/valid request/response channel pair connecting fetch to memory.
#[hdl(no_static)]
pub struct MemoryInterface<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    pub start: ReadyValid<MemoryOperationStart<C>>,
    // response channel flows the opposite direction
    #[hdl(flip)]
    pub finish: ReadyValid<MemoryOperationFinish<C>>,
    pub config: C,
}
/// One direct-mapped L1 I-cache line: the cached data bytes plus the tag of
/// the address currently held (`HdlNone` when the line is invalid).
#[hdl(no_static)]
struct CacheLine<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    data: ArrayType<UInt<8>, CpuConfigCacheLineSizeInBytes<C>>,
    // tag bits of the cached address; HdlNone = invalid line
    addr: HdlOption<UIntType<CacheLineTagAddrWidth<C>>>,
    config: C,
}
impl<C: PhantomConstCpuConfig> SimValueDefault for CacheLine<C> {
    // default sim value: default data and an invalid (default) tag
    #[hdl]
    fn sim_value_default(self) -> SimValue<Self> {
        let Self { data, addr, config } = self;
        #[hdl(sim)]
        Self {
            data: data.sim_value_default(),
            addr: addr.sim_value_default(),
            config,
        }
    }
}
/// Width of a cache-line tag: 64 address bits minus the bits covered by the
/// cache index and the in-line offset.
#[hdl(get(|c| 64usize.saturating_sub(c.log2_l1_i_cache_size_in_bytes())))]
type CacheLineTagAddrWidth<C: PhantomConstGet<CpuConfig>> = DynSize;
/// Lifecycle of one L1 I-cache lookup; the `*Canceled` states keep an entry
/// alive until its outstanding work can be safely discarded.
#[hdl]
enum CacheLookupState {
    // slot unused
    Empty,
    NotStarted,
    CanceledWhileReadingCache,
    ReadingCache,
    CacheMissCanceled,
    CacheMiss,
    CleaningUpErrorCanceled,
    CleaningUpError,
    ErrorCleanUpDone,
}
impl CacheLookupState {
    // true iff the entry's slot is unused
    #[hdl]
    fn is_empty(this: &SimValue<Self>) -> bool {
        #[hdl(sim)]
        match this {
            Self::Empty => true,
            _ => false,
        }
    }
    // NOTE(review): true when `try_to_canceled` returns `None`, which also
    // covers `Empty` -- confirm that `Empty` counting as canceled is intended
    fn is_canceled(this: &SimValue<Self>) -> bool {
        Self::try_to_canceled(this).is_none()
    }
    /// if `this` is canceled return `None`,
    /// otherwise return `Some(canceled)` where `canceled` is the canceled equivalent
    #[hdl]
    fn try_to_canceled(this: &SimValue<Self>) -> Option<SimValue<Self>> {
        #[hdl(sim)]
        match this {
            Self::Empty
            | Self::CanceledWhileReadingCache
            | Self::CacheMissCanceled
            | Self::CleaningUpErrorCanceled => None,
            Self::NotStarted => Some(
                #[hdl(sim)]
                CacheLookupState.Empty(),
            ),
            Self::ReadingCache => Some(
                #[hdl(sim)]
                CacheLookupState.CanceledWhileReadingCache(),
            ),
            Self::CacheMiss => Some(
                #[hdl(sim)]
                CacheLookupState.CacheMissCanceled(),
            ),
            Self::CleaningUpError => Some(
                #[hdl(sim)]
                CacheLookupState.CleaningUpErrorCanceled(),
            ),
            Self::ErrorCleanUpDone => Some(
                #[hdl(sim)]
                CacheLookupState.Empty(),
            ),
            Self::Unknown => unreachable!(),
        }
    }
    // canceled equivalent of `this`, or `this` unchanged when already canceled
    #[hdl]
    fn to_canceled(this: &SimValue<Self>) -> SimValue<Self> {
        Self::try_to_canceled(this).unwrap_or_else(|| this.clone())
    }
}
/// Tracks which fetch-width sub-block of a missed cache line comes next.
#[hdl(no_static)]
struct CacheMissFillIndexes<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    next_index: UIntInRangeType<ConstUsize<0>, CpuConfigFetchesPerCacheLine<C>>,
    config: C,
}
/// One in-flight fetch tracked by the L1 I-cache.
#[hdl(no_static)]
struct FetchQueueEntry<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    start_pc: UInt<64>,
    fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
    state: CacheLookupState,
    // fill requests not yet issued to memory (HdlNone when none remain)
    fill_indexes_to_start: HdlOption<CacheMissFillIndexes<C>>,
    // fill responses not yet received from memory
    fill_indexes_to_finish: HdlOption<CacheMissFillIndexes<C>>,
    config: C,
}
impl<C: PhantomConstCpuConfig> FetchQueueEntry<C> {
    // an entry is empty exactly when its lookup state is Empty
    fn is_empty(this: &SimValue<Self>) -> bool {
        CacheLookupState::is_empty(&this.state)
    }
}
impl<C: PhantomConstCpuConfig> SimValueDefault for FetchQueueEntry<C> {
    // default sim value: an Empty entry with zeroed pc/id and no pending fills
    #[hdl]
    fn sim_value_default(self) -> SimValue<Self> {
        let Self {
            start_pc: _,
            fetch_block_id,
            state: _,
            fill_indexes_to_start,
            fill_indexes_to_finish,
            config,
        } = self;
        #[hdl(sim)]
        Self {
            start_pc: 0u64,
            fetch_block_id: fetch_block_id.zero(),
            state: #[hdl(sim)]
            CacheLookupState.Empty(),
            fill_indexes_to_start: #[hdl(sim)]
            fill_indexes_to_start.HdlNone(),
            fill_indexes_to_finish: #[hdl(sim)]
            fill_indexes_to_finish.HdlNone(),
            config,
        }
    }
}
/// Hardware-visible snapshot of the fetch queue, exported for debugging.
#[hdl(no_static)]
struct L1ICacheState<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    queue: ArrayVec<FetchQueueEntry<C>, CpuConfigMaxFetchesInFlight<C>>,
    config: C,
}
/// Simulation-side mirror of [`L1ICacheState`] using a growable queue.
#[derive(Clone, Debug)]
struct L1ICacheStateSim<C: PhantomConstCpuConfig> {
    queue: VecDeque<SimValue<FetchQueueEntry<C>>>,
    config: C,
}
/// A pending masked write of fill data into one cache line.
#[hdl(no_static)]
struct WriteBackStep<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    cache_addr: UIntInRangeType<ConstUsize<0>, CpuConfigL1ICacheLineCount<C>>,
    data: CacheLine<C>,
    // per-field write-enable mask
    mask: AsMask<CacheLine<C>>,
}
/// A 64-bit address decomposed into (low bits to high bits) byte offset
/// within a fetch block, fetch block within a cache line, cache line index,
/// and tag.
#[hdl(no_static)]
struct SplitAddr<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig> {
    byte_in_fetch_block: UIntType<CpuConfigLog2FetchWidthInBytes<C>>,
    fetch_block_in_cache_line: UIntType<CpuConfigLog2FetchesPerCacheLine<C>>,
    cache_line_index: UIntType<CpuConfigLog2L1ICacheLineCount<C>>,
    tag: UIntType<CacheLineTagAddrWidth<C>>,
}
impl<C: PhantomConstCpuConfig> SplitAddr<C> {
    // bit positions of each address field, lowest bits first
    fn byte_in_fetch_block_bit_range(self) -> Range<usize> {
        0..self.byte_in_fetch_block.width()
    }
    fn fetch_block_in_cache_line_bit_range(self) -> Range<usize> {
        let start = self.byte_in_fetch_block_bit_range().end;
        start..(start + self.fetch_block_in_cache_line.width())
    }
    fn cache_line_index_bit_range(self) -> Range<usize> {
        let start = self.fetch_block_in_cache_line_bit_range().end;
        start..(start + self.cache_line_index.width())
    }
    fn tag_bit_range(self) -> Range<usize> {
        // the tag occupies all remaining high bits up to bit 63
        self.cache_line_index_bit_range().end..64
    }
    /// Splits a 64-bit address into its offset/index/tag fields (sim-side).
    #[hdl]
    fn split_addr_sim(self, addr: impl ToSimValueWithType<UInt<64>>) -> SimValue<Self> {
        let addr = addr.into_sim_value_with_type(UInt::<64>::new_static());
        #[hdl(sim)]
        Self {
            byte_in_fetch_block: addr[self.byte_in_fetch_block_bit_range()],
            fetch_block_in_cache_line: addr[self.fetch_block_in_cache_line_bit_range()],
            cache_line_index: addr[self.cache_line_index_bit_range()],
            tag: addr[self.tag_bit_range()],
        }
    }
    /// Intended inverse of [`Self::split_addr_sim`].
    /// NOTE(review): unimplemented -- body is still `todo!()` (WIP commit)
    #[hdl]
    fn addr_sim(this: impl ToSimValue<Type = Self>) -> SimValue<UInt<64>> {
        #[hdl(sim)]
        let Self {
            byte_in_fetch_block,
            fetch_block_in_cache_line,
            cache_line_index,
            tag,
        } = this;
        todo!()
    }
}
impl<C: PhantomConstCpuConfig> L1ICacheStateSim<C> {
    /// Scans the queue for an entry with cache-miss fill requests left to
    /// issue and, if one is found, advances its fill index and returns the
    /// read request to send to memory.
    ///
    /// NOTE(review): WIP -- `addr` is still `todo!()`, and the `false` match
    /// arm values below appear to be unused.
    #[hdl]
    fn try_start_memory_operation(&mut self) -> Option<SimValue<MemoryOperationStart<C>>> {
        for entry in &mut self.queue {
            #[hdl(sim)]
            let FetchQueueEntry::<_> {
                start_pc,
                fetch_block_id: _,
                state,
                fill_indexes_to_start,
                fill_indexes_to_finish: _,
                config: _,
            } = entry;
            // only CacheMiss / CacheMissCanceled entries can start fills
            #[hdl(sim)]
            match state {
                CacheLookupState::Empty
                | CacheLookupState::NotStarted
                | CacheLookupState::CanceledWhileReadingCache
                | CacheLookupState::ReadingCache
                | CacheLookupState::CleaningUpErrorCanceled
                | CacheLookupState::CleaningUpError
                | CacheLookupState::ErrorCleanUpDone => continue,
                CacheLookupState::CacheMissCanceled => false,
                CacheLookupState::CacheMiss => false,
                CacheLookupState::Unknown => unreachable!(),
            }
            #[hdl(sim)]
            if let HdlSome(indexes) = fill_indexes_to_start {
                // issue the next fetch-sized sub-block; clear the option once
                // the last sub-block has been issued
                let end_index = indexes.next_index.ty().end();
                let next_index = *indexes.next_index + 1;
                if next_index < end_index {
                    *indexes.next_index = next_index;
                } else {
                    let ty = fill_indexes_to_start.ty();
                    *fill_indexes_to_start = #[hdl(sim)]
                    ty.HdlNone();
                }
                let ty = MemoryOperationStart[self.config];
                return Some(
                    #[hdl(sim)]
                    MemoryOperationStart::<_> {
                        kind: #[hdl(sim)]
                        MemoryOperationKind.Read(),
                        addr: todo!(),
                        write_data: repeat(ty.write_data.element().zero(), ty.write_data.len()),
                        config: self.config,
                    },
                );
            }
        }
        None
    }
    /// tries to do a cache fill, only calls the passed in functions when it's waiting on those
    #[must_use]
    #[hdl]
    fn do_cache_fill<'a>(
        &mut self,
        try_finish_memory_operation: impl FnOnce() -> Result<&'a SimValue<MemoryOperationFinish<C>>, ()>,
    ) -> Option<SimValue<WriteBackStep<C>>> {
        // wrapped in Option so the FnOnce can be consumed at most once
        let mut try_finish_memory_operation = Some(try_finish_memory_operation);
        for entry in &mut self.queue {
            #[hdl(sim)]
            let FetchQueueEntry::<_> {
                start_pc,
                fetch_block_id,
                state,
                fill_indexes_to_start,
                fill_indexes_to_finish,
                config,
            } = entry;
            let cleaning_up_error = #[hdl(sim)]
            match state {
                CacheLookupState::Empty
                | CacheLookupState::NotStarted
                | CacheLookupState::CanceledWhileReadingCache
                | CacheLookupState::ReadingCache
                | CacheLookupState::ErrorCleanUpDone => continue,
                CacheLookupState::CacheMissCanceled => false,
                CacheLookupState::CacheMiss => false,
                CacheLookupState::CleaningUpErrorCanceled => true,
                CacheLookupState::CleaningUpError => true,
                CacheLookupState::Unknown => unreachable!(),
            };
            // NOTE(review): WIP -- `try_finish_memory_operation` was wrapped
            // in `Some` above but is invoked directly here, and the Ok branch
            // is still `todo!()`; `cleaning_up_error` is not yet used
            #[hdl(sim)]
            if let HdlSome(fill_indexes_to_finish) = fill_indexes_to_finish {
                match try_finish_memory_operation() {
                    Ok(_) => todo!(),
                    Err(()) => {}
                }
            }
        }
        None
    }
}
impl<C: PhantomConstCpuConfig> SimValueDefault for L1ICacheState<C> {
    // default: a default (empty) queue
    #[hdl]
    fn sim_value_default(self) -> SimValue<Self> {
        let Self { queue, config } = self;
        #[hdl(sim)]
        Self {
            queue: queue.sim_value_default(),
            config,
        }
    }
}
impl<C: PhantomConstCpuConfig> ResetSteps for L1ICacheState<C> {
    // nothing needs a multi-cycle reset: the queue is rewritten every cycle
    #[hdl]
    fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
        #[hdl(sim)]
        let Self {
            // overwritten every cycle, no reset needed
            queue: _,
            config: _,
        } = this;
        let _ = step;
        ResetStatus::Done
    }
}
// extern (simulation-only) module holding the L1 I-cache control logic; the
// actual cache storage lives in `l1_i_cache` and is driven via `i_cache_port`.
#[hdl_module(extern)]
fn l1_i_cache_impl(config: PhantomConst<CpuConfig>) {
    #[hdl]
    let cd: ClockDomain = m.input();
    #[hdl]
    let memory_interface: MemoryInterface<PhantomConst<CpuConfig>> =
        m.output(MemoryInterface[config]);
    #[hdl]
    let from_next_pc: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
        m.input(NextPcToFetchInterface[config]);
    // i_cache_port.clk is externally overridden with cd.clk
    #[hdl]
    let i_cache_port: ReadWriteStruct<CacheLine<PhantomConst<CpuConfig>>, DynSize> = m.output(
        ReadWriteStruct[CacheLine[config]][memory_addr_width(CpuConfigL1ICacheLineCount[config])],
    );
    #[hdl]
    let state_for_debug: L1ICacheState<PhantomConst<CpuConfig>> = m.output(L1ICacheState[config]);
    m.register_clock_for_past(cd.clk);
    // main simulation coroutine: first reset the cache, then service fetches
    #[hdl]
    async fn run(
        mut sim: ExternModuleSimulationState,
        cd: Expr<ClockDomain>,
        memory_interface: Expr<MemoryInterface<PhantomConst<CpuConfig>>>,
        from_next_pc: Expr<NextPcToFetchInterface<PhantomConst<CpuConfig>>>,
        i_cache_port: Expr<ReadWriteStruct<CacheLine<PhantomConst<CpuConfig>>, DynSize>>,
        state_expr: Expr<L1ICacheState<PhantomConst<CpuConfig>>>,
    ) {
        let mut state = sim.read(state_expr).await;
        let config = state.config.ty();
        let l1_i_cache_line_count = CpuConfigL1ICacheLineCount[config];
        let cache_line_ty = CacheLine[config];
        let entry_ty = FetchQueueEntry[config];
        let max_fetches_in_flight = CpuConfigMaxFetchesInFlight[config];
        // reset phase: write a default (invalid) line to every cache index,
        // one line per clock cycle
        for step in 0usize.. {
            sim.write(state_expr, state).await;
            sim.wait_for_clock_edge(cd.clk).await;
            state = sim.read_past(state_expr, cd.clk).await;
            sim.write(i_cache_port.en, false).await;
            let mut reset_status = ResetSteps::reset_step(&mut state, step);
            if step < l1_i_cache_line_count {
                // still invalidating lines, so stay in reset this cycle
                reset_status = ResetStatus::Working;
                #[hdl]
                let ReadWriteStruct::<_, _> {
                    addr,
                    en,
                    clk: _, // externally overridden with cd.clk
                    rdata: _,
                    wmode,
                    wdata,
                    wmask,
                } = i_cache_port;
                sim.write(addr, step.cast_to(addr.ty())).await;
                sim.write(en, true).await;
                sim.write(wmode, true).await;
                sim.write(wdata, CacheLine::sim_value_default(wdata.ty()))
                    .await;
                sim.write(wmask, splat_mask(cache_line_ty, true.to_expr()))
                    .await;
            }
            match reset_status {
                ResetStatus::Done => break,
                ResetStatus::Working => {}
            }
        }
        sim.write(from_next_pc.cancel.ready, true).await;
        let mut queue = VecDeque::<SimValue<_>>::with_capacity(max_fetches_in_flight);
        loop {
            // drop finished (Empty) entries from both ends of the queue
            while queue
                .pop_back_if(|entry| FetchQueueEntry::is_empty(entry))
                .is_some()
            {}
            while queue
                .pop_front_if(|entry| FetchQueueEntry::is_empty(entry))
                .is_some()
            {}
            // accept a new fetch only when there's room in the queue
            sim.write(
                from_next_pc.fetch.ready,
                queue.len() < max_fetches_in_flight,
            )
            .await;
            // TODO
            // mirror the sim-side queue into the debug output
            state.queue = state
                .queue
                .ty()
                .from_iter_sim(state.queue.ty().element().sim_value_default(), &queue)
                .expect("known to fit");
            sim.write(state_expr, state).await;
            sim.wait_for_clock_edge(cd.clk).await;
            state = sim.read_past(state_expr, cd.clk).await;
            // handle cancels before pushing new fetch op
            if sim.read_past_bool(from_next_pc.cancel.ready, cd.clk).await {
                #[hdl(sim)]
                if let HdlSome(in_progress_fetches_to_cancel) =
                    sim.read_past(from_next_pc.cancel.data, cd.clk).await
                {
                    let mut in_progress_fetches_to_cancel = *in_progress_fetches_to_cancel;
                    // cancel in-progress fetches from newest to oldest
                    for entry in queue.iter_mut().rev() {
                        if in_progress_fetches_to_cancel == 0 {
                            break;
                        }
                        if let Some(canceled) = CacheLookupState::try_to_canceled(&entry.state) {
                            entry.state = canceled;
                            in_progress_fetches_to_cancel -= 1;
                        }
                    }
                }
            }
            while queue
                .pop_back_if(|entry| FetchQueueEntry::is_empty(entry))
                .is_some()
            {}
            if !sim.read_past_bool(from_next_pc.fetch.ready, cd.clk).await {
                continue;
            }
            // handle pushing new fetch op after handling cancels
            #[hdl(sim)]
            if let HdlSome(inner) = sim.read_past(from_next_pc.fetch.data, cd.clk).await {
                #[hdl(sim)]
                let NextPcToFetchInterfaceInner {
                    start_pc,
                    fetch_block_id,
                } = &inner;
                queue.push_back(
                    #[hdl(sim)]
                    FetchQueueEntry::<_> {
                        start_pc,
                        fetch_block_id,
                        state: CacheLookupState.NotStarted(),
                        fill_indexes_to_start: entry_ty.fill_indexes_to_start.HdlNone(),
                        fill_indexes_to_finish: entry_ty.fill_indexes_to_finish.HdlNone(),
                        config,
                    },
                );
            }
        }
    }
    m.extern_module_simulation_fn(
        (
            cd,
            memory_interface,
            from_next_pc,
            i_cache_port,
            state_for_debug,
        ),
        |(cd, memory_interface, from_next_pc, i_cache_port, state_for_debug), mut sim| async move {
            let config = memory_interface.ty().config;
            let cache_line_ty = CacheLine[config];
            sim.write(i_cache_port.clk, false).await; // externally overridden with cd.clk, so just write a constant here
            // while in reset, drive all outputs to inactive defaults; then run
            sim.resettable(
                cd,
                |mut sim: ExternModuleSimulationState| async move {
                    sim.write(
                        memory_interface.start.data,
                        memory_interface.ty().start.data.HdlNone(),
                    )
                    .await;
                    sim.write(memory_interface.finish.ready, false).await;
                    sim.write(
                        from_next_pc.next_fetch_block_ids,
                        from_next_pc.ty().next_fetch_block_ids.HdlNone(),
                    )
                    .await;
                    sim.write(from_next_pc.fetch.ready, false).await;
                    sim.write(from_next_pc.cancel.ready, false).await;
                    sim.write(i_cache_port.addr, 0u8.cast_to(i_cache_port.addr.ty()))
                        .await;
                    sim.write(i_cache_port.en, false).await;
                    sim.write(i_cache_port.wmode, false).await;
                    sim.write(
                        i_cache_port.wdata,
                        CacheLine::sim_value_default(cache_line_ty),
                    )
                    .await;
                    sim.write(
                        i_cache_port.wmask,
                        splat_mask(cache_line_ty, false.to_expr()),
                    )
                    .await;
                    sim.write(state_for_debug, state_for_debug.ty().sim_value_default())
                        .await;
                },
                |sim, ()| {
                    run(
                        sim,
                        cd,
                        memory_interface,
                        from_next_pc,
                        i_cache_port,
                        state_for_debug,
                    )
                },
            )
            .await;
        },
    );
}
/// implements a direct-mapped L1 I-Cache
///
/// The cache storage is a register array in this module; the control logic
/// lives in the extern simulation module [`l1_i_cache_impl`], which accesses
/// the storage through `i_cache_port`.
#[hdl_module]
pub fn l1_i_cache(config: PhantomConst<CpuConfig>) {
    #[hdl]
    let cd: ClockDomain = m.input();
    #[hdl]
    let memory_interface: MemoryInterface<PhantomConst<CpuConfig>> =
        m.output(MemoryInterface[config]);
    #[hdl]
    let from_next_pc: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
        m.input(NextPcToFetchInterface[config]);
    let cache_line_ty = CacheLine[config];
    let cache_line_count = CpuConfigL1ICacheLineCount[config];
    // TODO: convert to memory with single read/write port once semantics
    // for read/write latencies are properly implemented in the simulator:
    // https://git.libre-chip.org/libre-chip/fayalite/src/commit/c632e5d570d4763e8e18d764e95b7a9e515ebf99/crates/fayalite/src/sim/compiler.rs#L4774
    // which depends on:
    // https://github.com/chipsalliance/firrtl-spec/issues/263
    #[hdl]
    let i_cache = reg_builder()
        .clock_domain(cd)
        .no_reset(ArrayType[cache_line_ty][cache_line_count]);
    #[hdl]
    let l1_i_cache_impl = instance(l1_i_cache_impl(config));
    connect(l1_i_cache_impl.cd, cd);
    connect(memory_interface, l1_i_cache_impl.memory_interface);
    connect(l1_i_cache_impl.from_next_pc, from_next_pc);
    // emulate a read/write memory port on top of the register array
    #[hdl]
    let ReadWriteStruct::<_, _> {
        addr,
        en,
        clk: _,
        rdata,
        wmode,
        wdata,
        wmask,
    } = l1_i_cache_impl.i_cache_port;
    connect(rdata, rdata.ty().uninit());
    #[hdl]
    if en {
        let i_cache_line = i_cache[addr];
        #[hdl]
        if wmode {
            // write: update only the byte lanes / tag selected by wmask
            #[hdl]
            let CacheLine::<_> {
                data: wdata_data,
                addr: wdata_addr,
                config: _,
            } = wdata;
            for ((dest, src), mask) in i_cache_line
                .data
                .into_iter()
                .zip(wdata_data)
                .zip(wmask.data)
            {
                #[hdl]
                if mask {
                    connect(dest, src);
                }
            }
            #[hdl]
            if wmask.addr {
                connect(i_cache_line.addr, wdata_addr);
            }
        } else {
            // read: return the whole selected line
            connect(rdata, i_cache_line);
        }
    }
}
/// Top-level fetch stage: currently just wraps [`l1_i_cache`], forwarding
/// the clock domain, memory interface, and next-PC interface.
#[hdl_module]
pub fn fetch(config: PhantomConst<CpuConfig>) {
    #[hdl]
    let cd: ClockDomain = m.input();
    #[hdl]
    let memory_interface: MemoryInterface<PhantomConst<CpuConfig>> =
        m.output(MemoryInterface[config]);
    #[hdl]
    let from_next_pc: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
        m.input(NextPcToFetchInterface[config]);
    #[hdl]
    let l1_i_cache = instance(l1_i_cache(config));
    connect(l1_i_cache.cd, cd);
    connect(memory_interface, l1_i_cache.memory_interface);
    connect(l1_i_cache.from_next_pc, from_next_pc);
}

View file

@ -2,6 +2,7 @@
// See Notices.txt for copyright information
pub mod config;
pub mod decoder;
pub mod fetch;
pub mod instruction;
pub mod next_pc;
pub mod powerisa_instructions_xml;

View file

@ -2719,13 +2719,13 @@ impl SimValueDefault for BranchPredictionState {
#[derive(Copy, Clone, Debug)]
#[must_use]
enum ResetStatus {
pub(crate) enum ResetStatus {
Done,
Working,
}
impl ResetStatus {
fn and(self, other: Self) -> Self {
pub(crate) fn and(self, other: Self) -> Self {
match (self, other) {
(ResetStatus::Done, ResetStatus::Done) => ResetStatus::Done,
(ResetStatus::Done | ResetStatus::Working, ResetStatus::Working)
@ -2734,7 +2734,7 @@ impl ResetStatus {
}
}
trait SimValueDefault: Type {
pub(crate) trait SimValueDefault: Type {
fn sim_value_default(self) -> SimValue<Self>;
}
@ -2828,7 +2828,7 @@ impl SimValueDefault for WipDecodedInsn {
}
}
trait ResetSteps: Type {
pub(crate) trait ResetSteps: Type {
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus;
}

View file

@ -2,6 +2,24 @@
// See Notices.txt for copyright information
use fayalite::{expr::ops::ExprIndex, int::UIntInRangeInclusiveType, prelude::*};
use std::fmt;
/// Error returned when an iterator supplies more items than an `ArrayVec`
/// can hold; carries the partially-filled value and the unconsumed items.
#[derive(Clone, Debug)]
pub struct ArrayVecFullError<V, I: Iterator> {
    /// the value built from the items that did fit
    pub value: V,
    /// the first overflowing item followed by the rest of the iterator
    pub rest: std::iter::Chain<std::iter::Once<I::Item>, I>,
}
impl<V, I: Iterator> fmt::Display for ArrayVecFullError<V, I> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("ArrayVec is full")
    }
}
impl<V: fmt::Debug, I: Iterator<Item: fmt::Debug> + fmt::Debug> std::error::Error
    for ArrayVecFullError<V, I>
{
}
#[hdl]
pub type Length<Max: Size> = UIntInRangeInclusiveType<ConstUsize<0>, Max>;
@ -46,6 +64,29 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
len: self.elements.len().to_sim_value_with_type(self.len),
}
}
/// Builds a `SimValue<ArrayVec>` from `iter`, padding unused capacity with
/// `uninit_element`.
///
/// Returns `Err(ArrayVecFullError)` carrying the partially-built value and
/// the unconsumed items when `iter` yields more than `capacity()` items.
///
/// NOTE(review): this fills `elements` but never adjusts the length of the
/// value produced by `new_sim` -- confirm `new_sim` sets the intended length
pub fn from_iter_sim<I: IntoIterator<Item: ToSimValueWithType<T>>>(
    self,
    uninit_element: impl ToSimValueWithType<T>,
    iter: I,
) -> Result<SimValue<Self>, ArrayVecFullError<SimValue<Self>, I::IntoIter>> {
    let mut value = Self::new_sim(self, uninit_element);
    let element = self.element();
    let mut iter = iter.into_iter();
    for i in 0..self.capacity() {
        let Some(v) = iter.next() else {
            break;
        };
        value.elements[i] = v.into_sim_value_with_type(element);
    }
    // one extra item means overflow: hand back the value plus the leftovers
    if let Some(extra) = iter.next() {
        Err(ArrayVecFullError {
            value,
            rest: std::iter::once(extra).chain(iter),
        })
    } else {
        Ok(value)
    }
}
/// Returns the element type of this `ArrayVec`'s backing array.
pub fn element(self) -> T {
    self.elements.element()
}

45574
crates/cpu/tests/expected/fetch.vcd generated Normal file

File diff suppressed because it is too large Load diff

198
crates/cpu/tests/fetch.rs Normal file
View file

@ -0,0 +1,198 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use cpu::{
config::{CpuConfig, UnitConfig},
fetch::{MemoryInterface, fetch},
next_pc::NextPcToFetchInterface,
unit::UnitKind,
util::array_vec::ArrayVec,
};
use fayalite::{
prelude::*,
sim::vcd::VcdWriterDecls,
util::{DebugAsDisplay, RcWriter},
};
use std::{cell::Cell, collections::VecDeque, num::NonZeroUsize};
const MEMORY_QUEUE_SIZE: usize = 32;
/// One outstanding mock-memory request: target address plus a latency
/// countdown.
#[hdl]
struct MemoryQueueEntry {
    addr: UInt<64>,
    // remaining simulated delay cycles
    cycles_left: UInt<8>,
}
impl MemoryQueueEntry {
    // all-zero default sim value
    #[hdl]
    fn default_sim(self) -> SimValue<Self> {
        #[hdl(sim)]
        Self {
            addr: 0u64,
            cycles_left: 0u8,
        }
    }
}
// extern simulation module standing in for main memory in tests; currently
// only publishes and prints its (still empty) request queue -- WIP
#[hdl_module(extern)]
fn mock_memory(config: PhantomConst<CpuConfig>) {
    #[hdl]
    let cd: ClockDomain = m.input();
    #[hdl]
    let memory_interface: MemoryInterface<PhantomConst<CpuConfig>> =
        m.input(MemoryInterface[config]);
    #[hdl]
    let queue_debug: ArrayVec<MemoryQueueEntry, ConstUsize<{ MEMORY_QUEUE_SIZE }>> = m.output();
    m.register_clock_for_past(cd.clk);
    m.extern_module_simulation_fn(
        (cd, memory_interface, queue_debug),
        |(cd, memory_interface, queue_debug), mut sim| async move {
            // intentionally have a different sequence each time we're reset
            let delay_sequence_index = Cell::new(0);
            sim.resettable(
                cd,
                async |mut sim| {
                    // while in reset: deassert ready/valid, clear debug queue
                    sim.write(memory_interface.start.ready, false).await;
                    sim.write(
                        memory_interface.finish.data,
                        memory_interface.ty().finish.data.HdlNone(),
                    )
                    .await;
                    sim.write(
                        queue_debug,
                        queue_debug.ty().new_sim(MemoryQueueEntry.default_sim()),
                    )
                    .await;
                },
                |sim, ()| {
                    run_fn(
                        cd,
                        memory_interface,
                        queue_debug,
                        &delay_sequence_index,
                        sim,
                    )
                },
            )
            .await;
        },
    );
    // per-reset simulation loop; NOTE(review): request handling is still a
    // TODO -- this only mirrors the queue to `queue_debug` and prints it
    #[hdl]
    async fn run_fn(
        cd: Expr<ClockDomain>,
        memory_interface: Expr<MemoryInterface<PhantomConst<CpuConfig>>>,
        queue_debug: Expr<ArrayVec<MemoryQueueEntry, ConstUsize<{ MEMORY_QUEUE_SIZE }>>>,
        delay_sequence_index: &Cell<u64>,
        mut sim: ExternModuleSimulationState,
    ) {
        let config = memory_interface.config.ty();
        let mut queue: VecDeque<SimValue<MemoryQueueEntry>> = VecDeque::new();
        loop {
            let mut sim_queue = queue_debug.ty().new_sim(MemoryQueueEntry.default_sim());
            for entry in &queue {
                ArrayVec::try_push_sim(&mut sim_queue, entry)
                    .ok()
                    .expect("queue is known to be small enough");
            }
            sim.write(queue_debug, sim_queue).await;
            // TODO:
            sim.wait_for_clock_edge(cd.clk).await;
            println!(
                "Dump mock memory queue: {:#?}",
                Vec::from_iter(
                    queue
                        .iter()
                        .map(|v| { DebugAsDisplay(format!("addr={:#x}", v.addr.as_int())) })
                )
            );
        }
    }
}
// device under test: the fetch pipeline wired to the mock memory
#[hdl_module]
fn dut(config: PhantomConst<CpuConfig>) {
    #[hdl]
    let cd: ClockDomain = m.input();
    #[hdl]
    let from_next_pc: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
        m.input(NextPcToFetchInterface[config]);
    #[hdl]
    let fetch = instance(fetch(config));
    #[hdl]
    let fetch {
        cd: fetch_cd,
        memory_interface: fetch_memory_interface,
        from_next_pc: fetch_from_next_pc,
    } = fetch;
    connect(fetch_cd, cd);
    connect(fetch_from_next_pc, from_next_pc);
    #[hdl]
    let mock_memory = instance(mock_memory(config));
    #[hdl]
    let mock_memory {
        cd: mock_memory_cd,
        memory_interface: mock_memory_interface,
        queue_debug: _,
    } = mock_memory;
    connect(mock_memory_cd, cd);
    // fetch's memory requests flow into the mock memory
    connect(mock_memory_interface, fetch_memory_interface);
}
/// Smoke-tests the fetch pipeline: runs the DUT against the mock memory for
/// 2000 cycles and compares the resulting VCD trace against a golden file.
#[test]
fn test_fetch() {
    let _n = SourceLocation::normalize_files_for_tests();
    let mut config = CpuConfig::new(
        vec![
            UnitConfig::new(UnitKind::AluBranch),
            UnitConfig::new(UnitKind::AluBranch),
        ],
        NonZeroUsize::new(20).unwrap(),
    );
    config.fetch_width = NonZeroUsize::new(2).unwrap();
    config.log2_fetch_width_in_bytes = 4;
    config.l1_i_cache_line_count = NonZeroUsize::new(16).unwrap();
    let m = dut(PhantomConst::new_sized(config));
    let mut sim = Simulation::new(m);
    let writer = RcWriter::default();
    sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
    // dump the VCD even if the test panics mid-run, so failures are debuggable
    struct DumpVcdOnDrop {
        writer: Option<RcWriter>,
    }
    impl Drop for DumpVcdOnDrop {
        fn drop(&mut self) {
            if let Some(mut writer) = self.writer.take() {
                let vcd = String::from_utf8(writer.take()).unwrap();
                println!("####### VCD:\n{vcd}\n#######");
            }
        }
    }
    let mut writer = DumpVcdOnDrop {
        writer: Some(writer),
    };
    let from_next_pc_ty = sim.io().from_next_pc.ty();
    // hold reset asserted for the first clock cycle
    sim.write_clock(sim.io().cd.clk, false);
    sim.write_reset(sim.io().cd.rst, true);
    sim.write(
        sim.io().from_next_pc.cancel.data,
        from_next_pc_ty.cancel.data.HdlNone(),
    );
    sim.write(
        sim.io().from_next_pc.fetch.data,
        from_next_pc_ty.fetch.data.HdlNone(),
    );
    for cycle in 0..2000 {
        // TODO: drive from_next_pc
        sim.advance_time(SimDuration::from_nanos(500));
        println!("clock tick: {cycle}");
        sim.write_clock(sim.io().cd.clk, true);
        sim.advance_time(SimDuration::from_nanos(500));
        sim.write_clock(sim.io().cd.clk, false);
        sim.write_reset(sim.io().cd.rst, false);
    }
    // FIXME: vcd is just whatever fetch does now, which isn't known to be correct
    let vcd = String::from_utf8(writer.writer.take().unwrap().take()).unwrap();
    println!("####### VCD:\n{vcd}\n#######");
    // compare against the golden trace; a plain `if`/`panic!` is used instead
    // of assert_eq! so a mismatch doesn't dump both huge strings twice (the
    // produced VCD was already printed above), but the panic now carries a
    // message so the failure is identifiable in test output
    if vcd != include_str!("expected/fetch.vcd") {
        panic!("VCD output differs from expected/fetch.vcd");
    }
}