WIP adding next_pc: added mock_fetch_decode_pipe

Jacob Lifshay 2025-10-27 22:41:33 -07:00
parent 61d52bd028
commit 7a77c02cda
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
6 changed files with 14654 additions and 4 deletions


@@ -34,6 +34,8 @@ pub struct CpuConfig {
pub units: Vec<UnitConfig>,
pub out_reg_num_width: usize,
pub fetch_width: NonZeroUsize,
pub max_branches_per_fetch: NonZeroUsize,
pub log2_fetch_width_in_bytes: u8,
/// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize,
@@ -47,6 +49,13 @@ impl CpuConfig {
};
v
};
pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(1) else {
unreachable!();
};
v
};
pub const DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES: u8 = 3;
pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(8) else {
unreachable!();
@@ -58,6 +67,8 @@ impl CpuConfig {
units,
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
fetch_width: Self::DEFAULT_FETCH_WIDTH,
max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH,
log2_fetch_width_in_bytes: Self::DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size,
}
@@ -117,4 +128,21 @@ impl CpuConfig {
UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width]
[self.non_const_unit_nums().len()]
}
pub fn fetch_width_in_bytes(&self) -> usize {
1usize
.checked_shl(self.log2_fetch_width_in_bytes.into())
.expect("log2_fetch_width_in_bytes is too big")
}
}
#[hdl(get(|c| c.fetch_width.get()))]
pub type CpuConfigFetchWidth<C: PhantomConstGet<CpuConfig>> = DynSize;
#[hdl(get(|c| c.max_branches_per_fetch.get()))]
pub type CpuConfigMaxBranchesPerFetch<C: PhantomConstGet<CpuConfig>> = DynSize;
#[hdl(get(|c| c.log2_fetch_width_in_bytes.into()))]
pub type CpuConfigLog2FetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
#[hdl(get(|c| c.fetch_width_in_bytes()))]
pub type CpuConfigFetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
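The new log2_fetch_width_in_bytes field and the fetch_width_in_bytes() helper are tied together by a simple power of two: the helper computes 1 << log2_fetch_width_in_bytes with a checked shift, so the default of 3 gives an 8-byte aligned fetch block. A minimal standalone sketch of that relationship (the free function below is hypothetical and only mirrors CpuConfig::fetch_width_in_bytes):

// Hypothetical mirror of CpuConfig::fetch_width_in_bytes(), for illustration only.
fn fetch_width_in_bytes(log2_fetch_width_in_bytes: u8) -> usize {
    1usize
        .checked_shl(log2_fetch_width_in_bytes.into())
        .expect("log2_fetch_width_in_bytes is too big")
}

fn main() {
    // DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES is 3, i.e. 8-byte fetch blocks.
    assert_eq!(fetch_width_in_bytes(3), 8);
}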


@@ -2,6 +2,7 @@
// See Notices.txt for copyright information
pub mod config;
pub mod instruction;
pub mod next_pc;
pub mod reg_alloc;
pub mod register;
pub mod unit;

crates/cpu/src/next_pc.rs (new file, 1159 lines)

File diff suppressed because it is too large.


@@ -22,6 +22,18 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
len: 0u8.cast_to(self.len),
}
}
#[hdl]
pub fn new_sim(self, uninit_element: impl ToSimValueWithType<T>) -> SimValue<Self> {
let uninit_element = uninit_element.into_sim_value_with_type(self.element());
#[hdl(sim)]
ArrayVec::<_, _> {
elements: SimValue::from_array_elements(
self.elements,
(0..self.elements.len()).map(|_| uninit_element.clone()),
),
len: 0u8.cast_to(self.len),
}
}
pub fn element(self) -> T {
self.elements.element()
}
@@ -52,6 +64,9 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
pub fn len(this: impl ToExpr<Type = Self>) -> Expr<Length<N>> {
this.to_expr().len
}
pub fn len_sim(this: &SimValue<Self>) -> &SimValue<Length<N>> {
&this.len
}
pub fn is_empty(this: impl ToExpr<Type = Self>) -> Expr<Bool> {
let len = Self::len(this);
len.cmp_eq(0u8)
@@ -75,6 +90,69 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
}
}
}
pub fn elements_sim_ref(this: &SimValue<Self>) -> &[SimValue<T>] {
&this.elements[..*this.len]
}
pub fn elements_sim_mut(this: &mut SimValue<Self>) -> &mut [SimValue<T>] {
let len = *this.len;
&mut this.elements[..len]
}
#[hdl]
pub async fn async_for_each_sim(
this: impl ToSimValue<Type = Self>,
mut f: impl AsyncFnMut(usize, SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this.into_sim_value();
for (index, element) in elements.into_iter().enumerate() {
if index.cmp_lt(*len) {
f(index, element).await;
}
}
}
#[hdl]
pub async fn async_for_each_sim_ref<'a>(
this: &'a SimValue<Self>,
mut f: impl AsyncFnMut(usize, &'a SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
for (index, element) in elements.iter().enumerate() {
if index.cmp_lt(**len) {
f(index, element).await;
}
}
}
#[hdl]
pub async fn async_for_each_sim_mut<'a>(
this: &'a mut SimValue<Self>,
mut f: impl AsyncFnMut(usize, &'a mut SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
for (index, element) in elements.iter_mut().enumerate() {
if index.cmp_lt(**len) {
f(index, element).await;
}
}
}
#[hdl]
pub fn try_push_sim(
this: &mut SimValue<Self>,
value: impl ToSimValueWithType<T>,
) -> Result<(), SimValue<T>> {
let value = value.into_sim_value_with_type(this.ty().element());
let capacity = this.ty().capacity();
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
if **len < capacity {
elements[**len] = value;
**len += 1;
Ok(())
} else {
Err(value)
}
}
pub fn mapped_ty<U: Type>(self, new_element_ty: U) -> ArrayVec<U, N> {
ArrayVec {
elements: ArrayType[new_element_ty][N::from_usize(self.elements.len())],
@@ -100,10 +178,8 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
pub fn as_array_of_options(this: impl ToExpr<Type = Self>) -> Expr<ArrayType<HdlOption<T>, N>> {
let this = this.to_expr();
#[hdl]
- let array_vec_as_array_of_options = wire(
- ArrayType[HdlOption[this.ty().element()]]
- [N::from_usize(this.ty().capacity())],
- );
+ let array_vec_as_array_of_options =
+ wire(ArrayType[HdlOption[this.ty().element()]][N::from_usize(this.ty().capacity())]);
for element in array_vec_as_array_of_options {
connect(element, element.ty().HdlNone());
}
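The new *_sim helpers let ordinary simulation code treat an ArrayVec value as a bounded vector: new_sim fills every slot with a placeholder and sets len to 0, try_push_sim hands the value back in Err when the capacity is reached, and elements_sim_ref exposes only the first len slots. A plain-Rust analogue of those semantics (this BoundedVec type is made up for illustration and is not the fayalite API):

// Bounded vector with the same push/length semantics as the ArrayVec sim helpers.
struct BoundedVec<T> {
    elements: Vec<T>, // always holds `capacity` slots, like ArrayVec's backing array
    len: usize,
}

impl<T: Clone> BoundedVec<T> {
    // counterpart of ArrayVec::new_sim: every slot starts as the placeholder, len is 0
    fn new(capacity: usize, uninit_element: T) -> Self {
        Self { elements: vec![uninit_element; capacity], len: 0 }
    }

    // counterpart of ArrayVec::try_push_sim: a full vector hands the value back
    fn try_push(&mut self, value: T) -> Result<(), T> {
        if self.len < self.elements.len() {
            self.elements[self.len] = value;
            self.len += 1;
            Ok(())
        } else {
            Err(value)
        }
    }

    // counterpart of ArrayVec::elements_sim_ref: only the initialized prefix is visible
    fn as_slice(&self) -> &[T] {
        &self.elements[..self.len]
    }
}

fn main() {
    let mut v = BoundedVec::new(2, 0u8);
    assert_eq!(v.try_push(7), Ok(()));
    assert_eq!(v.try_push(9), Ok(()));
    assert_eq!(v.try_push(3), Err(3)); // full: the rejected value comes back
    assert_eq!(v.as_slice(), &[7, 9]);
}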

File diff suppressed because it is too large.

crates/cpu/tests/next_pc.rs (new file, 340 lines)

@@ -0,0 +1,340 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use cpu::{
config::{CpuConfig, UnitConfig},
next_pc::{
DecodeToPostDecodeInterface, DecodeToPostDecodeInterfaceInner, FETCH_BLOCK_ID_WIDTH,
NextPcToFetchInterface, NextPcToFetchInterfaceInner, WipDecodedInsn, WipDecodedInsnKind,
next_pc,
},
unit::UnitKind,
util::array_vec::ArrayVec,
};
use fayalite::{prelude::*, sim::vcd::VcdWriterDecls, util::RcWriter};
use std::{
cell::Cell,
collections::{BTreeMap, VecDeque},
num::NonZeroUsize,
};
#[derive(Copy, Clone, Debug)]
enum MockInsn {
Nop4,
Jump { target: u64 },
CondBranch { target: u64 },
Call { target: u64 },
Ret,
}
impl MockInsn {
fn byte_len(self) -> u64 {
match self {
MockInsn::Nop4 => 4,
MockInsn::Jump { .. } => 4,
MockInsn::CondBranch { .. } => 4,
MockInsn::Call { .. } => 4,
MockInsn::Ret => 4,
}
}
}
#[derive(Debug)]
struct MockInsns {
insns: BTreeMap<u64, MockInsn>,
}
impl MockInsns {
fn new() -> Self {
Self {
insns: BTreeMap::from_iter([
(0x0, MockInsn::Nop4),
(0x4, MockInsn::Nop4),
(0x8, MockInsn::CondBranch { target: 0x4 }),
(0xC, MockInsn::Call { target: 0x18 }),
(0x10, MockInsn::Jump { target: 0x10 }),
(0x14, MockInsn::Jump { target: 0x10 }),
(0x18, MockInsn::Jump { target: 0x1C }),
(0x1C, MockInsn::Ret),
]),
}
}
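// fetch_block yields the instructions whose bytes lie entirely within pc_range, in address order;
// e.g. with 8-byte fetch blocks, fetch_block(0x8..0x10) yields the CondBranch at 0x8 and the Call at 0xC.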
fn fetch_block(&self, pc_range: std::ops::Range<u64>) -> impl Iterator<Item = (u64, MockInsn)> {
self.insns
.range(pc_range.clone())
.filter_map(move |(&pc, &insn)| {
if pc_range.end >= pc + insn.byte_len() {
Some((pc, insn))
} else {
None
}
})
}
}
const FETCH_PIPE_QUEUE_SIZE: usize = 5;
const DEMO_ILLEGAL_INSN_TRAP: u64 = 0xFF000000u64;
#[hdl]
struct FetchPipeQueueEntry {
fetch_pc: UInt<64>,
cycles_left: UInt<8>,
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
}
impl FetchPipeQueueEntry {
#[hdl]
fn default_sim(self) -> SimValue<Self> {
#[hdl(sim)]
FetchPipeQueueEntry {
fetch_pc: 0u64,
cycles_left: 0u8,
fetch_block_id: 0u8,
}
}
fn get_next_delay(delay_sequence_index: &Cell<u64>) -> u8 {
let index = delay_sequence_index.get();
delay_sequence_index.set(delay_sequence_index.get().wrapping_add(1));
// make a pseudo-random number deterministically based on index
let random = index
.wrapping_add(1)
.wrapping_mul(0x18C49126EABE7A0D) // random prime
.rotate_left(32)
.wrapping_mul(0x92B38C197608A6B) // random prime
.rotate_right(60);
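// limit the mock fetch latency to 0..=7 cycles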
(random % 8) as u8
}
}
#[hdl_module(extern)]
fn mock_fetch_pipe(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let from_fetch: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
m.input(NextPcToFetchInterface[config]);
#[hdl]
let to_post_decode: DecodeToPostDecodeInterface<PhantomConst<CpuConfig>> =
m.output(DecodeToPostDecodeInterface[config]);
#[hdl]
let queue_debug: ArrayVec<FetchPipeQueueEntry, ConstUsize<{ FETCH_PIPE_QUEUE_SIZE }>> =
m.output();
m.register_clock_for_past(cd.clk);
m.extern_module_simulation_fn(
(cd, from_fetch, to_post_decode, queue_debug),
|(cd, from_fetch, to_post_decode, queue_debug), mut sim| async move {
// intentionally have a different sequence each time we're reset
let delay_sequence_index = Cell::new(0);
sim.resettable(
cd,
async |mut sim| {
sim.write(from_fetch.inner.ready, false).await;
sim.write(
to_post_decode.inner.data,
to_post_decode.ty().inner.data.HdlNone(),
)
.await;
sim.write(
queue_debug,
queue_debug.ty().new_sim(FetchPipeQueueEntry.default_sim()),
)
.await;
},
|sim, ()| {
run_fn(
cd,
from_fetch,
to_post_decode,
queue_debug,
&delay_sequence_index,
sim,
)
},
)
.await;
},
);
#[hdl]
async fn run_fn(
cd: Expr<ClockDomain>,
from_fetch: Expr<NextPcToFetchInterface<PhantomConst<CpuConfig>>>,
to_post_decode: Expr<DecodeToPostDecodeInterface<PhantomConst<CpuConfig>>>,
queue_debug: Expr<ArrayVec<FetchPipeQueueEntry, ConstUsize<{ FETCH_PIPE_QUEUE_SIZE }>>>,
delay_sequence_index: &Cell<u64>,
mut sim: ExternModuleSimulationState,
) {
let config = from_fetch.config.ty();
let mock_insns = MockInsns::new();
let mut queue: VecDeque<SimValue<FetchPipeQueueEntry>> = VecDeque::new();
let mut next_id = 0u32;
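// each loop iteration models one clock cycle: drive the outputs, then sample the inputs after the edge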
loop {
let mut sim_queue = queue_debug.ty().new_sim(FetchPipeQueueEntry.default_sim());
for entry in &queue {
ArrayVec::try_push_sim(&mut sim_queue, entry)
.ok()
.expect("queue is known to be small enough");
}
sim.write(queue_debug, sim_queue).await;
if let Some(front) = queue.front().filter(|v| v.cycles_left.as_int() == 0) {
#[hdl(sim)]
let FetchPipeQueueEntry {
fetch_pc,
cycles_left: _,
fetch_block_id,
} = front;
let fetch_pc = fetch_pc.as_int();
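// exclusive end of the aligned fetch block that contains fetch_pc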
let fetch_end =
(fetch_pc + 1).next_multiple_of(config.get().fetch_width_in_bytes() as u64);
let insns = to_post_decode.ty().inner.data.HdlSome.insns;
let zeroed_insn = UInt[insns.element().canonical().bit_width()]
.zero()
.cast_bits_to(insns.element());
let mut insns = insns.new_sim(zeroed_insn);
let mut expected_pc = fetch_pc;
// TODO: handle instructions that go past the end of a fetch block
for (pc, insn) in mock_insns.fetch_block(fetch_pc..fetch_end) {
let next_pc = pc + insn.byte_len();
if pc != expected_pc {
break;
}
expected_pc = next_pc;
let kind = match insn {
MockInsn::Nop4 => WipDecodedInsnKind.NonBranch(),
MockInsn::Jump { target } => WipDecodedInsnKind.Branch(target),
MockInsn::CondBranch { target } => WipDecodedInsnKind.BranchCond(target),
MockInsn::Call { target } => WipDecodedInsnKind.Call(target),
MockInsn::Ret => WipDecodedInsnKind.Ret(),
};
let insn = #[hdl(sim)]
WipDecodedInsn {
fetch_block_id,
id: next_id.cast_to_static::<UInt<_>>(),
pc,
size_in_bytes: insn.byte_len().cast_to_static::<UInt<_>>(),
kind,
};
match ArrayVec::try_push_sim(&mut insns, insn) {
Ok(()) => next_id = next_id.wrapping_add(1),
Err(_) => break,
}
}
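// if nothing decoded, emit a single illegal-instruction trap entry so the decode output is never empty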
if **ArrayVec::len_sim(&insns) == 0 {
let Ok(()) = ArrayVec::try_push_sim(
&mut insns,
#[hdl(sim)]
WipDecodedInsn {
fetch_block_id,
id: next_id.cast_to_static::<UInt<_>>(),
pc: fetch_pc,
size_in_bytes: 0u8.cast_to_static::<UInt<_>>(),
kind: WipDecodedInsnKind.Interrupt(DEMO_ILLEGAL_INSN_TRAP),
},
) else {
unreachable!();
};
next_id = next_id.wrapping_add(1);
}
sim.write(
to_post_decode.inner.data,
HdlSome(
#[hdl(sim)]
DecodeToPostDecodeInterfaceInner::<_> { insns, config },
),
)
.await;
} else {
sim.write(
to_post_decode.inner.data,
to_post_decode.ty().inner.data.HdlNone(),
)
.await;
}
sim.write(from_fetch.inner.ready, queue.len() < FETCH_PIPE_QUEUE_SIZE)
.await;
sim.wait_for_clock_edge(cd.clk).await;
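// after the edge, sample the handshake signals as registered (past) values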
if sim.read_past_bool(to_post_decode.inner.ready, cd.clk).await {
#[hdl(sim)]
if let HdlSome(_) = sim.read_past(to_post_decode.inner.data, cd.clk).await {
queue.pop_front();
}
}
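// model the variable fetch latency: each queued fetch counts down one cycle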
for entry in &mut queue {
if entry.cycles_left.as_int() > 0 {
entry.cycles_left = (entry.cycles_left.as_int() - 1u8).to_sim_value();
}
}
if !sim.read_past_bool(from_fetch.inner.ready, cd.clk).await {
continue;
}
#[hdl(sim)]
if let HdlSome(inner) = sim.read_past(from_fetch.inner.data, cd.clk).await {
#[hdl(sim)]
let NextPcToFetchInterfaceInner {
next_fetch_pc,
fetch_block_id,
in_progress_fetches_to_cancel,
} = &inner;
// cancel in-progress fetches from newest to oldest
for _ in 0..in_progress_fetches_to_cancel.as_int() {
let _ = queue.pop_back();
}
queue.push_back(
#[hdl(sim)]
FetchPipeQueueEntry {
fetch_pc: next_fetch_pc,
cycles_left: FetchPipeQueueEntry::get_next_delay(delay_sequence_index),
fetch_block_id,
},
);
}
}
}
}
#[hdl_module]
fn dut(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let next_pc = instance(next_pc(config));
connect(next_pc.cd, cd);
#[hdl]
let mock_fetch_pipe = instance(mock_fetch_pipe(config));
connect(mock_fetch_pipe.cd, cd);
connect(mock_fetch_pipe.from_fetch, next_pc.to_fetch);
connect(next_pc.from_decode, mock_fetch_pipe.to_post_decode);
}
#[hdl]
#[test]
fn test_next_pc() {
let _n = SourceLocation::normalize_files_for_tests();
let mut config = CpuConfig::new(
vec![
UnitConfig::new(UnitKind::AluBranch),
UnitConfig::new(UnitKind::AluBranch),
],
NonZeroUsize::new(20).unwrap(),
);
config.fetch_width = NonZeroUsize::new(2).unwrap();
let m = dut(PhantomConst::new_sized(config));
let mut sim = Simulation::new(m);
let mut writer = RcWriter::default();
sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
sim.write_clock(sim.io().cd.clk, false);
sim.write_reset(sim.io().cd.rst, true);
for _cycle in 0..300 {
sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, true);
sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, false);
sim.write_reset(sim.io().cd.rst, false);
}
// FIXME: vcd is just whatever next_pc does now, which isn't known to be correct
let vcd = String::from_utf8(writer.take()).unwrap();
println!("####### VCD:\n{vcd}\n#######");
if vcd != include_str!("expected/next_pc.vcd") {
panic!();
}
}