From 618862c7d96498f9ef742447d24bab7ddfa28456 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Thu, 9 Apr 2026 22:23:18 -0700 Subject: [PATCH] WIP adding rename_execute_retire --- crates/cpu/Cargo.toml | 3 + crates/cpu/src/config.rs | 61 ++--- crates/cpu/src/instruction.rs | 34 +-- crates/cpu/src/lib.rs | 2 + crates/cpu/src/rename_execute_retire.rs | 295 ++++++++++++++++++++++++ crates/cpu/src/unit.rs | 36 +-- crates/cpu/src/unit/alu_branch.rs | 52 ++--- crates/cpu/src/unit/unit_base.rs | 93 ++++---- 8 files changed, 420 insertions(+), 156 deletions(-) create mode 100644 crates/cpu/src/rename_execute_retire.rs diff --git a/crates/cpu/Cargo.toml b/crates/cpu/Cargo.toml index f346e88..ba472bb 100644 --- a/crates/cpu/Cargo.toml +++ b/crates/cpu/Cargo.toml @@ -33,3 +33,6 @@ hex-literal.workspace = true regex = "1.12.2" sha2.workspace = true which.workspace = true + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(todo)'] } diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index 9826955..2dfa8b2 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -1,12 +1,6 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use crate::{ - instruction::{CONST_ZERO_UNIT_NUM, MOpTrait, PRegNum, RenamedMOp, UnitNum, UnitOutRegNum}, - unit::{ - UnitCancelInput, UnitKind, UnitOutputWrite, - unit_base::{UnitForwardingInfo, UnitToRegAlloc}, - }, -}; +use crate::{instruction::CONST_ZERO_UNIT_NUM, unit::UnitKind}; use fayalite::prelude::*; use serde::{Deserialize, Serialize}; use std::num::NonZeroUsize; @@ -101,55 +95,14 @@ impl CpuConfig { pub fn unit_num_width(&self) -> usize { UInt::range(CONST_ZERO_UNIT_NUM..self.non_const_unit_nums().end).width() } - pub fn unit_num(&self) -> UnitNum { - UnitNum[self.unit_num_width()] - } - pub fn unit_out_reg_num(&self) -> UnitOutRegNum { - UnitOutRegNum[self.out_reg_num_width] - } - pub fn p_reg_num(&self) -> PRegNum { - PRegNum[self.unit_num_width()][self.out_reg_num_width] - } pub fn p_reg_num_width(&self) -> usize { self.unit_num_width() + self.out_reg_num_width } - pub fn renamed_mop_in_unit(&self) -> RenamedMOp, DynSize> { - RenamedMOp[self.unit_out_reg_num()][self.p_reg_num_width()] - } - pub fn unit_output_write(&self) -> UnitOutputWrite { - UnitOutputWrite[self.out_reg_num_width] - } - pub fn unit_output_writes(&self) -> Array>> { - Array[HdlOption[self.unit_output_write()]][self.non_const_unit_nums().len()] - } - pub fn unit_cancel_input(&self) -> UnitCancelInput { - UnitCancelInput[self.out_reg_num_width] - } - pub fn unit_forwarding_info(&self) -> UnitForwardingInfo { - UnitForwardingInfo[self.unit_num_width()][self.out_reg_num_width] - [self.non_const_unit_nums().len()] - } pub fn unit_max_in_flight(&self, unit_index: usize) -> NonZeroUsize { self.units[unit_index] .max_in_flight .unwrap_or(self.default_unit_max_in_flight) } - pub fn unit_to_reg_alloc< - MOp: Type + MOpTrait, SrcRegWidth = DynSize>, - ExtraOut: Type, - >( - &self, - mop_ty: MOp, - extra_out_ty: ExtraOut, - ) -> UnitToRegAlloc { - assert_eq!( - mop_ty.dest_reg_ty(), - self.unit_out_reg_num(), - "inconsistent types", - ); - UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width] - [self.non_const_unit_nums().len()] - } pub fn fetch_width_in_bytes(&self) -> usize { 1usize .checked_shl(self.log2_fetch_width_in_bytes.into()) @@ -188,6 +141,18 @@ impl CpuConfig { } } +#[hdl(get(|c| c.out_reg_num_width))] +pub type CpuConfigOutRegNumWidth> = DynSize; + +#[hdl(get(|c| c.unit_num_width()))] +pub type CpuConfigUnitNumWidth> = DynSize; + +#[hdl(get(|c| c.p_reg_num_width()))] +pub type CpuConfigPRegNumWidth> = DynSize; + +#[hdl(get(|c| c.non_const_unit_nums().len()))] +pub type CpuConfigUnitCount> = DynSize; + #[hdl(get(|c| c.fetch_width.get()))] pub type CpuConfigFetchWidth> = DynSize; diff --git a/crates/cpu/src/instruction.rs b/crates/cpu/src/instruction.rs index 1f9f5ae..ac87efc 100644 --- a/crates/cpu/src/instruction.rs +++ b/crates/cpu/src/instruction.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information use crate::{ + config::{CpuConfig, CpuConfigOutRegNumWidth, CpuConfigUnitNumWidth, PhantomConstCpuConfig}, register::{PRegFlags, PRegFlagsViewTrait, PRegValue, ViewUnused}, unit::UnitMOp, util::{Rotate, range_u32_len}, @@ -2595,19 +2596,21 @@ impl MoveRegMOp { } } -#[hdl(cmp_eq)] +#[hdl(cmp_eq, no_static)] /// there may be more than one unit of a given kind, so UnitNum is not the same as UnitKind. /// zero is used for built-in constants, such as the zero register -pub struct UnitNum { - pub adj_value: UIntType, +pub struct UnitNum> { + pub adj_value: UIntType>, + pub config: C, } -impl UnitNum { +impl UnitNum { #[hdl] pub fn const_zero(self) -> Expr { #[hdl] UnitNum { adj_value: CONST_ZERO_UNIT_NUM.cast_to(self.adj_value), + config: self.config, } } #[hdl] @@ -2615,6 +2618,7 @@ impl UnitNum { #[hdl] UnitNum { adj_value: (index + 1).cast_to(self.adj_value), + config: self.config, } } pub fn is_index(expr: impl ToExpr, index: usize) -> Expr { @@ -2622,7 +2626,9 @@ impl UnitNum { expr.ty().from_index(index).adj_value.cmp_eq(expr.adj_value) } #[hdl] - pub fn as_index(expr: impl ToExpr) -> Expr>> { + pub fn as_index( + expr: impl ToExpr, + ) -> Expr>>> { let expr = expr.to_expr(); #[hdl] let unit_index = wire(HdlOption[expr.ty().adj_value]); @@ -2640,19 +2646,20 @@ impl UnitNum { pub const CONST_ZERO_UNIT_NUM: usize = 0; -#[hdl(cmp_eq)] -pub struct UnitOutRegNum { - pub value: UIntType, +#[hdl(cmp_eq, no_static)] +pub struct UnitOutRegNum> { + pub value: UIntType>, + pub config: C, } -#[hdl(cmp_eq)] +#[hdl(cmp_eq, no_static)] /// Physical Register Number -- registers in the CPU's backend -pub struct PRegNum { - pub unit_num: UnitNum, - pub unit_out_reg: UnitOutRegNum, +pub struct PRegNum> { + pub unit_num: UnitNum, + pub unit_out_reg: UnitOutRegNum, } -impl PRegNum { +impl PRegNum { #[hdl] pub fn const_zero(self) -> Expr { #[hdl] @@ -2661,6 +2668,7 @@ impl PRegNum { + pub fetch_block_id: UInt<8>, + pub id: UInt<12>, + pub pc: UInt<64>, + /// initialized to 0 by decoder, overwritten by `next_pc()` + pub predicted_next_pc: UInt<64>, + pub size_in_bytes: UInt<4>, + /// `true` if this µOp is the first µOp in the ISA-level instruction. + /// In general, a single µOp can't be cancelled by itself, + /// it needs to be cancelled along with all other µOps that + /// come from the same ISA-level instruction. + pub is_first_mop_in_insn: Bool, + pub mop: MOp, +} + +#[hdl(no_static)] +/// TODO: merge with [`crate::next_pc::PostDecodeOutputInterface`] +pub struct PostDecodeOutputInterface> { + pub insns: ArrayVec, CpuConfigFetchWidth>, + #[hdl(flip)] + pub ready: UIntInRangeInclusiveType, CpuConfigFetchWidth>, + /// tells the rename/execute/retire circuit to cancel all non-retired instructions + pub cancel: ReadyValid<()>, + pub config: C, +} + +#[hdl(no_static)] +/// handles updating speculative branch predictor state (e.g. branch histories) +/// when instructions retire, as well as updating state when a +/// branch instruction is mis-speculated. +pub struct RetireToNextPcInterface> { + pub inner: ReadyValid>, + /// only for debugging + pub next_insns: HdlOption, CpuConfigRobSize>>, +} + +fn zeroed(ty: T) -> SimValue { + SimValue::from_opaque( + ty, + OpaqueSimValue::from_bits(UInt::new(ty.canonical().bit_width()).zero()), + ) +} + +impl SimValueDefault for RenameExecuteRetireDebugState { + fn sim_value_default(self) -> SimValue { + zeroed(self) + } +} + +#[derive(Debug, Clone)] +struct RenameTable { + entries: Box<[SimValue>; 1 << MOpRegNum::WIDTH]>, + config: C, +} + +impl RenameTable { + fn new(config: C) -> Self { + Self { + entries: vec![PRegNum[config].const_zero().into_sim_value(); 1 << MOpRegNum::WIDTH] + .try_into() + .expect("size is known to match"), + config, + } + } + fn to_debug_state(&self) -> SimValue, { 1 << MOpRegNum::WIDTH }>> { + self.entries + .to_sim_value_with_type(Array[PRegNum[self.config]][ConstUsize]) + } +} + +#[hdl(no_static)] +struct RobEntryDebugState> { + mop: MOpInstance, CpuConfigPRegNumWidth>>, + config: C, +} + +impl SimValueDefault for RobEntryDebugState { + fn sim_value_default(self) -> SimValue { + zeroed(self) + } +} + +#[derive(Debug)] +struct RobEntry { + mop: SimValue, CpuConfigPRegNumWidth>>>, + config: C, +} + +#[hdl] +struct OrigMOpQueueEntryDebugState { + mop: MOpInstance, + /// number of renamed µOps that this non-renamed µOp corresponds to + renamed_mop_count: UInt<8>, +} + +#[derive(Debug)] +struct OrigMOpQueueEntry { + mop: SimValue>, + /// number of renamed µOps that this non-renamed µOp corresponds to + renamed_mop_count: u8, +} + +#[hdl(no_static)] +pub struct RenameExecuteRetireDebugState> { + rename_table: Array, { 1 << MOpRegNum::WIDTH }>, + retire_rename_table: Array, { 1 << MOpRegNum::WIDTH }>, + rob: ArrayVec, CpuConfigRobSize>, + orig_mop_queue: ArrayVec>, + cancelling: Bool, +} + +#[derive(Debug)] +struct RenameExecuteRetireState { + rename_table: RenameTable, + retire_rename_table: RenameTable, + rob: VecDeque>, + orig_mop_queue: VecDeque, + cancelling: bool, + config: C, +} + +impl RenameExecuteRetireState { + fn new(config: C) -> Self { + let rename_table = RenameTable::new(config); + Self { + rename_table: rename_table.clone(), + retire_rename_table: rename_table, + rob: VecDeque::with_capacity(CpuConfigRobSize[config]), + orig_mop_queue: VecDeque::with_capacity(CpuConfigRobSize[config]), + cancelling: false, + config, + } + } + #[hdl] + async fn write_for_debug( + &self, + sim: &mut ExternModuleSimulationState, + state_for_debug: Expr>, + ) { + let Self { + ref rename_table, + ref retire_rename_table, + ref rob, + ref orig_mop_queue, + cancelling, + config, + } = *self; + sim.write( + state_for_debug, + #[hdl(sim)] + RenameExecuteRetireDebugState::<_> { + rename_table: rename_table.to_debug_state(), + retire_rename_table: retire_rename_table.to_debug_state(), + rob: state_for_debug + .ty() + .rob + .from_iter_sim( + zeroed(RobEntryDebugState[config]), + rob.iter().map(|entry| { + let RobEntry { mop, config: _ } = entry; + #[hdl(sim)] + RobEntryDebugState::<_> { mop, config } + }), + ) + .expect("known to fit"), + orig_mop_queue: state_for_debug + .ty() + .orig_mop_queue + .from_iter_sim( + zeroed(OrigMOpQueueEntryDebugState), + orig_mop_queue.iter().map(|entry| { + let OrigMOpQueueEntry { + mop, + renamed_mop_count, + } = entry; + #[hdl(sim)] + OrigMOpQueueEntryDebugState { + mop, + renamed_mop_count, + } + }), + ) + .expect("known to fit"), + cancelling, + }, + ) + .await; + } + #[hdl] + async fn write_to_next_pc_next_insns( + &self, + sim: &mut ExternModuleSimulationState, + next_insns: Expr, CpuConfigRobSize>>>, + ) { + sim.write( + next_insns, + #[hdl(sim)] + (next_insns.ty()).HdlSome( + next_insns + .ty() + .HdlSome + .from_iter_sim( + zeroed(MOpInstance[MOp]), + self.orig_mop_queue.iter().map(|entry| &entry.mop), + ) + .expect("known to fit"), + ), + ) + .await; + } +} + +#[hdl] +async fn rename_execute_retire_run( + mut sim: ExternModuleSimulationState, + cd: Expr, + from_post_decode: Expr>>, + to_next_pc: Expr>>, + state_for_debug: Expr>>, + config: PhantomConst, +) { + let mut state = RenameExecuteRetireState::new(config); + loop { + state + .write_to_next_pc_next_insns(&mut sim, to_next_pc.next_insns) + .await; + state.write_for_debug(&mut sim, state_for_debug).await; + sim.wait_for_clock_edge(cd.clk).await; + todo!("step state based on I/O"); + } +} + +#[hdl_module(extern)] +pub fn rename_execute_retire(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let from_post_decode: PostDecodeOutputInterface> = + m.input(PostDecodeOutputInterface[config]); + #[hdl] + let to_next_pc: RetireToNextPcInterface> = + m.output(RetireToNextPcInterface[config]); + #[hdl] + let state_for_debug: RenameExecuteRetireDebugState> = + m.output(RenameExecuteRetireDebugState[config]); + m.register_clock_for_past(cd.clk); + m.extern_module_simulation_fn( + (cd, from_post_decode, to_next_pc, state_for_debug, config), + |(cd, from_post_decode, to_next_pc, state_for_debug, config), mut sim| async move { + sim.write(state_for_debug, state_for_debug.ty().sim_value_default()) + .await; + sim.resettable( + cd, + |mut sim: ExternModuleSimulationState| async move { + sim.write(from_post_decode.ready, 0usize).await; + sim.write(from_post_decode.cancel.ready, false).await; + sim.write(to_next_pc.inner.data, to_next_pc.ty().inner.data.HdlNone()) + .await; + sim.write(to_next_pc.next_insns, to_next_pc.ty().next_insns.HdlNone()) + .await; + }, + |sim, ()| { + rename_execute_retire_run( + sim, + cd, + from_post_decode, + to_next_pc, + state_for_debug, + config, + ) + }, + ) + .await; + }, + ); +} diff --git a/crates/cpu/src/unit.rs b/crates/cpu/src/unit.rs index 400358c..e59b892 100644 --- a/crates/cpu/src/unit.rs +++ b/crates/cpu/src/unit.rs @@ -2,7 +2,7 @@ // See Notices.txt for copyright information use crate::{ - config::CpuConfig, + config::{CpuConfig, PhantomConstCpuConfig}, instruction::{ AluBranchMOp, LoadStoreMOp, MOp, MOpDestReg, MOpInto, MOpRegNum, MOpTrait, MOpVariantVisitOps, MOpVariantVisitor, MOpVisitVariants, RenamedMOp, UnitOutRegNum, @@ -48,7 +48,7 @@ macro_rules! all_units { } impl $UnitKind { - pub fn unit(self, config: &CpuConfig, unit_index: usize) -> DynUnit { + pub fn unit(self, config: PhantomConst, unit_index: usize) -> DynUnit { match self { $($UnitKind::$Unit => $create_dyn_unit_fn(config, unit_index),)* } @@ -277,9 +277,9 @@ pub struct UnitResultCompleted { pub extra_out: ExtraOut, } -#[hdl(cmp_eq)] -pub struct UnitOutputWrite { - pub which: UnitOutRegNum, +#[hdl(cmp_eq, no_static)] +pub struct UnitOutputWrite> { + pub which: UnitOutRegNum, pub value: PRegValue, } @@ -300,21 +300,21 @@ impl UnitResult { } } -#[hdl] -pub struct UnitOutput { - pub which: UnitOutRegNum, +#[hdl(no_static)] +pub struct UnitOutput, ExtraOut> { + pub which: UnitOutRegNum, pub result: UnitResult, } -impl UnitOutput { +impl UnitOutput { pub fn extra_out_ty(self) -> ExtraOut { self.result.extra_out_ty() } } -#[hdl(cmp_eq)] -pub struct UnitCancelInput { - pub which: UnitOutRegNum, +#[hdl(cmp_eq, no_static)] +pub struct UnitCancelInput> { + pub which: UnitOutRegNum, } pub trait UnitTrait: @@ -332,7 +332,7 @@ pub trait UnitTrait: fn extract_mop( &self, - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr>; fn module(&self) -> Interned>; @@ -340,7 +340,7 @@ pub trait UnitTrait: fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr>; + ) -> Expr, Self::MOp, Self::ExtraOut>>; fn cd(&self, this: Expr) -> Expr; @@ -390,7 +390,7 @@ impl UnitTrait for DynUnit { fn extract_mop( &self, - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr> { self.unit.extract_mop(mop) } @@ -402,7 +402,7 @@ impl UnitTrait for DynUnit { fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { self.unit.unit_to_reg_alloc(this) } @@ -445,7 +445,7 @@ impl UnitTrait for DynUnitWrapper, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr> { Expr::from_enum(Expr::as_enum(self.0.extract_mop(mop))) } @@ -457,7 +457,7 @@ impl UnitTrait for DynUnitWrapper, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { Expr::from_bundle(Expr::as_bundle( self.0.unit_to_reg_alloc(Expr::from_bundle(this)), )) diff --git a/crates/cpu/src/unit/alu_branch.rs b/crates/cpu/src/unit/alu_branch.rs index 6815ae6..a86b43b 100644 --- a/crates/cpu/src/unit/alu_branch.rs +++ b/crates/cpu/src/unit/alu_branch.rs @@ -19,16 +19,13 @@ use crate::{ }, }; use fayalite::{ - intern::{Intern, Interned}, - module::wire_with_loc, - prelude::*, - util::ready_valid::ReadyValid, + intern::Interned, module::wire_with_loc, prelude::*, util::ready_valid::ReadyValid, }; use std::{collections::HashMap, ops::RangeTo}; #[hdl] fn add_sub( - mop: Expr, DynSize, SrcCount>>, + mop: Expr>, DynSize, SrcCount>>, pc: Expr>, flags_mode: Expr, src_values: Expr>, @@ -245,7 +242,7 @@ fn add_sub( #[hdl] fn logical_flags( - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -259,7 +256,7 @@ fn logical_flags( #[hdl] fn logical( - mop: Expr, DynSize, ConstUsize<2>>>, + mop: Expr>, DynSize, ConstUsize<2>>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -273,7 +270,7 @@ fn logical( #[hdl] fn logical_i( - mop: Expr, DynSize, ConstUsize<1>>>, + mop: Expr>, DynSize, ConstUsize<1>>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -287,7 +284,7 @@ fn logical_i( #[hdl] fn shift_rotate( - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -301,7 +298,7 @@ fn shift_rotate( #[hdl] fn compare( - mop: Expr, DynSize, SrcCount>>, + mop: Expr>, DynSize, SrcCount>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -315,7 +312,7 @@ fn compare( #[hdl] fn branch( - mop: Expr, DynSize, SrcCount>>, + mop: Expr>, DynSize, SrcCount>>, pc: Expr>, flags_mode: Expr, src_values: Expr>, @@ -330,7 +327,7 @@ fn branch( #[hdl] fn read_special( - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, pc: Expr>, flags_mode: Expr, src_values: Expr>, @@ -344,20 +341,18 @@ fn read_special( } #[hdl_module] -pub fn alu_branch(config: &CpuConfig, unit_index: usize) { +pub fn alu_branch(config: PhantomConst, unit_index: usize) { #[hdl] let cd: ClockDomain = m.input(); #[hdl] let unit_to_reg_alloc: UnitToRegAlloc< - AluBranchMOp, DynSize>, + PhantomConst, + AluBranchMOp>, DynSize>, (), - DynSize, - DynSize, - DynSize, - > = m.output(config.unit_to_reg_alloc( - AluBranchMOp[config.unit_out_reg_num()][config.p_reg_num_width()], - (), - )); + > = m.output( + UnitToRegAlloc[config][AluBranchMOp[UnitOutRegNum[config]][config.get().p_reg_num_width()]] + [()], + ); #[hdl] let global_state: GlobalState = m.input(); @@ -375,10 +370,11 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { #[hdl] if let HdlSome(execute_start) = ReadyValid::firing_data(unit_base.execute_start) { #[hdl] - let ExecuteStart::<_> { + let ExecuteStart::<_, _> { mop, pc, src_values, + config: _, } = execute_start; #[hdl] match mop { @@ -580,14 +576,14 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct AluBranch { - config: Interned, + config: PhantomConst, module: Interned>, } impl AluBranch { - pub fn new(config: &CpuConfig, unit_index: usize) -> Self { + pub fn new(config: PhantomConst, unit_index: usize) -> Self { Self { - config: config.intern(), + config, module: alu_branch(config, unit_index), } } @@ -596,7 +592,7 @@ impl AluBranch { impl UnitTrait for AluBranch { type Type = alu_branch; type ExtraOut = (); - type MOp = AluBranchMOp, DynSize>; + type MOp = AluBranchMOp>, DynSize>; fn ty(&self) -> Self::Type { self.module.io_ty() @@ -616,7 +612,7 @@ impl UnitTrait for AluBranch { fn extract_mop( &self, - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr> { UnitMOp::alu_branch_mop(mop) } @@ -628,7 +624,7 @@ impl UnitTrait for AluBranch { fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { this.unit_to_reg_alloc } diff --git a/crates/cpu/src/unit/unit_base.rs b/crates/cpu/src/unit/unit_base.rs index 4e665e0..cc7c754 100644 --- a/crates/cpu/src/unit/unit_base.rs +++ b/crates/cpu/src/unit/unit_base.rs @@ -2,7 +2,7 @@ // See Notices.txt for copyright information use crate::{ - config::CpuConfig, + config::{CpuConfig, CpuConfigUnitCount, PhantomConstCpuConfig}, instruction::{COMMON_MOP_SRC_LEN, MOpTrait, PRegNum, UnitNum, UnitOutRegNum}, register::PRegValue, unit::{UnitCancelInput, UnitOutput, UnitOutputWrite}, @@ -15,13 +15,11 @@ use fayalite::{ ty::StaticType, util::ready_valid::ReadyValid, }; -use std::marker::PhantomData; -#[hdl] -pub struct UnitForwardingInfo { - pub unit_output_writes: ArrayType>, UnitCount>, - pub unit_reg_frees: ArrayType>, UnitCount>, - pub _phantom: PhantomData, +#[hdl(no_static)] +pub struct UnitForwardingInfo> { + pub unit_output_writes: ArrayType>, CpuConfigUnitCount>, + pub unit_reg_frees: ArrayType>, CpuConfigUnitCount>, } #[hdl] @@ -30,26 +28,18 @@ pub struct UnitInput { pub pc: UInt<64>, } -#[hdl] -pub struct UnitToRegAlloc< - MOp: Type, - ExtraOut: Type, - UnitNumWidth: Size, - OutRegNumWidth: Size, - UnitCount: Size, -> { +#[hdl(no_static)] +pub struct UnitToRegAlloc, MOp: Type, ExtraOut: Type> { #[hdl(flip)] - pub unit_forwarding_info: UnitForwardingInfo, + pub unit_forwarding_info: UnitForwardingInfo, #[hdl(flip)] pub input: ReadyValid>, #[hdl(flip)] - pub cancel_input: HdlOption>, - pub output: HdlOption>, + pub cancel_input: HdlOption>, + pub output: HdlOption>, } -impl - UnitToRegAlloc -{ +impl UnitToRegAlloc { pub fn mop_ty(self) -> MOp { self.input.data.HdlSome.mop } @@ -58,16 +48,20 @@ impl>> { +#[hdl(no_static)] +pub struct ExecuteStart< + C: PhantomConstGet, + MOp: Type + MOpTrait>, +> { pub mop: MOp, pub pc: UInt<64>, pub src_values: Array, + pub config: C, } -#[hdl] -pub struct ExecuteEnd { - pub unit_output: UnitOutput, +#[hdl(no_static)] +pub struct ExecuteEnd, ExtraOut> { + pub unit_output: UnitOutput, } #[hdl] @@ -240,10 +234,10 @@ impl InFlightOpsSummary { #[hdl_module] pub fn unit_base< - MOp: Type + MOpTrait, SrcRegWidth = DynSize>, + MOp: Type + MOpTrait>, SrcRegWidth = DynSize>, ExtraOut: Type, >( - config: &CpuConfig, + config: PhantomConst, unit_index: usize, mop_ty: MOp, extra_out_ty: ExtraOut, @@ -251,17 +245,18 @@ pub fn unit_base< #[hdl] let cd: ClockDomain = m.input(); #[hdl] - let unit_to_reg_alloc: UnitToRegAlloc = - m.output(config.unit_to_reg_alloc(mop_ty, extra_out_ty)); + let unit_to_reg_alloc: UnitToRegAlloc, MOp, ExtraOut> = + m.output(UnitToRegAlloc[config][mop_ty][extra_out_ty]); #[hdl] - let execute_start: ReadyValid> = m.output(ReadyValid[ExecuteStart[mop_ty]]); + let execute_start: ReadyValid, MOp>> = + m.output(ReadyValid[ExecuteStart[config][mop_ty]]); #[hdl] - let execute_end: HdlOption> = - m.input(HdlOption[ExecuteEnd[config.out_reg_num_width][extra_out_ty]]); + let execute_end: HdlOption, ExtraOut>> = + m.input(HdlOption[ExecuteEnd[config][extra_out_ty]]); connect(execute_start.data, execute_start.ty().data.HdlNone()); - let max_in_flight = config.unit_max_in_flight(unit_index).get(); + let max_in_flight = config.get().unit_max_in_flight(unit_index).get(); let in_flight_op_ty = InFlightOp[mop_ty]; #[hdl] let in_flight_ops = reg_builder() @@ -279,16 +274,15 @@ pub fn unit_base< ); #[hdl] - let UnitForwardingInfo::<_, _, _> { + let UnitForwardingInfo::<_> { unit_output_writes, unit_reg_frees, - _phantom: _, } = unit_to_reg_alloc.unit_forwarding_info; #[hdl] let read_src_regs = wire(mop_ty.src_regs_ty()); connect( read_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); #[hdl] let read_src_values = wire(); @@ -297,7 +291,7 @@ pub fn unit_base< let input_src_regs = wire(mop_ty.src_regs_ty()); connect( input_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); #[hdl] let input_src_regs_valid = wire(); @@ -309,7 +303,7 @@ pub fn unit_base< Bool, SourceLocation::caller(), ); - mem.depth(1 << config.out_reg_num_width); + mem.depth(1 << config.get().out_reg_num_width); mem }) .collect(); @@ -319,11 +313,11 @@ pub fn unit_base< PRegValue, SourceLocation::caller(), ); - unit_output_regs.depth(1 << config.out_reg_num_width); + unit_output_regs.depth(1 << config.get().out_reg_num_width); for src_index in 0..COMMON_MOP_SRC_LEN { let read_port = unit_output_regs.new_read_port(); - let p_reg_num = read_src_regs[src_index].cast_bits_to(config.p_reg_num()); + let p_reg_num = read_src_regs[src_index].cast_bits_to(PRegNum[config]); connect_any(read_port.addr, p_reg_num.unit_out_reg.value); connect(read_port.en, false); connect(read_port.clk, cd.clk); @@ -336,7 +330,7 @@ pub fn unit_base< for src_index in 0..COMMON_MOP_SRC_LEN { let read_port = unit_output_regs_valid[unit_index].new_read_port(); - let p_reg_num = input_src_regs[src_index].cast_bits_to(config.p_reg_num()); + let p_reg_num = input_src_regs[src_index].cast_bits_to(PRegNum[config]); connect_any(read_port.addr, p_reg_num.unit_out_reg.value); connect(read_port.en, false); connect(read_port.clk, cd.clk); @@ -367,8 +361,8 @@ pub fn unit_base< connect_any(ready_write_port.addr, unit_output_write.which.value); connect(ready_write_port.en, true); let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), + PRegNum::<_> { + unit_num: UnitNum[config].from_index(unit_index), unit_out_reg: unit_output_write.which, }; for src_index in 0..COMMON_MOP_SRC_LEN { @@ -399,10 +393,11 @@ pub fn unit_base< execute_start.data, HdlSome( #[hdl] - ExecuteStart::<_> { + ExecuteStart::<_, _> { mop: in_flight_op.mop, pc: in_flight_op.pc, src_values: read_src_values, + config, }, ), ); @@ -425,7 +420,7 @@ pub fn unit_base< let input_mop_src_regs = wire(mop_ty.src_regs_ty()); connect( input_mop_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); MOp::connect_src_regs(mop, input_mop_src_regs); let src_ready_flags = wire_with_loc( @@ -497,7 +492,7 @@ pub fn unit_base< ); connect( src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); MOp::connect_src_regs(mop, src_regs); @@ -521,8 +516,8 @@ pub fn unit_base< value: _, } = unit_output_write; let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), + PRegNum::<_> { + unit_num: UnitNum[config].from_index(unit_index), unit_out_reg, }; for src_index in 0..COMMON_MOP_SRC_LEN {