From 6a0af6517fcd4250fae113ee61993f600c7115b8 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Thu, 9 Apr 2026 22:23:18 -0700 Subject: [PATCH] WIP adding rename_execute_retire --- crates/cpu/Cargo.toml | 3 + crates/cpu/src/config.rs | 73 +-- crates/cpu/src/instruction.rs | 354 ++++++++++++- crates/cpu/src/lib.rs | 2 + crates/cpu/src/rename_execute_retire.rs | 660 ++++++++++++++++++++++++ crates/cpu/src/unit.rs | 81 ++- crates/cpu/src/unit/alu_branch.rs | 52 +- crates/cpu/src/unit/unit_base.rs | 93 ++-- crates/cpu/tests/reg_alloc.rs | 1 + 9 files changed, 1153 insertions(+), 166 deletions(-) create mode 100644 crates/cpu/src/rename_execute_retire.rs diff --git a/crates/cpu/Cargo.toml b/crates/cpu/Cargo.toml index f346e88..ba472bb 100644 --- a/crates/cpu/Cargo.toml +++ b/crates/cpu/Cargo.toml @@ -33,3 +33,6 @@ hex-literal.workspace = true regex = "1.12.2" sha2.workspace = true which.workspace = true + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(todo)'] } diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index 9826955..9c10f59 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -1,12 +1,6 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use crate::{ - instruction::{CONST_ZERO_UNIT_NUM, MOpTrait, PRegNum, RenamedMOp, UnitNum, UnitOutRegNum}, - unit::{ - UnitCancelInput, UnitKind, UnitOutputWrite, - unit_base::{UnitForwardingInfo, UnitToRegAlloc}, - }, -}; +use crate::{instruction::CONST_ZERO_UNIT_NUM, unit::UnitKind}; use fayalite::prelude::*; use serde::{Deserialize, Serialize}; use std::num::NonZeroUsize; @@ -101,54 +95,20 @@ impl CpuConfig { pub fn unit_num_width(&self) -> usize { UInt::range(CONST_ZERO_UNIT_NUM..self.non_const_unit_nums().end).width() } - pub fn unit_num(&self) -> UnitNum { - UnitNum[self.unit_num_width()] - } - pub fn unit_out_reg_num(&self) -> UnitOutRegNum { - UnitOutRegNum[self.out_reg_num_width] - } - pub fn p_reg_num(&self) -> PRegNum { - PRegNum[self.unit_num_width()][self.out_reg_num_width] - } pub fn p_reg_num_width(&self) -> usize { self.unit_num_width() + self.out_reg_num_width } - pub fn renamed_mop_in_unit(&self) -> RenamedMOp, DynSize> { - RenamedMOp[self.unit_out_reg_num()][self.p_reg_num_width()] - } - pub fn unit_output_write(&self) -> UnitOutputWrite { - UnitOutputWrite[self.out_reg_num_width] - } - pub fn unit_output_writes(&self) -> Array>> { - Array[HdlOption[self.unit_output_write()]][self.non_const_unit_nums().len()] - } - pub fn unit_cancel_input(&self) -> UnitCancelInput { - UnitCancelInput[self.out_reg_num_width] - } - pub fn unit_forwarding_info(&self) -> UnitForwardingInfo { - UnitForwardingInfo[self.unit_num_width()][self.out_reg_num_width] - [self.non_const_unit_nums().len()] - } pub fn unit_max_in_flight(&self, unit_index: usize) -> NonZeroUsize { self.units[unit_index] .max_in_flight .unwrap_or(self.default_unit_max_in_flight) } - pub fn unit_to_reg_alloc< - MOp: Type + MOpTrait, SrcRegWidth = DynSize>, - ExtraOut: Type, - >( - &self, - mop_ty: MOp, - extra_out_ty: ExtraOut, - ) -> UnitToRegAlloc { - assert_eq!( - mop_ty.dest_reg_ty(), - self.unit_out_reg_num(), - "inconsistent types", - ); - UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width] - [self.non_const_unit_nums().len()] + /// the maximum of all [`unit_max_in_flight()`][Self::unit_max_in_flight()] + pub fn max_unit_max_in_flight(&self) -> NonZeroUsize { + (0..self.units.len()) + .map(|unit_index| self.unit_max_in_flight(unit_index)) + .max() + .unwrap_or(self.default_unit_max_in_flight) } pub fn fetch_width_in_bytes(&self) -> usize { 1usize @@ -188,6 +148,21 @@ impl CpuConfig { } } +#[hdl(get(|c| c.out_reg_num_width))] +pub type CpuConfigOutRegNumWidth> = DynSize; + +#[hdl(get(|c| c.unit_num_width()))] +pub type CpuConfigUnitNumWidth> = DynSize; + +#[hdl(get(|c| c.p_reg_num_width()))] +pub type CpuConfigPRegNumWidth> = DynSize; + +#[hdl(get(|c| 1 << c.out_reg_num_width))] +pub type CpuConfig2PowOutRegNumWidth> = DynSize; + +#[hdl(get(|c| c.units.len()))] +pub type CpuConfigUnitCount> = DynSize; + #[hdl(get(|c| c.fetch_width.get()))] pub type CpuConfigFetchWidth> = DynSize; @@ -236,6 +211,10 @@ pub type CpuConfigL1ICacheMaxMissesInFlight> = Dyn #[hdl(get(|c| c.rob_size.get()))] pub type CpuConfigRobSize> = DynSize; +/// the maximum of all [`unit_max_in_flight()`][CpuConfig::unit_max_in_flight()] +#[hdl(get(|c| c.max_unit_max_in_flight().get()))] +pub type CpuConfigMaxUnitMaxInFlight> = DynSize; + pub trait PhantomConstCpuConfig: PhantomConstGet + Into> diff --git a/crates/cpu/src/instruction.rs b/crates/cpu/src/instruction.rs index 1f9f5ae..5526895 100644 --- a/crates/cpu/src/instruction.rs +++ b/crates/cpu/src/instruction.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information use crate::{ + config::{CpuConfig, CpuConfigOutRegNumWidth, CpuConfigUnitNumWidth, PhantomConstCpuConfig}, register::{PRegFlags, PRegFlagsViewTrait, PRegValue, ViewUnused}, unit::UnitMOp, util::{Rotate, range_u32_len}, @@ -91,6 +92,9 @@ pub trait MOpTrait: Type { type SrcRegWidth: Size; fn dest_reg_ty(self) -> Self::DestReg; fn dest_reg(input: impl ToExpr) -> Expr; + fn dest_reg_sim(input: impl ToSimValue) -> SimValue; + fn dest_reg_sim_ref(input: &SimValue) -> &SimValue; + fn dest_reg_sim_mut(input: &mut SimValue) -> &mut SimValue; fn src_reg_width(self) -> ::SizeType; fn src_reg_ty(self) -> UIntType { UInt[self.src_reg_width()] @@ -102,6 +106,18 @@ pub trait MOpTrait: Type { input: impl ToExpr, f: &mut impl FnMut(Expr>, usize), ); + fn for_each_src_reg_sim( + input: SimValue, + f: &mut impl FnMut(SimValue>, usize), + ); + fn for_each_src_reg_sim_ref( + input: &SimValue, + f: &mut impl FnMut(&SimValue>, usize), + ); + fn for_each_src_reg_sim_mut( + input: &mut SimValue, + f: &mut impl FnMut(&mut SimValue>, usize), + ); fn connect_src_regs( input: impl ToExpr, src_regs: impl ToExpr, { COMMON_MOP_SRC_LEN }>>, @@ -125,6 +141,15 @@ pub trait MOpTrait: Type { usize, ) -> Expr>, ) -> Expr>; + fn map_regs_sim( + input: impl ToSimValue, + new_dest: impl ToSimValue, + new_src_reg_width: NewSrcRegWidth::SizeType, + map_src: &mut impl FnMut( + SimValue>, + usize, + ) -> SimValue>, + ) -> SimValue>; } pub trait CommonMOpTrait: MOpTrait { @@ -146,6 +171,21 @@ pub trait CommonMOpTrait: MOpTrait { fn common_mop( input: impl ToExpr, ) -> Expr>; + fn common_mop_sim( + input: impl ToSimValue, + ) -> SimValue< + CommonMOp, + >; + fn common_mop_sim_ref( + input: &SimValue, + ) -> &SimValue< + CommonMOp, + >; + fn common_mop_sim_mut( + input: &mut SimValue, + ) -> &mut SimValue< + CommonMOp, + >; fn with_common_mop_ty( self, new_common_mop_ty: CommonMOp< @@ -168,6 +208,18 @@ pub trait CommonMOpTrait: MOpTrait { >, >, ) -> Expr>; + fn with_common_mop_sim( + input: impl ToSimValue, + new_common_mop: impl ToSimValue< + Type = CommonMOp< + Self::PrefixPad, + NewDestReg, + NewSrcRegWidth, + Self::SrcCount, + Self::Imm, + >, + >, + ) -> SimValue>; } pub type CommonMOpFor = CommonMOp< @@ -189,6 +241,15 @@ impl MOpTrait for T { fn dest_reg(input: impl ToExpr) -> Expr { T::common_mop(input).dest } + fn dest_reg_sim(input: impl ToSimValue) -> SimValue { + SimValue::into_value(T::common_mop_sim(input)).dest + } + fn dest_reg_sim_ref(input: &SimValue) -> &SimValue { + &T::common_mop_sim_ref(input).dest + } + fn dest_reg_sim_mut(input: &mut SimValue) -> &mut SimValue { + &mut T::common_mop_sim_mut(input).dest + } fn src_reg_width(self) -> ::SizeType { self.common_mop_ty().src.element().width } @@ -202,6 +263,37 @@ impl MOpTrait for T { f(common.src[index], index); } } + fn for_each_src_reg_sim( + input: SimValue, + f: &mut impl FnMut(SimValue>, usize), + ) { + let common = SimValue::into_value(T::common_mop_sim(input)); + for (index, src) in SimValue::into_value(common.src) + .into_iter() + .take(T::SrcCount::VALUE) + .enumerate() + { + f(src, index); + } + } + fn for_each_src_reg_sim_ref( + input: &SimValue, + f: &mut impl FnMut(&SimValue>, usize), + ) { + let common = T::common_mop_sim_ref(input); + for index in 0..T::SrcCount::VALUE { + f(&common.src[index], index); + } + } + fn for_each_src_reg_sim_mut( + input: &mut SimValue, + f: &mut impl FnMut(&mut SimValue>, usize), + ) { + let common = T::common_mop_sim_mut(input); + for index in 0..T::SrcCount::VALUE { + f(&mut common.src[index], index); + } + } fn mapped_ty( self, new_dest_reg: NewDestReg, @@ -244,6 +336,30 @@ impl MOpTrait for T { }, ) } + #[hdl] + fn map_regs_sim( + input: impl ToSimValue, + new_dest: impl ToSimValue, + new_src_reg_width: NewSrcRegWidth::SizeType, + map_src: &mut impl FnMut( + SimValue>, + usize, + ) -> SimValue>, + ) -> SimValue> { + let input = input.into_sim_value(); + let common = T::common_mop_sim_ref(&input); + let common = #[hdl(sim)] + CommonMOp::<_, _, _, _, _> { + prefix_pad: &common.prefix_pad, + dest: new_dest, + src: SimValue::from_array_elements( + ArrayType[UIntType[new_src_reg_width]][T::SrcCount::SIZE], + (0..T::SrcCount::VALUE).map(|index| map_src(common.src[index].clone(), index)), + ), + imm: common.imm, + }; + T::with_common_mop_sim(input, common) + } } impl MOpVisitVariants for T { @@ -532,6 +648,27 @@ impl, + ) -> SimValue< + CommonMOp, + > { + input.into_sim_value() + } + fn common_mop_sim_ref( + input: &SimValue, + ) -> &SimValue< + CommonMOp, + > { + input + } + fn common_mop_sim_mut( + input: &mut SimValue, + ) -> &mut SimValue< + CommonMOp, + > { + input + } fn with_common_mop_ty( self, new_common_mop_ty: CommonMOp< @@ -564,6 +701,24 @@ impl( + input: impl ToSimValue, + new_common_mop: impl ToSimValue< + Type = CommonMOp< + Self::PrefixPad, + NewDestReg, + NewSrcRegWidth, + Self::SrcCount, + Self::Imm, + >, + >, + ) -> SimValue> { + let input = input.into_sim_value(); + let new_common_mop = new_common_mop.into_sim_value(); + input.ty().validate(); + new_common_mop.ty().validate(); + new_common_mop + } } pub const COMMON_MOP_0_IMM_WIDTH: usize = common_mop_max_imm_size(0); @@ -613,6 +768,27 @@ macro_rules! common_mop_struct { ) -> Expr> { CommonMOpTrait::common_mop(input.to_expr().$common) } + fn common_mop_sim( + input: impl ToSimValue, + ) -> SimValue< + CommonMOp, + > { + CommonMOpTrait::common_mop_sim(SimValue::into_value(input.into_sim_value()).$common) + } + fn common_mop_sim_ref( + input: &SimValue, + ) -> &SimValue< + CommonMOp, + > { + CommonMOpTrait::common_mop_sim_ref(&input.$common) + } + fn common_mop_sim_mut( + input: &mut SimValue, + ) -> &mut SimValue< + CommonMOp, + > { + CommonMOpTrait::common_mop_sim_mut(&mut input.$common) + } fn with_common_mop_ty( self, new_common_mop_ty: CommonMOp, @@ -636,6 +812,26 @@ macro_rules! common_mop_struct { $($field: input.$field,)* } } + #[hdl] + fn with_common_mop_sim( + input: impl ToSimValue, + new_common_mop: impl ToSimValue< + Type = CommonMOp< + Self::PrefixPad, + NewDestReg, + NewSrcRegWidth, + Self::SrcCount, + Self::Imm, + >, + >, + ) -> SimValue> { + let input = SimValue::into_value(input.into_sim_value()); + #[hdl(sim)] + Self::Mapped:: { + $common: CommonMOpTrait::with_common_mop_sim(input.$common, new_common_mop), + $($field: input.$field,)* + } + } } }; } @@ -741,6 +937,37 @@ macro_rules! mop_enum { } dest_reg } + #[hdl] + fn dest_reg_sim(input: impl ToSimValue) -> SimValue { + #![allow(unreachable_patterns)] + let input = input.into_sim_value(); + #[hdl(sim)] + match input { + Self::$FirstVariant(v) => <$first_ty as MOpTrait>::dest_reg_sim(v), + $(Self::$Variant(v) => <$ty as MOpTrait>::dest_reg_sim(v),)* + _ => unreachable!(), + } + } + #[hdl] + fn dest_reg_sim_ref(input: &SimValue) -> &SimValue { + #![allow(unreachable_patterns)] + #[hdl(sim)] + match input { + Self::$FirstVariant(v) => <$first_ty as MOpTrait>::dest_reg_sim_ref(v), + $(Self::$Variant(v) => <$ty as MOpTrait>::dest_reg_sim_ref(v),)* + _ => unreachable!(), + } + } + #[hdl] + fn dest_reg_sim_mut(input: &mut SimValue) -> &mut SimValue { + #![allow(unreachable_patterns)] + #[hdl(sim)] + match input { + Self::$FirstVariant(v) => <$first_ty as MOpTrait>::dest_reg_sim_mut(v), + $(Self::$Variant(v) => <$ty as MOpTrait>::dest_reg_sim_mut(v),)* + _ => unreachable!(), + } + } fn src_reg_width(self) -> ::SizeType { self.$FirstVariant.src_reg_width() } @@ -755,6 +982,45 @@ macro_rules! mop_enum { $(Self::$Variant(v) => MOpTrait::for_each_src_reg(v, f),)* } } + #[hdl] + fn for_each_src_reg_sim( + input: SimValue, + f: &mut impl FnMut(SimValue>, usize), + ) { + #![allow(unreachable_patterns)] + #[hdl(sim)] + match input { + Self::$FirstVariant(v) => MOpTrait::for_each_src_reg_sim(v, f), + $(Self::$Variant(v) => MOpTrait::for_each_src_reg_sim(v, f),)* + _ => unreachable!(), + } + } + #[hdl] + fn for_each_src_reg_sim_ref( + input: &SimValue, + f: &mut impl FnMut(&SimValue>, usize), + ) { + #![allow(unreachable_patterns)] + #[hdl(sim)] + match input { + Self::$FirstVariant(v) => MOpTrait::for_each_src_reg_sim_ref(v, f), + $(Self::$Variant(v) => MOpTrait::for_each_src_reg_sim_ref(v, f),)* + _ => unreachable!(), + } + } + #[hdl] + fn for_each_src_reg_sim_mut( + input: &mut SimValue, + f: &mut impl FnMut(&mut SimValue>, usize), + ) { + #![allow(unreachable_patterns)] + #[hdl(sim)] + match input { + Self::$FirstVariant(v) => MOpTrait::for_each_src_reg_sim_mut(v, f), + $(Self::$Variant(v) => MOpTrait::for_each_src_reg_sim_mut(v, f),)* + _ => unreachable!(), + } + } fn mapped_ty( self, new_dest_reg: NewDestReg, @@ -784,6 +1050,33 @@ macro_rules! mop_enum { } mapped_regs } + #[hdl] + fn map_regs_sim( + input: impl ToSimValue, + new_dest: impl ToSimValue, + new_src_reg_width: NewSrcRegWidth::SizeType, + map_src: &mut impl FnMut( + SimValue>, + usize, + ) -> SimValue>, + ) -> SimValue> { + #![allow(unreachable_patterns)] + let input = input.into_sim_value(); + let new_dest = new_dest.into_sim_value(); + let mapped_ty = input.ty().mapped_ty(new_dest.ty(), new_src_reg_width); + #[hdl(sim)] + match input { + Self::$FirstVariant(v) => { + #[hdl(sim)] + mapped_ty.$FirstVariant(MOpTrait::map_regs_sim(v, new_dest, new_src_reg_width, map_src)) + } + $(Self::$Variant(v) => { + #[hdl(sim)] + mapped_ty.$Variant(MOpTrait::map_regs_sim(v, new_dest, new_src_reg_width, map_src)) + })* + _ => unreachable!(), + } + } } }; ( @@ -2595,34 +2888,53 @@ impl MoveRegMOp { } } -#[hdl(cmp_eq)] +#[hdl(cmp_eq, no_static)] /// there may be more than one unit of a given kind, so UnitNum is not the same as UnitKind. /// zero is used for built-in constants, such as the zero register -pub struct UnitNum { - pub adj_value: UIntType, +pub struct UnitNum> { + pub adj_value: UIntType>, + pub config: C, } -impl UnitNum { - #[hdl] +impl UnitNum { pub fn const_zero(self) -> Expr { - #[hdl] - UnitNum { - adj_value: CONST_ZERO_UNIT_NUM.cast_to(self.adj_value), - } + self.const_zero_sim().to_expr() } #[hdl] + pub fn const_zero_sim(self) -> SimValue { + #[hdl(sim)] + UnitNum::<_> { + adj_value: CONST_ZERO_UNIT_NUM.cast_to(self.adj_value), + config: self.config, + } + } pub fn from_index(self, index: usize) -> Expr { - #[hdl] - UnitNum { + self.from_index_sim(index).to_expr() + } + #[hdl] + pub fn from_index_sim(self, index: usize) -> SimValue { + #[hdl(sim)] + UnitNum::<_> { adj_value: (index + 1).cast_to(self.adj_value), + config: self.config, } } pub fn is_index(expr: impl ToExpr, index: usize) -> Expr { let expr = expr.to_expr(); expr.ty().from_index(index).adj_value.cmp_eq(expr.adj_value) } + pub fn index_sim(expr: &SimValue) -> Option { + let adj_value = expr.adj_value.cast_to_static::>().as_int(); + if adj_value == 0 { + None + } else { + Some(adj_value as usize - 1) + } + } #[hdl] - pub fn as_index(expr: impl ToExpr) -> Expr>> { + pub fn as_index( + expr: impl ToExpr, + ) -> Expr>>> { let expr = expr.to_expr(); #[hdl] let unit_index = wire(HdlOption[expr.ty().adj_value]); @@ -2640,19 +2952,20 @@ impl UnitNum { pub const CONST_ZERO_UNIT_NUM: usize = 0; -#[hdl(cmp_eq)] -pub struct UnitOutRegNum { - pub value: UIntType, +#[hdl(cmp_eq, no_static)] +pub struct UnitOutRegNum> { + pub value: UIntType>, + pub config: C, } -#[hdl(cmp_eq)] +#[hdl(cmp_eq, no_static)] /// Physical Register Number -- registers in the CPU's backend -pub struct PRegNum { - pub unit_num: UnitNum, - pub unit_out_reg: UnitOutRegNum, +pub struct PRegNum> { + pub unit_num: UnitNum, + pub unit_out_reg: UnitOutRegNum, } -impl PRegNum { +impl PRegNum { #[hdl] pub fn const_zero(self) -> Expr { #[hdl] @@ -2661,6 +2974,7 @@ impl PRegNum; + +#[hdl] +/// A µOp along with the state needed for this instance of the µOp. +pub struct MOpInstance { + pub fetch_block_id: UInt<8>, + pub id: MOpId, + pub pc: UInt<64>, + /// initialized to 0 by decoder, overwritten by `next_pc()` + pub predicted_next_pc: UInt<64>, + pub size_in_bytes: UInt<4>, + /// `true` if this µOp is the first µOp in the ISA-level instruction. + /// In general, a single µOp can't be cancelled by itself, + /// it needs to be cancelled along with all other µOps that + /// come from the same ISA-level instruction. + pub is_first_mop_in_insn: Bool, + pub mop: MOp, +} + +#[hdl(no_static)] +/// TODO: merge with [`crate::next_pc::PostDecodeOutputInterface`] +pub struct PostDecodeOutputInterface> { + pub insns: ArrayVec, CpuConfigFetchWidth>, + #[hdl(flip)] + pub ready: UIntInRangeInclusiveType, CpuConfigFetchWidth>, + /// tells the rename/execute/retire circuit to cancel all non-retired instructions + pub cancel: ReadyValid<()>, + pub config: C, +} + +#[hdl(no_static)] +/// handles updating speculative branch predictor state (e.g. branch histories) +/// when instructions retire, as well as updating state when a +/// branch instruction is mis-speculated. +pub struct RetireToNextPcInterface> { + pub inner: ReadyValid>, + /// only for debugging + pub next_insns: HdlOption, CpuConfigRobSize>>, +} + +fn zeroed(ty: T) -> SimValue { + SimValue::from_opaque( + ty, + OpaqueSimValue::from_bits(UInt::new(ty.canonical().bit_width()).zero()), + ) +} + +impl SimValueDefault for RenameExecuteRetireDebugState { + fn sim_value_default(self) -> SimValue { + zeroed(self) + } +} + +#[hdl(no_static)] +struct RenameTableDebugState> { + entries: Array, { 1 << MOpRegNum::WIDTH }>, + prev_entries: Array, { 1 << MOpRegNum::WIDTH }>, + free_regs: ArrayType>, CpuConfigUnitCount>, + free_l2_regs: Array, + config: C, +} + +#[derive(Debug)] +struct RenameTable { + entries: Box<[SimValue>; 1 << MOpRegNum::WIDTH]>, + prev_entries: Box<[SimValue>; 1 << MOpRegNum::WIDTH]>, + free_regs: Box<[Box<[bool]>]>, + free_l2_regs: Box<[bool; 1 << MOpRegNum::WIDTH]>, + config: C, +} + +impl Clone for RenameTable { + fn clone(&self) -> Self { + Self { + entries: self.entries.clone(), + prev_entries: self.prev_entries.clone(), + free_regs: self.free_regs.clone(), + free_l2_regs: self.free_l2_regs.clone(), + config: self.config.clone(), + } + } + fn clone_from(&mut self, source: &Self) { + let Self { + entries, + prev_entries, + free_regs, + free_l2_regs, + config, + } = self; + entries.clone_from(&source.entries); + prev_entries.clone_from(&source.prev_entries); + free_regs.clone_from(&source.free_regs); + free_l2_regs.clone_from(&source.free_l2_regs); + *config = source.config; + } +} + +impl RenameTable { + fn new(config: C) -> Self { + let entries: Box<[SimValue>; 1 << MOpRegNum::WIDTH]> = + vec![PRegNum[config].const_zero().into_sim_value(); 1 << MOpRegNum::WIDTH] + .try_into() + .expect("size is known to match"); + let free_regs_for_unit = vec![true; CpuConfig2PowOutRegNumWidth[config]].into_boxed_slice(); + let free_regs = vec![free_regs_for_unit; CpuConfigUnitCount[config]].into_boxed_slice(); + Self { + entries: entries.clone(), + prev_entries: entries, + free_regs, + free_l2_regs: vec![true; 1 << MOpRegNum::WIDTH] + .try_into() + .expect("size is known to match"), + config, + } + } + #[hdl] + fn to_debug_state(&self) -> SimValue> { + let Self { + entries, + prev_entries, + free_regs, + free_l2_regs, + config, + } = self; + let ty = RenameTableDebugState[*config]; + #[hdl(sim)] + RenameTableDebugState::<_> { + entries: entries.to_sim_value_with_type(ty.entries), + prev_entries: prev_entries.to_sim_value_with_type(ty.prev_entries), + free_regs: free_regs.to_sim_value_with_type(ty.free_regs), + free_l2_regs, + config, + } + } +} + +#[hdl(no_static)] +struct RobEntryDebugState> { + mop: MOpInstance, CpuConfigPRegNumWidth>>, + unit_num: UnitNum, + config: C, +} + +impl SimValueDefault for RobEntryDebugState { + fn sim_value_default(self) -> SimValue { + zeroed(self) + } +} + +#[derive(Debug)] +struct RobEntry { + mop: SimValue, CpuConfigPRegNumWidth>>>, + unit_index: usize, + config: C, +} + +#[hdl] +struct OrigMOpQueueEntryDebugState { + mop: MOpInstance, + /// number of renamed µOps that this non-renamed µOp corresponds to + renamed_mop_count: UInt<8>, +} + +#[derive(Debug)] +struct OrigMOpQueueEntry { + mop: SimValue>, + /// number of renamed µOps that this non-renamed µOp corresponds to + renamed_mop_count: u8, +} + +#[hdl(no_static)] +struct UnitDebugState> { + assigned_rob_entries: ArrayVec>, + /// see [`UnitState::started_l2_store`] + started_l2_store: Bool, + config: C, +} + +#[derive(Debug)] +struct UnitState { + assigned_rob_entries: VecDeque>, + /// `true` if a L2 register file write was started for this unit after the last µOp was + /// assigned to this unit. + /// So, if this unit runs out of registers and a L2 register file write is started, this gets + /// set to `true`, and if a new µOp is assigned to this unit, this gets set to `false`. + started_l2_store: bool, + config: C, +} + +impl UnitState { + fn finish_cancel(&mut self) { + let Self { + assigned_rob_entries, + started_l2_store, + config: _, + } = self; + assigned_rob_entries.clear(); + *started_l2_store = false; + } +} + +#[hdl(no_static)] +pub struct RenameExecuteRetireDebugState> { + next_mop_id: MOpId, + rename_delayed: ArrayVec, TwiceCpuConfigFetchWidth>, + next_renamed_mop_count: UInt<8>, + rename_table: RenameTableDebugState, + retire_rename_table: RenameTableDebugState, + rob: ArrayVec, CpuConfigRobSize>, + orig_mop_queue: ArrayVec>, + units: ArrayType, CpuConfigUnitCount>, + canceling: Bool, +} + +#[derive(Debug)] +struct RenameExecuteRetireState { + next_mop_id: SimValue, + rename_delayed: VecDeque>>, + /// count of renamed µOps that have been started that correspond to the next un-renamed µOp in `rename_delayed` + next_renamed_mop_count: u8, + rename_table: RenameTable, + retire_rename_table: RenameTable, + rob: VecDeque>, + orig_mop_queue: VecDeque, + units: Box<[UnitState]>, + canceling: bool, + l2_reg_file_unit_index: usize, + config: C, +} + +impl RenameExecuteRetireState { + fn new(config: C) -> Self { + let rename_table = RenameTable::new(config); + Self { + next_mop_id: MOpId.zero().into_sim_value(), + rename_delayed: VecDeque::with_capacity(TwiceCpuConfigFetchWidth[config]), + next_renamed_mop_count: 0, + rename_table: rename_table.clone(), + retire_rename_table: rename_table, + rob: VecDeque::with_capacity(CpuConfigRobSize[config]), + orig_mop_queue: VecDeque::with_capacity(CpuConfigRobSize[config]), + units: Box::from_iter((0..config.get().units.len()).map(|unit_index| UnitState { + assigned_rob_entries: VecDeque::with_capacity( + config.get().unit_max_in_flight(unit_index).get(), + ), + started_l2_store: false, + config, + })), + canceling: false, + l2_reg_file_unit_index: config + .get() + .units + .iter() + .position(|unit| unit.kind == UnitKind::TransformedMove) + .expect("Unit for L2 register file is missing"), + config, + } + } + #[hdl] + async fn write_for_debug( + &self, + sim: &mut ExternModuleSimulationState, + state_for_debug: Expr>, + ) { + let Self { + ref next_mop_id, + ref rename_delayed, + next_renamed_mop_count, + ref rename_table, + ref retire_rename_table, + ref rob, + ref orig_mop_queue, + ref units, + canceling, + l2_reg_file_unit_index: _, + config, + } = *self; + sim.write( + state_for_debug, + #[hdl(sim)] + RenameExecuteRetireDebugState::<_> { + next_mop_id, + rename_delayed: state_for_debug + .ty() + .rename_delayed + .from_iter_sim(zeroed(StaticType::TYPE), rename_delayed) + .expect("known to fit"), + next_renamed_mop_count, + rename_table: rename_table.to_debug_state(), + retire_rename_table: retire_rename_table.to_debug_state(), + rob: state_for_debug + .ty() + .rob + .from_iter_sim( + zeroed(RobEntryDebugState[config]), + rob.iter().map(|entry| { + let RobEntry { + mop, + unit_index, + config: _, + } = entry; + #[hdl(sim)] + RobEntryDebugState::<_> { + mop, + unit_num: UnitNum[config].from_index_sim(*unit_index), + config, + } + }), + ) + .expect("known to fit"), + orig_mop_queue: state_for_debug + .ty() + .orig_mop_queue + .from_iter_sim( + zeroed(OrigMOpQueueEntryDebugState), + orig_mop_queue.iter().map(|entry| { + let OrigMOpQueueEntry { + mop, + renamed_mop_count, + } = entry; + #[hdl(sim)] + OrigMOpQueueEntryDebugState { + mop, + renamed_mop_count, + } + }), + ) + .expect("known to fit"), + units: SimValue::from_array_elements( + state_for_debug.ty().units, + units.iter().map(|unit| { + let UnitState { + assigned_rob_entries, + started_l2_store, + config: _, + } = unit; + let ty = UnitDebugState[config]; + #[hdl(sim)] + UnitDebugState::<_> { + assigned_rob_entries: ty + .assigned_rob_entries + .from_iter_sim(zeroed(UInt::new_static()), assigned_rob_entries) + .expect("known to fit"), + started_l2_store, + config, + } + }), + ), + canceling, + }, + ) + .await; + } + #[hdl] + async fn write_to_next_pc_next_insns( + &self, + sim: &mut ExternModuleSimulationState, + next_insns: Expr, CpuConfigRobSize>>>, + ) { + sim.write( + next_insns, + if self.canceling { + #[hdl(sim)] + (next_insns.ty()).HdlNone() + } else { + #[hdl(sim)] + (next_insns.ty()).HdlSome( + next_insns + .ty() + .HdlSome + .from_iter_sim( + zeroed(MOpInstance[MOp]), + self.rename_delayed + .iter() + .chain(self.orig_mop_queue.iter().map(|entry| &entry.mop)), + ) + .expect("known to fit"), + ) + }, + ) + .await; + } + fn space_available_for_unit(&self, unit_index: usize) -> usize { + self.config + .get() + .unit_max_in_flight(unit_index) + .get() + .saturating_sub(self.units[unit_index].assigned_rob_entries.len()) + } + //#[hdl] + fn try_rename( + &mut self, + insn: SimValue>, + ) -> Result<(), SimValue>> { + let unit_kind = UnitMOp::kind_sim(&insn.mop); + if let UnitKind::TransformedMove = unit_kind { + todo!("handle reg-reg moves in rename stage"); + } + #[derive(Clone, Copy)] + struct ChosenUnit { + unit_index: usize, + out_reg_num: Option, + space_available: usize, + } + impl ChosenUnit { + fn is_better_than(self, other: Self) -> bool { + let Self { + unit_index: _, + out_reg_num, + space_available, + } = self; + if out_reg_num.is_some() != other.out_reg_num.is_some() { + out_reg_num.is_some() + } else { + space_available > other.space_available + } + } + } + let mut chosen_unit = None; + for (unit_index, unit_state) in self.units.iter().enumerate() { + if self.config.get().units[unit_index].kind != unit_kind { + continue; + } + let cur_unit = ChosenUnit { + unit_index, + out_reg_num: self.rename_table.free_regs[unit_index] + .iter() + .position(|v| *v), + space_available: self.space_available_for_unit(unit_index), + }; + let chosen_unit = chosen_unit.get_or_insert(cur_unit); + if cur_unit.is_better_than(*chosen_unit) { + *chosen_unit = cur_unit; + } + } + let Some(ChosenUnit { + unit_index, + out_reg_num, + space_available, + }) = chosen_unit + else { + panic!( + "there are no units of kind: {unit_kind:?}:\n{:?}", + self.config, + ); + }; + if space_available == 0 { + return Err(insn); + } + let Some(out_reg_num) = out_reg_num else { + if self.units[unit_index].started_l2_store { + if self.space_available_for_unit(self.l2_reg_file_unit_index) > 0 { + todo!("start a L2 register file store"); + } + } + return Err(insn); + }; + todo!() + } + fn get_from_post_decode_ready(&self) -> usize { + if self.canceling { + 0 + } else { + TwiceCpuConfigFetchWidth[self.config] + .saturating_sub(self.rename_delayed.len()) + .min(CpuConfigFetchWidth[self.config]) + } + } + fn handle_from_post_decode(&mut self, insns: &[SimValue>]) { + if insns.is_empty() { + return; + } + assert!(!self.canceling); + for insn in insns { + self.rename_delayed.push_back(insn.clone()); + } + for _ in 0..CpuConfigFetchWidth[self.config] { + let Some(insn) = self.rename_delayed.pop_front() else { + break; + }; + match self.try_rename(insn) { + Ok(()) => {} + Err(insn) => { + self.rename_delayed.push_front(insn); + break; + } + } + } + } + fn finish_cancel(&mut self) { + let Self { + next_mop_id: _, + rename_delayed, + next_renamed_mop_count, + rename_table, + retire_rename_table, + rob, + orig_mop_queue, + units, + canceling, + l2_reg_file_unit_index: _, + config: _, + } = self; + assert!(*canceling); + rename_delayed.clear(); + *next_renamed_mop_count = 0; + rename_table.clone_from(retire_rename_table); + rob.clear(); + orig_mop_queue.clear(); + for unit in units { + unit.finish_cancel(); + } + *canceling = false; + } + fn retire_peek(&self) -> Vec>> { + if self.canceling { + return Vec::new(); + } + // TODO: implement + Vec::new() + } + fn retire_one(&mut self, retire: SimValue>) { + assert!(!self.canceling); + todo!("{retire:#?}"); + } +} + +#[hdl] +async fn rename_execute_retire_run( + mut sim: ExternModuleSimulationState, + cd: Expr, + from_post_decode: Expr>>, + to_next_pc: Expr>>, + state_for_debug: Expr>>, + config: PhantomConst, +) { + let mut state = RenameExecuteRetireState::new(config); + loop { + state + .write_to_next_pc_next_insns(&mut sim, to_next_pc.next_insns) + .await; + state.write_for_debug(&mut sim, state_for_debug).await; + let from_post_decode_ready = state.get_from_post_decode_ready(); + assert!(from_post_decode_ready <= from_post_decode.ty().ready.end()); + sim.write(from_post_decode.ready, from_post_decode_ready) + .await; + sim.write(from_post_decode.cancel.ready, state.canceling) + .await; + let retire_peek = state.retire_peek(); + sim.write( + to_next_pc.inner.data, + if retire_peek.is_empty() { + #[hdl(sim)] + (to_next_pc.ty().inner.data).HdlNone() + } else { + let inner_ty = RetireToNextPcInterfaceInner[config]; + #[hdl(sim)] + (to_next_pc.ty().inner.data).HdlSome( + #[hdl(sim)] + RetireToNextPcInterfaceInner::<_> { + insns: inner_ty + .insns + .from_iter_sim(zeroed(inner_ty.insns.element()), &retire_peek) + .expect("known to fit"), + config, + }, + ) + }, + ) + .await; + sim.wait_for_clock_edge(cd.clk).await; + let from_post_decode_insns = sim.read_past(from_post_decode.insns, cd.clk).await; + let from_post_decode_insns = ArrayVec::elements_sim_ref(&from_post_decode_insns); + state.handle_from_post_decode( + from_post_decode_insns + .get(..from_post_decode_ready) + .unwrap_or(from_post_decode_insns), + ); + if state.canceling { + #[hdl(sim)] + if let HdlSome(_) = sim.read_past(from_post_decode.cancel.data, cd.clk).await { + state.finish_cancel(); + } + } + if sim.read_past_bool(to_next_pc.inner.ready, cd.clk).await { + for retire in retire_peek { + state.retire_one(retire); + } + } + } +} + +#[hdl_module(extern)] +pub fn rename_execute_retire(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let from_post_decode: PostDecodeOutputInterface> = + m.input(PostDecodeOutputInterface[config]); + #[hdl] + let to_next_pc: RetireToNextPcInterface> = + m.output(RetireToNextPcInterface[config]); + #[hdl] + let state_for_debug: RenameExecuteRetireDebugState> = + m.output(RenameExecuteRetireDebugState[config]); + m.register_clock_for_past(cd.clk); + m.extern_module_simulation_fn( + (cd, from_post_decode, to_next_pc, state_for_debug, config), + |(cd, from_post_decode, to_next_pc, state_for_debug, config), mut sim| async move { + sim.write(state_for_debug, state_for_debug.ty().sim_value_default()) + .await; + sim.resettable( + cd, + |mut sim: ExternModuleSimulationState| async move { + sim.write(from_post_decode.ready, 0usize).await; + sim.write(from_post_decode.cancel.ready, false).await; + sim.write(to_next_pc.inner.data, to_next_pc.ty().inner.data.HdlNone()) + .await; + sim.write(to_next_pc.next_insns, to_next_pc.ty().next_insns.HdlNone()) + .await; + }, + |sim, ()| { + rename_execute_retire_run( + sim, + cd, + from_post_decode, + to_next_pc, + state_for_debug, + config, + ) + }, + ) + .await; + }, + ); +} diff --git a/crates/cpu/src/unit.rs b/crates/cpu/src/unit.rs index 400358c..afeb4eb 100644 --- a/crates/cpu/src/unit.rs +++ b/crates/cpu/src/unit.rs @@ -2,7 +2,7 @@ // See Notices.txt for copyright information use crate::{ - config::CpuConfig, + config::{CpuConfig, PhantomConstCpuConfig}, instruction::{ AluBranchMOp, LoadStoreMOp, MOp, MOpDestReg, MOpInto, MOpRegNum, MOpTrait, MOpVariantVisitOps, MOpVariantVisitor, MOpVisitVariants, RenamedMOp, UnitOutRegNum, @@ -32,7 +32,7 @@ macro_rules! all_units { $( $(#[transformed_move $($transformed_move:tt)*])? #[create_dyn_unit_fn = $create_dyn_unit_fn:expr] - #[extract = $extract:ident] + #[extract($extract:ident, $extract_sim:ident, $extract_sim_ref:ident, $extract_sim_mut:ident)] $(#[$variant_meta:meta])* $Unit:ident($Op:ty), )* @@ -48,7 +48,7 @@ macro_rules! all_units { } impl $UnitKind { - pub fn unit(self, config: &CpuConfig, unit_index: usize) -> DynUnit { + pub fn unit(self, config: PhantomConst, unit_index: usize) -> DynUnit { match self { $($UnitKind::$Unit => $create_dyn_unit_fn(config, unit_index),)* } @@ -112,6 +112,15 @@ macro_rules! all_units { } unit_kind } + #[hdl] + $vis fn kind_sim(expr: &SimValue) -> UnitKind { + #![allow(unreachable_patterns)] + #[hdl(sim)] + match expr { + $(Self::$Unit(_) => $UnitKind::$Unit,)* + _ => unreachable!(), + } + } $( #[hdl] $vis fn $extract(expr: impl ToExpr) -> Expr> { @@ -126,6 +135,34 @@ macro_rules! all_units { } $extract } + #[hdl] + $vis fn $extract_sim(expr: impl ToSimValue) -> Option> { + let expr = expr.into_sim_value(); + #[hdl(sim)] + if let Self::$Unit(v) = expr { + Some(v) + } else { + None + } + } + #[hdl] + $vis fn $extract_sim_ref(expr: &SimValue) -> Option<&SimValue<$Op>> { + #[hdl(sim)] + if let Self::$Unit(v) = expr { + Some(v) + } else { + None + } + } + #[hdl] + $vis fn $extract_sim_mut(expr: &mut SimValue) -> Option<&mut SimValue<$Op>> { + #[hdl(sim)] + if let Self::$Unit(v) = expr { + Some(v) + } else { + None + } + } )* $vis fn with_transformed_move_op_ty(self, new_transformed_move_op_ty: T) -> $UnitMOpEnum<$DestReg, $SrcRegWidth, T> where @@ -254,14 +291,14 @@ all_units! { })] TransformedMoveOp: Type > { #[create_dyn_unit_fn = |config, unit_index| alu_branch::AluBranch::new(config, unit_index).to_dyn()] - #[extract = alu_branch_mop] + #[extract(alu_branch_mop, alu_branch_mop_sim, alu_branch_mop_sim_ref, alu_branch_mop_sim_mut)] AluBranch(AluBranchMOp), #[transformed_move] #[create_dyn_unit_fn = |config, unit_index| todo!()] - #[extract = transformed_move_mop] + #[extract(transformed_move_mop, transformed_move_mop_sim, transformed_move_mop_sim_ref, transformed_move_mop_sim_mut)] TransformedMove(TransformedMoveOp), #[create_dyn_unit_fn = |config, unit_index| todo!()] - #[extract = load_store_mop] + #[extract(load_store_mop, load_store_mop_sim, load_store_mop_sim_ref, load_store_mop_sim_mut)] LoadStore(LoadStoreMOp), } } @@ -277,9 +314,9 @@ pub struct UnitResultCompleted { pub extra_out: ExtraOut, } -#[hdl(cmp_eq)] -pub struct UnitOutputWrite { - pub which: UnitOutRegNum, +#[hdl(cmp_eq, no_static)] +pub struct UnitOutputWrite> { + pub which: UnitOutRegNum, pub value: PRegValue, } @@ -300,21 +337,21 @@ impl UnitResult { } } -#[hdl] -pub struct UnitOutput { - pub which: UnitOutRegNum, +#[hdl(no_static)] +pub struct UnitOutput, ExtraOut> { + pub which: UnitOutRegNum, pub result: UnitResult, } -impl UnitOutput { +impl UnitOutput { pub fn extra_out_ty(self) -> ExtraOut { self.result.extra_out_ty() } } -#[hdl(cmp_eq)] -pub struct UnitCancelInput { - pub which: UnitOutRegNum, +#[hdl(cmp_eq, no_static)] +pub struct UnitCancelInput> { + pub which: UnitOutRegNum, } pub trait UnitTrait: @@ -332,7 +369,7 @@ pub trait UnitTrait: fn extract_mop( &self, - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr>; fn module(&self) -> Interned>; @@ -340,7 +377,7 @@ pub trait UnitTrait: fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr>; + ) -> Expr, Self::MOp, Self::ExtraOut>>; fn cd(&self, this: Expr) -> Expr; @@ -390,7 +427,7 @@ impl UnitTrait for DynUnit { fn extract_mop( &self, - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr> { self.unit.extract_mop(mop) } @@ -402,7 +439,7 @@ impl UnitTrait for DynUnit { fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { self.unit.unit_to_reg_alloc(this) } @@ -445,7 +482,7 @@ impl UnitTrait for DynUnitWrapper, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr> { Expr::from_enum(Expr::as_enum(self.0.extract_mop(mop))) } @@ -457,7 +494,7 @@ impl UnitTrait for DynUnitWrapper, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { Expr::from_bundle(Expr::as_bundle( self.0.unit_to_reg_alloc(Expr::from_bundle(this)), )) diff --git a/crates/cpu/src/unit/alu_branch.rs b/crates/cpu/src/unit/alu_branch.rs index 6815ae6..a86b43b 100644 --- a/crates/cpu/src/unit/alu_branch.rs +++ b/crates/cpu/src/unit/alu_branch.rs @@ -19,16 +19,13 @@ use crate::{ }, }; use fayalite::{ - intern::{Intern, Interned}, - module::wire_with_loc, - prelude::*, - util::ready_valid::ReadyValid, + intern::Interned, module::wire_with_loc, prelude::*, util::ready_valid::ReadyValid, }; use std::{collections::HashMap, ops::RangeTo}; #[hdl] fn add_sub( - mop: Expr, DynSize, SrcCount>>, + mop: Expr>, DynSize, SrcCount>>, pc: Expr>, flags_mode: Expr, src_values: Expr>, @@ -245,7 +242,7 @@ fn add_sub( #[hdl] fn logical_flags( - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -259,7 +256,7 @@ fn logical_flags( #[hdl] fn logical( - mop: Expr, DynSize, ConstUsize<2>>>, + mop: Expr>, DynSize, ConstUsize<2>>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -273,7 +270,7 @@ fn logical( #[hdl] fn logical_i( - mop: Expr, DynSize, ConstUsize<1>>>, + mop: Expr>, DynSize, ConstUsize<1>>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -287,7 +284,7 @@ fn logical_i( #[hdl] fn shift_rotate( - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -301,7 +298,7 @@ fn shift_rotate( #[hdl] fn compare( - mop: Expr, DynSize, SrcCount>>, + mop: Expr>, DynSize, SrcCount>>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -315,7 +312,7 @@ fn compare( #[hdl] fn branch( - mop: Expr, DynSize, SrcCount>>, + mop: Expr>, DynSize, SrcCount>>, pc: Expr>, flags_mode: Expr, src_values: Expr>, @@ -330,7 +327,7 @@ fn branch( #[hdl] fn read_special( - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, pc: Expr>, flags_mode: Expr, src_values: Expr>, @@ -344,20 +341,18 @@ fn read_special( } #[hdl_module] -pub fn alu_branch(config: &CpuConfig, unit_index: usize) { +pub fn alu_branch(config: PhantomConst, unit_index: usize) { #[hdl] let cd: ClockDomain = m.input(); #[hdl] let unit_to_reg_alloc: UnitToRegAlloc< - AluBranchMOp, DynSize>, + PhantomConst, + AluBranchMOp>, DynSize>, (), - DynSize, - DynSize, - DynSize, - > = m.output(config.unit_to_reg_alloc( - AluBranchMOp[config.unit_out_reg_num()][config.p_reg_num_width()], - (), - )); + > = m.output( + UnitToRegAlloc[config][AluBranchMOp[UnitOutRegNum[config]][config.get().p_reg_num_width()]] + [()], + ); #[hdl] let global_state: GlobalState = m.input(); @@ -375,10 +370,11 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { #[hdl] if let HdlSome(execute_start) = ReadyValid::firing_data(unit_base.execute_start) { #[hdl] - let ExecuteStart::<_> { + let ExecuteStart::<_, _> { mop, pc, src_values, + config: _, } = execute_start; #[hdl] match mop { @@ -580,14 +576,14 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct AluBranch { - config: Interned, + config: PhantomConst, module: Interned>, } impl AluBranch { - pub fn new(config: &CpuConfig, unit_index: usize) -> Self { + pub fn new(config: PhantomConst, unit_index: usize) -> Self { Self { - config: config.intern(), + config, module: alu_branch(config, unit_index), } } @@ -596,7 +592,7 @@ impl AluBranch { impl UnitTrait for AluBranch { type Type = alu_branch; type ExtraOut = (); - type MOp = AluBranchMOp, DynSize>; + type MOp = AluBranchMOp>, DynSize>; fn ty(&self) -> Self::Type { self.module.io_ty() @@ -616,7 +612,7 @@ impl UnitTrait for AluBranch { fn extract_mop( &self, - mop: Expr, DynSize>>, + mop: Expr>, DynSize>>, ) -> Expr> { UnitMOp::alu_branch_mop(mop) } @@ -628,7 +624,7 @@ impl UnitTrait for AluBranch { fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { this.unit_to_reg_alloc } diff --git a/crates/cpu/src/unit/unit_base.rs b/crates/cpu/src/unit/unit_base.rs index 4e665e0..cc7c754 100644 --- a/crates/cpu/src/unit/unit_base.rs +++ b/crates/cpu/src/unit/unit_base.rs @@ -2,7 +2,7 @@ // See Notices.txt for copyright information use crate::{ - config::CpuConfig, + config::{CpuConfig, CpuConfigUnitCount, PhantomConstCpuConfig}, instruction::{COMMON_MOP_SRC_LEN, MOpTrait, PRegNum, UnitNum, UnitOutRegNum}, register::PRegValue, unit::{UnitCancelInput, UnitOutput, UnitOutputWrite}, @@ -15,13 +15,11 @@ use fayalite::{ ty::StaticType, util::ready_valid::ReadyValid, }; -use std::marker::PhantomData; -#[hdl] -pub struct UnitForwardingInfo { - pub unit_output_writes: ArrayType>, UnitCount>, - pub unit_reg_frees: ArrayType>, UnitCount>, - pub _phantom: PhantomData, +#[hdl(no_static)] +pub struct UnitForwardingInfo> { + pub unit_output_writes: ArrayType>, CpuConfigUnitCount>, + pub unit_reg_frees: ArrayType>, CpuConfigUnitCount>, } #[hdl] @@ -30,26 +28,18 @@ pub struct UnitInput { pub pc: UInt<64>, } -#[hdl] -pub struct UnitToRegAlloc< - MOp: Type, - ExtraOut: Type, - UnitNumWidth: Size, - OutRegNumWidth: Size, - UnitCount: Size, -> { +#[hdl(no_static)] +pub struct UnitToRegAlloc, MOp: Type, ExtraOut: Type> { #[hdl(flip)] - pub unit_forwarding_info: UnitForwardingInfo, + pub unit_forwarding_info: UnitForwardingInfo, #[hdl(flip)] pub input: ReadyValid>, #[hdl(flip)] - pub cancel_input: HdlOption>, - pub output: HdlOption>, + pub cancel_input: HdlOption>, + pub output: HdlOption>, } -impl - UnitToRegAlloc -{ +impl UnitToRegAlloc { pub fn mop_ty(self) -> MOp { self.input.data.HdlSome.mop } @@ -58,16 +48,20 @@ impl>> { +#[hdl(no_static)] +pub struct ExecuteStart< + C: PhantomConstGet, + MOp: Type + MOpTrait>, +> { pub mop: MOp, pub pc: UInt<64>, pub src_values: Array, + pub config: C, } -#[hdl] -pub struct ExecuteEnd { - pub unit_output: UnitOutput, +#[hdl(no_static)] +pub struct ExecuteEnd, ExtraOut> { + pub unit_output: UnitOutput, } #[hdl] @@ -240,10 +234,10 @@ impl InFlightOpsSummary { #[hdl_module] pub fn unit_base< - MOp: Type + MOpTrait, SrcRegWidth = DynSize>, + MOp: Type + MOpTrait>, SrcRegWidth = DynSize>, ExtraOut: Type, >( - config: &CpuConfig, + config: PhantomConst, unit_index: usize, mop_ty: MOp, extra_out_ty: ExtraOut, @@ -251,17 +245,18 @@ pub fn unit_base< #[hdl] let cd: ClockDomain = m.input(); #[hdl] - let unit_to_reg_alloc: UnitToRegAlloc = - m.output(config.unit_to_reg_alloc(mop_ty, extra_out_ty)); + let unit_to_reg_alloc: UnitToRegAlloc, MOp, ExtraOut> = + m.output(UnitToRegAlloc[config][mop_ty][extra_out_ty]); #[hdl] - let execute_start: ReadyValid> = m.output(ReadyValid[ExecuteStart[mop_ty]]); + let execute_start: ReadyValid, MOp>> = + m.output(ReadyValid[ExecuteStart[config][mop_ty]]); #[hdl] - let execute_end: HdlOption> = - m.input(HdlOption[ExecuteEnd[config.out_reg_num_width][extra_out_ty]]); + let execute_end: HdlOption, ExtraOut>> = + m.input(HdlOption[ExecuteEnd[config][extra_out_ty]]); connect(execute_start.data, execute_start.ty().data.HdlNone()); - let max_in_flight = config.unit_max_in_flight(unit_index).get(); + let max_in_flight = config.get().unit_max_in_flight(unit_index).get(); let in_flight_op_ty = InFlightOp[mop_ty]; #[hdl] let in_flight_ops = reg_builder() @@ -279,16 +274,15 @@ pub fn unit_base< ); #[hdl] - let UnitForwardingInfo::<_, _, _> { + let UnitForwardingInfo::<_> { unit_output_writes, unit_reg_frees, - _phantom: _, } = unit_to_reg_alloc.unit_forwarding_info; #[hdl] let read_src_regs = wire(mop_ty.src_regs_ty()); connect( read_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); #[hdl] let read_src_values = wire(); @@ -297,7 +291,7 @@ pub fn unit_base< let input_src_regs = wire(mop_ty.src_regs_ty()); connect( input_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); #[hdl] let input_src_regs_valid = wire(); @@ -309,7 +303,7 @@ pub fn unit_base< Bool, SourceLocation::caller(), ); - mem.depth(1 << config.out_reg_num_width); + mem.depth(1 << config.get().out_reg_num_width); mem }) .collect(); @@ -319,11 +313,11 @@ pub fn unit_base< PRegValue, SourceLocation::caller(), ); - unit_output_regs.depth(1 << config.out_reg_num_width); + unit_output_regs.depth(1 << config.get().out_reg_num_width); for src_index in 0..COMMON_MOP_SRC_LEN { let read_port = unit_output_regs.new_read_port(); - let p_reg_num = read_src_regs[src_index].cast_bits_to(config.p_reg_num()); + let p_reg_num = read_src_regs[src_index].cast_bits_to(PRegNum[config]); connect_any(read_port.addr, p_reg_num.unit_out_reg.value); connect(read_port.en, false); connect(read_port.clk, cd.clk); @@ -336,7 +330,7 @@ pub fn unit_base< for src_index in 0..COMMON_MOP_SRC_LEN { let read_port = unit_output_regs_valid[unit_index].new_read_port(); - let p_reg_num = input_src_regs[src_index].cast_bits_to(config.p_reg_num()); + let p_reg_num = input_src_regs[src_index].cast_bits_to(PRegNum[config]); connect_any(read_port.addr, p_reg_num.unit_out_reg.value); connect(read_port.en, false); connect(read_port.clk, cd.clk); @@ -367,8 +361,8 @@ pub fn unit_base< connect_any(ready_write_port.addr, unit_output_write.which.value); connect(ready_write_port.en, true); let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), + PRegNum::<_> { + unit_num: UnitNum[config].from_index(unit_index), unit_out_reg: unit_output_write.which, }; for src_index in 0..COMMON_MOP_SRC_LEN { @@ -399,10 +393,11 @@ pub fn unit_base< execute_start.data, HdlSome( #[hdl] - ExecuteStart::<_> { + ExecuteStart::<_, _> { mop: in_flight_op.mop, pc: in_flight_op.pc, src_values: read_src_values, + config, }, ), ); @@ -425,7 +420,7 @@ pub fn unit_base< let input_mop_src_regs = wire(mop_ty.src_regs_ty()); connect( input_mop_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); MOp::connect_src_regs(mop, input_mop_src_regs); let src_ready_flags = wire_with_loc( @@ -497,7 +492,7 @@ pub fn unit_base< ); connect( src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); MOp::connect_src_regs(mop, src_regs); @@ -521,8 +516,8 @@ pub fn unit_base< value: _, } = unit_output_write; let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), + PRegNum::<_> { + unit_num: UnitNum[config].from_index(unit_index), unit_out_reg, }; for src_index in 0..COMMON_MOP_SRC_LEN { diff --git a/crates/cpu/tests/reg_alloc.rs b/crates/cpu/tests/reg_alloc.rs index 105c2a6..8294087 100644 --- a/crates/cpu/tests/reg_alloc.rs +++ b/crates/cpu/tests/reg_alloc.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information +#![cfg(todo)] use cpu::{ config::{CpuConfig, UnitConfig},