From c87a1b8e1e6bb3a27358b6d3516de584da730bd4 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Thu, 11 Dec 2025 00:31:00 -0800 Subject: [PATCH] wrote out all of next_pc and tests/next_pc --- crates/cpu/src/config.rs | 12 + crates/cpu/src/next_pc.rs | 2448 +++++++++++++++++++++--------- crates/cpu/src/util/array_vec.rs | 60 + crates/cpu/tests/next_pc.rs | 54 +- 4 files changed, 1871 insertions(+), 703 deletions(-) diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index a7dd7d9..cf2fd08 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -163,3 +163,15 @@ pub type CpuConfigFetchWidthInBytes> = DynSize; #[hdl(get(|c| c.rob_size.get()))] pub type CpuConfigRobSize> = DynSize; + +pub trait PhantomConstCpuConfig: + PhantomConstGet + + Into> + + From> + + Type + + ToSimValue + + ToExpr +{ +} + +impl PhantomConstCpuConfig for PhantomConst {} diff --git a/crates/cpu/src/next_pc.rs b/crates/cpu/src/next_pc.rs index 45c3db5..b0f1478 100644 --- a/crates/cpu/src/next_pc.rs +++ b/crates/cpu/src/next_pc.rs @@ -14,7 +14,7 @@ use crate::{ config::{ CpuConfig, CpuConfigFetchWidth, CpuConfigMaxFetchesInFlight, CpuConfigRobSize, - TwiceCpuConfigFetchWidth, + PhantomConstCpuConfig, TwiceCpuConfigFetchWidth, }, util::array_vec::ArrayVec, }; @@ -40,6 +40,7 @@ pub struct NextPcToFetchInterfaceInner { #[hdl(no_static)] pub struct NextPcToFetchInterface> { pub fetch: ReadyValid, + /// when both fetch and cancel are triggered in the same clock cycle, that means to cancel and then start a new fetch pub cancel: ReadyValid, CpuConfigMaxFetchesInFlight>>, pub config: C, } @@ -115,7 +116,7 @@ pub struct RetireToNextPcInterfacePerInsn> { pub config: C, } -impl SimValueDefault for RetireToNextPcInterfacePerInsn> { +impl SimValueDefault for RetireToNextPcInterfacePerInsn { #[hdl] fn sim_value_default(self) -> SimValue { let Self { @@ -159,7 +160,7 @@ pub struct DecodeToPostDecodeInterfaceInner> { pub config: C, } -impl SimValueDefault for 
DecodeToPostDecodeInterfaceInner> { +impl SimValueDefault for DecodeToPostDecodeInterfaceInner { #[hdl] fn sim_value_default(self) -> SimValue { let Self { insns, config } = self; @@ -204,150 +205,488 @@ struct Cancel> { } /// the output of [`Stage::run`]. -/// when cancelling operations, the returned [`StageOutput.cancel`] should be the state after -/// running all operations returned in [`StageOutput.output`]. +/// when cancelling operations, the returned [`StageRunOutput.cancel`] should be the state after +/// running all operations returned in [`StageRunOutput.output`]. #[hdl(no_static)] -struct StageOutput> { - outputs: ArrayVec, +struct StageRunOutput + PhantomConstCpuConfig, S: Type + Stage> { + outputs: ArrayVec, StageMaxOutputsPerStep>, /// when set to [`HdlSome`], [`Stage::cancel`] is called on all previous stages cancel: HdlOption>, } -trait Stage: Type + SimValueDefault + ResetSteps { - type Input: Type; +trait Stages: Type { + type Outputs: Type + SimValueDefault; + type SimValueOutputQueueRefs<'a>: 'a + Copy; + type SimValueOutputQueueMuts<'a>: 'a; + fn outputs_ty(config: C) -> Self::Outputs; + fn reborrow_output_queues_as_refs<'a>( + output_queues: &'a Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueRefs<'a>; + fn reborrow_output_queue_muts<'a>( + output_queues: &'a mut Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueMuts<'a>; + fn peek_output_queues( + output_queues: Self::SimValueOutputQueueRefs<'_>, + max_peek_len: usize, + ) -> impl Iterator>; + /// pops all output queues; if this function returns `None`, none of the output queues will be modified + fn pop_output_queues( + output_queues: Self::SimValueOutputQueueMuts<'_>, + ) -> Option>; + fn visit_sim_value_ref>(this: &SimValue, visitor: &mut V); +} + +impl Stages for () { + type Outputs = (); + type SimValueOutputQueueRefs<'a> = (); + type SimValueOutputQueueMuts<'a> = (); + fn outputs_ty(_config: C) -> Self::Outputs { + () + } + fn 
reborrow_output_queues_as_refs<'a>( + output_queues: &'a Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueRefs<'a> { + let () = output_queues; + () + } + fn reborrow_output_queue_muts<'a>( + output_queues: &'a mut Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueMuts<'a> { + let () = output_queues; + () + } + #[hdl] + fn peek_output_queues( + output_queues: Self::SimValueOutputQueueRefs<'_>, + max_peek_len: usize, + ) -> impl Iterator> { + let () = output_queues; + std::iter::repeat_n( + #[hdl(sim)] + (), + max_peek_len, + ) + } + #[hdl] + fn pop_output_queues( + output_queues: Self::SimValueOutputQueueMuts<'_>, + ) -> Option> { + let () = output_queues; + Some( + #[hdl(sim)] + (), + ) + } + #[hdl] + fn visit_sim_value_ref>(this: &SimValue, _visitor: &mut V) { + #[hdl(sim)] + let () = this; + } +} + +impl> Stages for S1 { + type Outputs = S1::Output; + type SimValueOutputQueueRefs<'a> = + &'a SimValue, StageOutputQueueSize>>; + type SimValueOutputQueueMuts<'a> = + &'a mut SimValue, StageOutputQueueSize>>; + fn outputs_ty(config: C) -> Self::Outputs { + S1::output_ty(config) + } + fn reborrow_output_queues_as_refs<'a>( + output_queues: &'a Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueRefs<'a> { + output_queues + } + fn reborrow_output_queue_muts<'a>( + output_queues: &'a mut Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueMuts<'a> { + output_queues + } + fn peek_output_queues( + output_queues: Self::SimValueOutputQueueRefs<'_>, + max_peek_len: usize, + ) -> impl Iterator> { + Queue::peek_iter(output_queues).take(max_peek_len) + } + fn pop_output_queues( + output_queues: Self::SimValueOutputQueueMuts<'_>, + ) -> Option> { + Queue::pop(output_queues) + } + fn visit_sim_value_ref>(this: &SimValue, visitor: &mut V) { + visitor.visit(this); + } +} + +impl, S2: Stage> Stages for (S1, S2) { + type Outputs = (S1::Output, S2::Output); + type SimValueOutputQueueRefs<'a> = ( + &'a SimValue, 
StageOutputQueueSize>>, + &'a SimValue, StageOutputQueueSize>>, + ); + type SimValueOutputQueueMuts<'a> = ( + &'a mut SimValue, StageOutputQueueSize>>, + &'a mut SimValue, StageOutputQueueSize>>, + ); + fn outputs_ty(config: C) -> Self::Outputs { + (S1::output_ty(config), S2::output_ty(config)) + } + fn reborrow_output_queues_as_refs<'a>( + output_queues: &'a Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueRefs<'a> { + let (s1, s2) = output_queues; + (s1, s2) + } + fn reborrow_output_queue_muts<'a>( + output_queues: &'a mut Self::SimValueOutputQueueMuts<'_>, + ) -> Self::SimValueOutputQueueMuts<'a> { + let (s1, s2) = output_queues; + (s1, s2) + } + fn peek_output_queues( + output_queues: Self::SimValueOutputQueueRefs<'_>, + max_peek_len: usize, + ) -> impl Iterator> { + let (s1, s2) = output_queues; + Queue::peek_iter(s1) + .zip(Queue::peek_iter(s2)) + .take(max_peek_len) + .map(ToSimValue::into_sim_value) + } + #[hdl] + fn pop_output_queues( + output_queues: Self::SimValueOutputQueueMuts<'_>, + ) -> Option> { + let (s1, s2) = output_queues; + // make sure to only pop if all pops will succeed + if Queue::is_empty(s1) || Queue::is_empty(s2) { + None + } else { + Some( + #[hdl(sim)] + ( + Queue::pop(s1).expect("just checked"), + Queue::pop(s2).expect("just checked"), + ), + ) + } + } + #[hdl] + fn visit_sim_value_ref>(this: &SimValue, visitor: &mut V) { + #[hdl(sim)] + let (s1, s2) = this; + visitor.visit(s1); + visitor.visit(s2); + } +} + +trait StagesVisitSimValueRef { + fn visit>(&mut self, stage: &SimValue); +} + +/// `Self` is either `()` or the sibling stage of `Sibling`. 
+/// Sibling stages must have `ExternalPipeIoWidth = ConstUsize<1>` to ensure they can accept input in lock-step +trait SiblingStageOrUnit>: Type { + /// Self if Self: Stage otherwise some arbitrary stage + type StageOrSomething: Stage; + type SimValueStageWithQueues; + type SimValueStageWithQueuesInputs; + type SimValueCancel; + type Cancel: Type; + const IS_STAGE: bool; + /// return Some(v) if Self: Stage, otherwise return None + fn sim_value_stage_with_queues_opt( + v: &Self::SimValueStageWithQueues, + ) -> Option<&SimValue>>; + /// return Some(v) if Self: Stage, otherwise return None + fn sim_value_stage_with_queues_inputs_opt( + v: &Self::SimValueStageWithQueuesInputs, + ) -> Option<&SimValue>>; + fn cancel_ty(self, config: C) -> Self::Cancel; + fn make_sim_value_cancel( + v: Option>>, + ) -> Self::SimValueCancel; +} + +impl> SiblingStageOrUnit + for () +{ + type StageOrSomething = Sibling; + type SimValueStageWithQueues = (); + type SimValueStageWithQueuesInputs = (); + type SimValueCancel = (); + type Cancel = (); + const IS_STAGE: bool = false; + fn sim_value_stage_with_queues_opt( + _v: &Self::SimValueStageWithQueues, + ) -> Option<&SimValue>> { + None + } + fn sim_value_stage_with_queues_inputs_opt( + _v: &Self::SimValueStageWithQueuesInputs, + ) -> Option<&SimValue>> { + None + } + fn cancel_ty(self, _config: C) -> Self::Cancel { + () + } + fn make_sim_value_cancel( + v: Option>>, + ) -> Self::SimValueCancel { + assert!(v.is_none()); + () + } +} + +impl SiblingStageOrUnit for S +where + C: PhantomConstCpuConfig, + S: Stage>, + S::SiblingStage: Stage, SiblingStage = S>, +{ + type StageOrSomething = Self; + type SimValueStageWithQueues = SimValue>; + type SimValueStageWithQueuesInputs = SimValue>; + type SimValueCancel = SimValue>; + type Cancel = CancelInProgressForStageWithQueues; + const IS_STAGE: bool = { + assert!( + S::HAS_EXTERNAL_PIPE != S::SiblingStage::HAS_EXTERNAL_PIPE, + "only one sibling in a pair can have an external pipe" + ); + true + }; + 
fn sim_value_stage_with_queues_opt( + v: &Self::SimValueStageWithQueues, + ) -> Option<&SimValue>> { + // evaluate assert in IS_STAGE + let _ = Self::IS_STAGE; + Some(v) + } + fn sim_value_stage_with_queues_inputs_opt( + v: &Self::SimValueStageWithQueuesInputs, + ) -> Option<&SimValue>> { + // evaluate assert in IS_STAGE + let _ = Self::IS_STAGE; + Some(v) + } + fn cancel_ty(self, config: C) -> Self::Cancel { + // evaluate assert in IS_STAGE + let _ = Self::IS_STAGE; + CancelInProgressForStageWithQueues[config][self] + } + fn make_sim_value_cancel( + v: Option>>, + ) -> Self::SimValueCancel { + // evaluate assert in IS_STAGE + let _ = Self::IS_STAGE; + let Some(v) = v else { + panic!("expected Some"); + }; + v + } +} + +trait Stage: Type + SimValueDefault + ResetSteps { + type InputStages: Stages; + type SiblingStage: SiblingStageOrUnit; type Inputs: Type; - type Output: Type; + type Output: Type + SimValueDefault; + type ToExternalPipeInputInterface: Type; + type FromExternalPipeOutputInterface: Type; + type FromExternalPipeOutputItem: Type; type MaxOutputsPerStep: Size; + type ExternalPipeIoWidth: Size; type InputQueueSize: Size; type OutputQueueSize: Size; + const HAS_EXTERNAL_PIPE: bool; - fn input_ty(config: PhantomConst) -> Self::Input; - fn inputs_ty(config: PhantomConst) -> Self::Inputs; - fn output_ty(config: PhantomConst) -> Self::Output; - fn max_outputs_per_step( - config: PhantomConst, - ) -> ::SizeType; - fn input_queue_size( - config: PhantomConst, - ) -> ::SizeType; - fn output_queue_size( - config: PhantomConst, - ) -> ::SizeType; - fn stage_output_ty( - config: PhantomConst, - ) -> StageOutput> { - StageOutput[Self::output_ty(config)][Self::max_outputs_per_step(config)][config] - } - /// see [`StageOutput`] for docs on output + fn inputs_ty(config: C) -> Self::Inputs; + fn output_ty(config: C) -> Self::Output; + fn to_external_pipe_input_interface_ty(config: C) -> Self::ToExternalPipeInputInterface; + fn 
from_external_pipe_output_interface_ty(config: C) -> Self::FromExternalPipeOutputInterface; + fn from_external_pipe_output_item_ty(config: C) -> Self::FromExternalPipeOutputItem; + + fn max_outputs_per_step(config: C) -> ::SizeType; + fn external_pipe_io_width(config: C) -> ::SizeType; + fn input_queue_size(config: C) -> ::SizeType; + fn output_queue_size(config: C) -> ::SizeType; + + fn cancel_in_progress_for_stage_ref( + cancel: &SimValue>, + ) -> &SimValue>; + fn cancel_in_progress_for_stage_mut( + cancel: &mut SimValue>, + ) -> &mut SimValue>; + + fn make_inputs( + input_stages_outputs: &SimValue>, + from_external_pipe_output_item: &SimValue, + ) -> SimValue; + + /// see [`StageRunOutput`] for docs on output fn run( state: &mut SimValue, inputs: &SimValue, - ) -> SimValue>>; + ) -> SimValue>; /// changes state to match `cancel` - fn cancel(state: &mut SimValue, cancel: &SimValue>>); + fn cancel(state: &mut SimValue, cancel: &SimValue>); } macro_rules! hdl_type_alias_with_generics { ( #[without_generics = $WithoutGenerics:ident, $OneGeneric:ident] #[ty = $ty:expr] - $vis:vis type $Type:ident<$Arg:ident: $Trait:ident, $C:ident: PhantomConstGet> = $Target:ty; + $vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty; ) => { - $vis type $Type<$Arg, $C> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type; + $vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type; $vis struct $WithoutGenerics {} #[allow(non_upper_case_globals)] $vis const $Type: $WithoutGenerics = $WithoutGenerics {}; - #[derive(Clone, PartialEq, Eq, Hash, Debug)] - $vis struct $OneGeneric<$Arg: $Trait>($Arg); + const _: () = { + #[derive(Clone, PartialEq, Eq, Hash, Debug)] + $vis struct $OneGeneric<$C: $PhantomConstCpuConfig>($C); - impl<$Arg: $Trait> std::ops::Index<$Arg> for $WithoutGenerics { - type Output = $OneGeneric<$Arg>; + impl<$C: $PhantomConstCpuConfig> 
std::ops::Index<$C> for $WithoutGenerics { + type Output = $OneGeneric<$C>; - fn index(&self, arg: $Arg) -> &Self::Output { - fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($OneGeneric(arg))) + fn index(&self, config: $C) -> &Self::Output { + fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($OneGeneric(config))) + } } - } - impl<$Arg: $Trait, $C: PhantomConstGet> std::ops::Index<$C> for $OneGeneric<$Arg> { - type Output = $Type<$Arg, $C>; + impl<$C: $PhantomConstCpuConfig, $Arg: $Trait<$TraitC>> std::ops::Index<$Arg> for $OneGeneric<$C> { + type Output = $Type<$C, $Arg>; - fn index(&self, config: $C) -> &Self::Output { - fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($ty(self.0, config))) + fn index(&self, arg: $Arg) -> &Self::Output { + fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($ty(self.0, arg))) + } } - } + }; }; ( #[without_generics = $WithoutGenerics:ident, $OneGeneric:ident] #[size = $size:expr] - $vis:vis type $Type:ident<$Arg:ident: $Trait:ident, $C:ident: PhantomConstGet> = $Target:ty; + $vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty; ) => { - $vis type $Type<$Arg, $C> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type; + $vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type; $vis struct $WithoutGenerics {} #[allow(non_upper_case_globals)] $vis const $Type: $WithoutGenerics = $WithoutGenerics {}; - #[derive(Clone, PartialEq, Eq, Hash, Debug)] - $vis struct $OneGeneric<$Arg: $Trait>($Arg); + const _: () = { + #[derive(Clone, PartialEq, Eq, Hash, Debug)] + $vis struct $OneGeneric<$C: $PhantomConstCpuConfig>($C); - impl<$Arg: $Trait> std::ops::Index<$Arg> for $WithoutGenerics { - type Output = $OneGeneric<$Arg>; + impl<$C: $PhantomConstCpuConfig> std::ops::Index<$C> for $WithoutGenerics { + type Output = 
$OneGeneric<$C>; - fn index(&self, arg: $Arg) -> &Self::Output { - fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($OneGeneric(arg))) + fn index(&self, config: $C) -> &Self::Output { + fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($OneGeneric(config))) + } } - } - impl<$Arg: $Trait, $C: PhantomConstGet> std::ops::Index<$C> for $OneGeneric<$Arg> { - type Output = <$Type<$Arg, $C> as Size>::SizeType; + impl<$C: $PhantomConstCpuConfig, $Arg: $Trait<$TraitC>> std::ops::Index<$Arg> for $OneGeneric<$C> { + type Output = <$Type<$C, $Arg> as Size>::SizeType; - fn index(&self, config: $C) -> &Self::Output { - fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($size(self.0, config))) + fn index(&self, arg: $Arg) -> &Self::Output { + fayalite::intern::Interned::into_inner(fayalite::intern::Intern::intern_sized($size(self.0, arg))) + } } - } + }; }; } hdl_type_alias_with_generics! { - #[without_generics = StageTraitInputWithoutGenerics, StageTraitInputWithStage] - #[ty = |_stage, config: C| T::input_ty(PhantomConst::new_interned(config.get()))] - type StageTraitInput> = ::Input; + #[without_generics = StageInputStagesOutputsWithoutGenerics, StageInputStagesOutputsWithStage] + #[ty = |config: C, _stage| >::InputStages::outputs_ty(config)] + type StageInputStagesOutputs> = <>::InputStages as Stages>::Outputs; } hdl_type_alias_with_generics! { - #[without_generics = StageTraitInputsWithoutGenerics, StageTraitInputsWithStage] - #[ty = |_stage, config: C| T::inputs_ty(PhantomConst::new_interned(config.get()))] - type StageTraitInputs> = ::Inputs; + #[without_generics = StageInputsWithoutGenerics, StageInputsWithStage] + #[ty = |config: C, _stage| T::inputs_ty(config)] + type StageInputs> = >::Inputs; } hdl_type_alias_with_generics! 
{ - #[without_generics = StageTraitOutputWithoutGenerics, StageTraitOutputWithStage] - #[ty = |_stage, config: C| T::output_ty(PhantomConst::new_interned(config.get()))] - type StageTraitOutput> = ::Output; + #[without_generics = StageOutputWithoutGenerics, StageOutputWithStage] + #[ty = |config: C, _stage| T::output_ty(config)] + type StageOutput> = >::Output; +} + +hdl_type_alias_with_generics! { + #[without_generics = StageToExternalPipeInputInterfaceWithoutGenerics, StageToExternalPipeInputInterfaceWithStage] + #[ty = |config: C, _stage| T::to_external_pipe_input_interface_ty(config)] + type StageToExternalPipeInputInterface> = >::ToExternalPipeInputInterface; +} + +hdl_type_alias_with_generics! { + #[without_generics = StageFromExternalPipeOutputInterfaceWithoutGenerics, StageFromExternalPipeOutputInterfaceWithStage] + #[ty = |config: C, _stage| T::from_external_pipe_output_interface_ty(config)] + type StageFromExternalPipeOutputInterface> = >::FromExternalPipeOutputInterface; +} + +hdl_type_alias_with_generics! { + #[without_generics = StageFromExternalPipeOutputItemWithoutGenerics, StageFromExternalPipeOutputItemWithStage] + #[ty = |config: C, _stage| T::from_external_pipe_output_item_ty(config)] + type StageFromExternalPipeOutputItem> = >::FromExternalPipeOutputItem; } hdl_type_alias_with_generics! { #[without_generics = StageMaxOutputsPerStepWithoutGenerics, StageMaxOutputsPerStepWithStage] - #[size = |_stage, config: C| T::max_outputs_per_step(PhantomConst::new_interned(config.get()))] - type StageMaxOutputsPerStep> = ::MaxOutputsPerStep; + #[size = |config: C, _stage| T::max_outputs_per_step(config)] + type StageMaxOutputsPerStep> = >::MaxOutputsPerStep; } hdl_type_alias_with_generics! 
{ #[without_generics = StageInputQueueSizeWithoutGenerics, StageInputQueueSizeWithStage] - #[size = |_stage, config: C| T::input_queue_size(PhantomConst::new_interned(config.get()))] - type StageInputQueueSize> = ::InputQueueSize; + #[size = |config: C, _stage| T::input_queue_size(config)] + type StageInputQueueSize> = >::InputQueueSize; +} + +hdl_type_alias_with_generics! { + #[without_generics = StageExternalPipeIoWidthWithoutGenerics, StageExternalPipeIoWidthWithStage] + #[size = |config: C, _stage| T::external_pipe_io_width(config)] + type StageExternalPipeIoWidth> = >::ExternalPipeIoWidth; } hdl_type_alias_with_generics! { #[without_generics = StageOutputQueueSizeWithoutGenerics, StageOutputQueueSizeWithStage] - #[size = |_stage, config: C| T::output_queue_size(PhantomConst::new_interned(config.get()))] - type StageOutputQueueSize> = ::OutputQueueSize; + #[size = |config: C, _stage| T::output_queue_size(config)] + type StageOutputQueueSize> = >::OutputQueueSize; } +#[hdl] +type StageToExternalPipeInputInput< + C: PhantomConstGet + PhantomConstCpuConfig, + S: Type + Stage, +> = ArrayVec, StageExternalPipeIoWidth>; + +#[hdl] +type StageToExternalPipeInputCancel< + C: PhantomConstGet + PhantomConstCpuConfig, + S: Type + Stage, +> = HdlOption, StageInputQueueSize>>; + +#[hdl] +type StageExternalPipeIoReady< + C: PhantomConstGet + PhantomConstCpuConfig, + S: Type + Stage, +> = UIntInRangeInclusiveType, StageExternalPipeIoWidth>; + +#[hdl] +type StageFromExternalPipeOutputData< + C: PhantomConstGet + PhantomConstCpuConfig, + S: Type + Stage, +> = ArrayVec, StageExternalPipeIoWidth>; + #[hdl(no_static)] struct NextPcStageOutput> { start_pc: UInt<64>, @@ -361,7 +700,7 @@ struct NextPcStageOutput> { config: C, } -impl SimValueDefault for NextPcStageOutput> { +impl SimValueDefault for NextPcStageOutput { #[hdl] fn sim_value_default(self) -> SimValue { let Self { @@ -386,7 +725,7 @@ impl SimValueDefault for NextPcStageOutput> { } #[hdl(no_static)] -struct 
NextPcStageState> { +struct NextPcStageState + PhantomConstCpuConfig> { call_stack: CallStack, branch_target_buffer: BranchTargetBuffer, next_pc: UInt<64>, @@ -394,7 +733,7 @@ struct NextPcStageState> { config: C, } -impl SimValueDefault for NextPcStageState> { +impl SimValueDefault for NextPcStageState { #[hdl] fn sim_value_default(self) -> SimValue { let Self { @@ -417,7 +756,7 @@ impl SimValueDefault for NextPcStageState> { } } -impl ResetSteps for NextPcStageState> { +impl ResetSteps for NextPcStageState { #[hdl] fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { #[hdl(sim)] @@ -436,49 +775,87 @@ impl ResetSteps for NextPcStageState> { } } -impl Stage for NextPcStageState> { - type Input = (); +impl Stage for NextPcStageState { + type InputStages = (); + type SiblingStage = (); type Inputs = (); - type Output = NextPcStageOutput>; + type Output = NextPcStageOutput; + type ToExternalPipeInputInterface = (); + type FromExternalPipeOutputInterface = (); + type FromExternalPipeOutputItem = (); type MaxOutputsPerStep = ConstUsize<1>; + type ExternalPipeIoWidth = ConstUsize<1>; type InputQueueSize = ConstUsize<1>; type OutputQueueSize = ConstUsize<1>; + const HAS_EXTERNAL_PIPE: bool = false; - fn input_ty(_config: PhantomConst) -> Self::Input { + fn inputs_ty(_config: C) -> Self::Inputs { () } - fn inputs_ty(_config: PhantomConst) -> Self::Inputs { - () - } - - fn output_ty(config: PhantomConst) -> Self::Output { + fn output_ty(config: C) -> Self::Output { NextPcStageOutput[config] } - fn max_outputs_per_step( - _config: PhantomConst, - ) -> ::SizeType { + fn to_external_pipe_input_interface_ty(_config: C) -> Self::ToExternalPipeInputInterface { + () + } + + fn from_external_pipe_output_interface_ty(_config: C) -> Self::FromExternalPipeOutputInterface { + () + } + + fn from_external_pipe_output_item_ty(_config: C) -> Self::FromExternalPipeOutputItem { + () + } + + fn max_outputs_per_step(_config: C) -> ::SizeType { ConstUsize } - fn input_queue_size( - 
_config: PhantomConst, - ) -> ::SizeType { + fn external_pipe_io_width(_config: C) -> ::SizeType { ConstUsize } - fn output_queue_size( - _config: PhantomConst, - ) -> ::SizeType { + fn input_queue_size(_config: C) -> ::SizeType { ConstUsize } + fn output_queue_size(_config: C) -> ::SizeType { + ConstUsize + } + + fn cancel_in_progress_for_stage_ref( + cancel: &SimValue>, + ) -> &SimValue> { + &cancel.next_pc + } + + fn cancel_in_progress_for_stage_mut( + cancel: &mut SimValue>, + ) -> &mut SimValue> { + &mut cancel.next_pc + } + + #[hdl] + fn make_inputs( + input_stages_outputs: &SimValue>, + from_external_pipe_output_item: &SimValue, + ) -> SimValue { + #[hdl(sim)] + let () = input_stages_outputs; + #[hdl(sim)] + let () = from_external_pipe_output_item; + #[hdl(sim)] + () + } + #[hdl] fn run( state: &mut SimValue, _inputs: &SimValue, - ) -> SimValue>> { + ) -> SimValue> { + let this_ty = state.ty(); let config = state.config.ty(); let start_call_stack = state.call_stack.clone(); let fetch_block_id = state.next_fetch_block_id.as_int(); @@ -557,15 +934,17 @@ impl Stage for NextPcStageState> { config, }; #[hdl(sim)] - StageOutput::<_, _, _> { - outputs: Self::stage_output_ty(config).outputs.new_full_sim([output]), + StageRunOutput::<_, _> { + outputs: StageRunOutput[config][this_ty] + .outputs + .new_full_sim([output]), cancel: #[hdl(sim)] (HdlOption[Cancel[config]]).HdlNone(), } } #[hdl] - fn cancel(state: &mut SimValue, cancel: &SimValue>>) { + fn cancel(state: &mut SimValue, cancel: &SimValue>) { #[hdl(sim)] let Self { call_stack, @@ -645,7 +1024,7 @@ struct BrPredStageOutput> { config: C, } -impl SimValueDefault for BrPredStageOutput> { +impl SimValueDefault for BrPredStageOutput { #[hdl] fn sim_value_default(self) -> SimValue { #[hdl(sim)] @@ -659,7 +1038,7 @@ impl SimValueDefault for BrPredStageOutput> { } #[hdl(no_static)] -struct BrPredStageState> { +struct BrPredStageState + PhantomConstCpuConfig> { branch_history: UInt<6>, branch_predictor: Array, config: 
C, @@ -670,7 +1049,7 @@ fn step_branch_history(branch_history: &mut SimValue>, taken: bool) { ((&**branch_history << 1) | taken.cast_to_static::>()).cast_to_static::>(); } -impl BrPredStageState> { +impl BrPredStageState { fn branch_predictor_index(this: &SimValue, branch_pc: u64) -> usize { let mut t = this.branch_history.cast_to_static::>().as_int(); t ^= t.rotate_left(5) & !branch_pc.rotate_right(3); @@ -684,7 +1063,7 @@ impl BrPredStageState> { } } -impl SimValueDefault for BrPredStageState> { +impl SimValueDefault for BrPredStageState { #[hdl] fn sim_value_default(self) -> SimValue { let Self { @@ -705,7 +1084,7 @@ impl SimValueDefault for BrPredStageState> { } } -impl ResetSteps for BrPredStageState> { +impl ResetSteps for BrPredStageState { #[hdl] fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { #[hdl(sim)] @@ -719,49 +1098,84 @@ impl ResetSteps for BrPredStageState> { } } -impl Stage for BrPredStageState> { - type Input = NextPcStageOutput>; - type Inputs = NextPcStageOutput>; - type Output = BrPredStageOutput>; +impl Stage for BrPredStageState { + type InputStages = NextPcStageState; + type SiblingStage = FetchDecodeStageState; + type Inputs = NextPcStageOutput; + type Output = BrPredStageOutput; + type ToExternalPipeInputInterface = (); + type FromExternalPipeOutputInterface = (); + type FromExternalPipeOutputItem = (); type MaxOutputsPerStep = ConstUsize<1>; + type ExternalPipeIoWidth = ConstUsize<1>; type InputQueueSize = ConstUsize<1>; - type OutputQueueSize = CpuConfigMaxFetchesInFlight>; + type OutputQueueSize = CpuConfigMaxFetchesInFlight; + const HAS_EXTERNAL_PIPE: bool = false; - fn input_ty(config: PhantomConst) -> Self::Input { + fn inputs_ty(config: C) -> Self::Inputs { NextPcStageOutput[config] } - fn inputs_ty(config: PhantomConst) -> Self::Inputs { - NextPcStageOutput[config] - } - - fn output_ty(config: PhantomConst) -> Self::Output { + fn output_ty(config: C) -> Self::Output { BrPredStageOutput[config] } - fn 
max_outputs_per_step( - _config: PhantomConst, - ) -> ::SizeType { + fn to_external_pipe_input_interface_ty(_config: C) -> Self::ToExternalPipeInputInterface { + () + } + + fn from_external_pipe_output_interface_ty(_config: C) -> Self::FromExternalPipeOutputInterface { + () + } + + fn from_external_pipe_output_item_ty(_config: C) -> Self::FromExternalPipeOutputItem { + () + } + + fn max_outputs_per_step(_config: C) -> ::SizeType { ConstUsize } - fn input_queue_size( - _config: PhantomConst, - ) -> ::SizeType { + fn external_pipe_io_width(_config: C) -> ::SizeType { ConstUsize } - fn output_queue_size( - config: PhantomConst, - ) -> ::SizeType { + fn input_queue_size(_config: C) -> ::SizeType { + ConstUsize + } + + fn output_queue_size(config: C) -> ::SizeType { CpuConfigMaxFetchesInFlight[config] } + fn cancel_in_progress_for_stage_ref( + cancel: &SimValue>, + ) -> &SimValue> { + &cancel.br_pred + } + + fn cancel_in_progress_for_stage_mut( + cancel: &mut SimValue>, + ) -> &mut SimValue> { + &mut cancel.br_pred + } + + #[hdl] + fn make_inputs( + input_stages_outputs: &SimValue>, + from_external_pipe_output_item: &SimValue, + ) -> SimValue { + #[hdl(sim)] + let () = from_external_pipe_output_item; + input_stages_outputs.clone() + } + #[hdl] fn run( state: &mut SimValue, inputs: &SimValue, - ) -> SimValue>> { + ) -> SimValue> { + let this_ty = state.ty(); let config = state.config.ty(); #[hdl(sim)] let NextPcStageOutput::<_> { @@ -806,9 +1220,9 @@ impl Stage for BrPredStageState> { let btb_entry_index = &btb_entry.0; let mut btb_entry = btb_entry.1.clone(); btb_entry.addr_kind = opposite_addr_kind; - let StageOutput { outputs, cancel } = Self::stage_output_ty(config); + let StageRunOutput { outputs, cancel } = StageRunOutput[config][this_ty]; let retval = #[hdl(sim)] - StageOutput::<_, _, _> { + StageRunOutput::<_, _> { outputs: outputs.sim_value_default(), cancel: #[hdl(sim)] cancel.HdlSome( @@ -839,15 +1253,17 @@ impl Stage for BrPredStageState> { config, }; 
#[hdl(sim)] - StageOutput::<_, _, _> { - outputs: Self::stage_output_ty(config).outputs.new_full_sim([output]), + StageRunOutput::<_, _> { + outputs: StageRunOutput[config][this_ty] + .outputs + .new_full_sim([output]), cancel: #[hdl(sim)] (HdlOption[Cancel[config]]).HdlNone(), } } #[hdl] - fn cancel(state: &mut SimValue, cancel: &SimValue>>) { + fn cancel(state: &mut SimValue, cancel: &SimValue>) { #[hdl(sim)] let Cancel::<_> { call_stack: _, @@ -861,7 +1277,7 @@ impl Stage for BrPredStageState> { } } -impl BrPredStageState> { +impl BrPredStageState { #[hdl] fn train_branch_predictor( this: &mut SimValue, @@ -884,11 +1300,11 @@ impl BrPredStageState> { } #[hdl(no_static)] -struct FetchDecodeStageState> { +struct FetchDecodeStageState + PhantomConstCpuConfig> { config: C, } -impl SimValueDefault for FetchDecodeStageState> { +impl SimValueDefault for FetchDecodeStageState { #[hdl] fn sim_value_default(self) -> SimValue { #[hdl(sim)] @@ -898,7 +1314,7 @@ impl SimValueDefault for FetchDecodeStageState> { } } -impl ResetSteps for FetchDecodeStageState> { +impl ResetSteps for FetchDecodeStageState { #[hdl] fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { #[hdl(sim)] @@ -913,7 +1329,7 @@ struct FetchDecodeStageOutput> { decode_output: DecodeToPostDecodeInterfaceInner, } -impl SimValueDefault for FetchDecodeStageOutput> { +impl SimValueDefault for FetchDecodeStageOutput { #[hdl] fn sim_value_default(self) -> SimValue { let Self { @@ -928,55 +1344,92 @@ impl SimValueDefault for FetchDecodeStageOutput> { } } -impl Stage for FetchDecodeStageState> { - type Input = NextPcStageOutput>; - type Inputs = FetchDecodeStageOutput>; - type Output = FetchDecodeStageOutput>; +impl Stage for FetchDecodeStageState { + type InputStages = NextPcStageState; + type SiblingStage = BrPredStageState; + type Inputs = FetchDecodeStageOutput; + type Output = FetchDecodeStageOutput; + type ToExternalPipeInputInterface = NextPcToFetchInterface; + type FromExternalPipeOutputInterface 
= DecodeToPostDecodeInterface; + type FromExternalPipeOutputItem = DecodeToPostDecodeInterfaceInner; type MaxOutputsPerStep = ConstUsize<1>; - type InputQueueSize = CpuConfigMaxFetchesInFlight>; + type ExternalPipeIoWidth = ConstUsize<1>; + type InputQueueSize = CpuConfigMaxFetchesInFlight; type OutputQueueSize = ConstUsize<1>; + const HAS_EXTERNAL_PIPE: bool = true; - fn input_ty(config: PhantomConst) -> Self::Input { - NextPcStageOutput[config] - } - - fn inputs_ty(config: PhantomConst) -> Self::Inputs { + fn inputs_ty(config: C) -> Self::Inputs { FetchDecodeStageOutput[config] } - fn output_ty(config: PhantomConst) -> Self::Output { + fn output_ty(config: C) -> Self::Output { FetchDecodeStageOutput[config] } - fn max_outputs_per_step( - _config: PhantomConst, - ) -> ::SizeType { + fn to_external_pipe_input_interface_ty(config: C) -> Self::ToExternalPipeInputInterface { + NextPcToFetchInterface[config] + } + + fn from_external_pipe_output_interface_ty(config: C) -> Self::FromExternalPipeOutputInterface { + DecodeToPostDecodeInterface[config] + } + + fn from_external_pipe_output_item_ty(config: C) -> Self::FromExternalPipeOutputItem { + DecodeToPostDecodeInterfaceInner[config] + } + + fn max_outputs_per_step(_config: C) -> ::SizeType { ConstUsize } - fn input_queue_size( - config: PhantomConst, - ) -> ::SizeType { + fn external_pipe_io_width(_config: C) -> ::SizeType { + ConstUsize + } + + fn input_queue_size(config: C) -> ::SizeType { CpuConfigMaxFetchesInFlight[config] } - fn output_queue_size( - _config: PhantomConst, - ) -> ::SizeType { + fn output_queue_size(_config: C) -> ::SizeType { ConstUsize } + fn cancel_in_progress_for_stage_ref( + cancel: &SimValue>, + ) -> &SimValue> { + &cancel.fetch_decode + } + + fn cancel_in_progress_for_stage_mut( + cancel: &mut SimValue>, + ) -> &mut SimValue> { + &mut cancel.fetch_decode + } + + #[hdl] + fn make_inputs( + input_stages_outputs: &SimValue>, + from_external_pipe_output_item: &SimValue, + ) -> SimValue { + 
#[hdl(sim)] + FetchDecodeStageOutput::<_> { + next_pc_stage_output: input_stages_outputs, + decode_output: from_external_pipe_output_item, + } + } + #[hdl] fn run( state: &mut SimValue, inputs: &SimValue, - ) -> SimValue>> { + ) -> SimValue> { + let this_ty = state.ty(); #[hdl(sim)] let Self { config } = state; let config = config.ty(); - let StageOutput { outputs, cancel } = Self::stage_output_ty(config); + let StageRunOutput { outputs, cancel } = StageRunOutput[config][this_ty]; #[hdl(sim)] - StageOutput::<_, _, _> { + StageRunOutput::<_, _> { outputs: outputs.new_full_sim([inputs]), cancel: #[hdl(sim)] cancel.HdlNone(), @@ -984,14 +1437,14 @@ impl Stage for FetchDecodeStageState> { } #[hdl] - fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + fn cancel(state: &mut SimValue, _cancel: &SimValue>) { #[hdl(sim)] let Self { config: _ } = state; } } #[hdl(no_static)] -struct PostDecodeStageState> { +struct PostDecodeStageState + PhantomConstCpuConfig> { config: C, } @@ -1001,7 +1454,7 @@ struct PostDecodeStageInput> { br_pred_stage_output: BrPredStageOutput, } -impl SimValueDefault for PostDecodeStageInput> { +impl SimValueDefault for PostDecodeStageInput { #[hdl] fn sim_value_default(self) -> SimValue { #[hdl(sim)] @@ -1023,7 +1476,7 @@ struct PostDecodeStageOutput> { config: C, } -impl SimValueDefault for PostDecodeStageOutput> { +impl SimValueDefault for PostDecodeStageOutput { #[hdl] fn sim_value_default(self) -> SimValue { #[hdl(sim)] @@ -1041,7 +1494,7 @@ impl SimValueDefault for PostDecodeStageOutput> { } } -impl SimValueDefault for PostDecodeStageState> { +impl SimValueDefault for PostDecodeStageState { #[hdl] fn sim_value_default(self) -> SimValue { #[hdl(sim)] @@ -1051,7 +1504,7 @@ impl SimValueDefault for PostDecodeStageState> { } } -impl ResetSteps for PostDecodeStageState> { +impl ResetSteps for PostDecodeStageState { #[hdl] fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { #[hdl(sim)] @@ -1060,49 +1513,90 @@ impl ResetSteps for 
PostDecodeStageState> { } } -impl Stage for PostDecodeStageState> { - type Input = PostDecodeStageInput>; - type Inputs = PostDecodeStageInput>; - type Output = PostDecodeStageOutput>; - type MaxOutputsPerStep = CpuConfigFetchWidth>; +impl Stage for PostDecodeStageState { + type InputStages = (FetchDecodeStageState, BrPredStageState); + type SiblingStage = (); + type Inputs = PostDecodeStageInput; + type Output = PostDecodeStageOutput; + type ToExternalPipeInputInterface = (); + type FromExternalPipeOutputInterface = (); + type FromExternalPipeOutputItem = (); + type MaxOutputsPerStep = CpuConfigFetchWidth; + type ExternalPipeIoWidth = ConstUsize<1>; type InputQueueSize = ConstUsize<1>; - type OutputQueueSize = TwiceCpuConfigFetchWidth>; + type OutputQueueSize = TwiceCpuConfigFetchWidth; + const HAS_EXTERNAL_PIPE: bool = false; - fn input_ty(config: PhantomConst) -> Self::Input { + fn inputs_ty(config: C) -> Self::Inputs { PostDecodeStageInput[config] } - fn inputs_ty(config: PhantomConst) -> Self::Inputs { - PostDecodeStageInput[config] - } - - fn output_ty(config: PhantomConst) -> Self::Output { + fn output_ty(config: C) -> Self::Output { PostDecodeStageOutput[config] } - fn max_outputs_per_step( - config: PhantomConst, - ) -> ::SizeType { + fn to_external_pipe_input_interface_ty(_config: C) -> Self::ToExternalPipeInputInterface { + () + } + + fn from_external_pipe_output_interface_ty(_config: C) -> Self::FromExternalPipeOutputInterface { + () + } + + fn from_external_pipe_output_item_ty(_config: C) -> Self::FromExternalPipeOutputItem { + () + } + + fn max_outputs_per_step(config: C) -> ::SizeType { CpuConfigFetchWidth[config] } - fn input_queue_size( - _config: PhantomConst, - ) -> ::SizeType { + fn external_pipe_io_width(_config: C) -> ::SizeType { ConstUsize } - fn output_queue_size( - config: PhantomConst, - ) -> ::SizeType { + fn input_queue_size(_config: C) -> ::SizeType { + ConstUsize + } + + fn output_queue_size(config: C) -> ::SizeType { 
TwiceCpuConfigFetchWidth[config] } + fn cancel_in_progress_for_stage_ref( + cancel: &SimValue>, + ) -> &SimValue> { + &cancel.post_decode + } + + fn cancel_in_progress_for_stage_mut( + cancel: &mut SimValue>, + ) -> &mut SimValue> { + &mut cancel.post_decode + } + + #[hdl] + fn make_inputs( + input_stages_outputs: &SimValue>, + from_external_pipe_output_item: &SimValue, + ) -> SimValue { + #[hdl(sim)] + let (fetch_decode_stage_output, br_pred_stage_output) = input_stages_outputs; + #[hdl(sim)] + let () = from_external_pipe_output_item; + #[hdl(sim)] + PostDecodeStageInput::<_> { + fetch_decode_stage_output, + br_pred_stage_output, + } + } + #[hdl] fn run( state: &mut SimValue, inputs: &SimValue, - ) -> SimValue>> { + ) -> SimValue> { + let this_ty = state.ty(); #[hdl(sim)] let Self { config } = state; let config = config.ty(); @@ -1140,10 +1634,10 @@ impl Stage for PostDecodeStageState> { -- either the decoded instructions or a WipDecodedInsnKind::Interrupt", ); let insns = ArrayVec::elements_sim_ref(&insns); - let StageOutput { + let StageRunOutput { outputs: outputs_ty, cancel: cancel_ty, - } = Self::stage_output_ty(config); + } = StageRunOutput[config][this_ty]; assert_eq!(outputs_ty.capacity(), decode_output.insns.ty().capacity()); let mut outputs = outputs_ty.sim_value_default(); let mut add_output_insn = |insn: &SimValue, @@ -1192,7 +1686,7 @@ impl Stage for PostDecodeStageState> { let mut call_stack = start_call_stack.clone(); CallStack::push(&mut call_stack, start_pc); let retval = #[hdl(sim)] - StageOutput::<_, _, _> { + StageRunOutput::<_, _> { outputs, cancel: #[hdl(sim)] cancel_ty.HdlSome( @@ -1389,7 +1883,7 @@ impl Stage for PostDecodeStageState> { }; if *new_btb_entry.cmp_ne(predicted_btb_entry) { #[hdl(sim)] - StageOutput::<_, _, _> { + StageRunOutput::<_, _> { outputs: outputs_ty.sim_value_default(), cancel: #[hdl(sim)] cancel_ty.HdlSome( @@ -1406,7 +1900,7 @@ impl Stage for PostDecodeStageState> { } } else { #[hdl(sim)] - StageOutput::<_, _, _> { + 
StageRunOutput::<_, _> { outputs, cancel: #[hdl(sim)] cancel_ty.HdlNone(), @@ -1415,7 +1909,7 @@ impl Stage for PostDecodeStageState> { } #[hdl] - fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + fn cancel(state: &mut SimValue, _cancel: &SimValue>) { #[hdl(sim)] let Self { config: _ } = state; } @@ -1427,7 +1921,7 @@ struct ExecuteRetireStageInput> { retire_interface_per_insn: RetireToNextPcInterfacePerInsn, } -impl SimValueDefault for ExecuteRetireStageInput> { +impl SimValueDefault for ExecuteRetireStageInput { #[hdl] fn sim_value_default(self) -> SimValue { let Self { @@ -1443,7 +1937,7 @@ impl SimValueDefault for ExecuteRetireStageInput> { } #[hdl(no_static)] -struct ExecuteRetireStageState> { +struct ExecuteRetireStageState + PhantomConstCpuConfig> { config: C, } @@ -1453,7 +1947,23 @@ struct ExecuteRetireStageOutput> { config: C, } -impl SimValueDefault for ExecuteRetireStageState> { +impl SimValueDefault for ExecuteRetireStageOutput { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + train_branch_predictor, + config, + } = self; + #[hdl(sim)] + Self { + train_branch_predictor: #[hdl(sim)] + train_branch_predictor.HdlNone(), + config, + } + } +} + +impl SimValueDefault for ExecuteRetireStageState { #[hdl] fn sim_value_default(self) -> SimValue { let Self { config } = self; @@ -1462,7 +1972,7 @@ impl SimValueDefault for ExecuteRetireStageState> { } } -impl ResetSteps for ExecuteRetireStageState> { +impl ResetSteps for ExecuteRetireStageState { #[hdl] fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { #[hdl(sim)] @@ -1471,49 +1981,86 @@ impl ResetSteps for ExecuteRetireStageState> { } } -impl Stage for ExecuteRetireStageState> { - type Input = PostDecodeStageOutput>; - type Inputs = ExecuteRetireStageInput>; - type Output = ExecuteRetireStageOutput>; +impl Stage for ExecuteRetireStageState { + type InputStages = PostDecodeStageState; + type SiblingStage = (); + type Inputs = ExecuteRetireStageInput; + type Output = 
ExecuteRetireStageOutput; + type ToExternalPipeInputInterface = PostDecodeOutputInterface; + type FromExternalPipeOutputInterface = RetireToNextPcInterface; + type FromExternalPipeOutputItem = RetireToNextPcInterfacePerInsn; type MaxOutputsPerStep = ConstUsize<1>; - type InputQueueSize = CpuConfigRobSize>; - type OutputQueueSize = ConstUsize<1>; + type ExternalPipeIoWidth = CpuConfigFetchWidth; + type InputQueueSize = CpuConfigRobSize; + type OutputQueueSize = CpuConfigFetchWidth; + const HAS_EXTERNAL_PIPE: bool = true; - fn input_ty(config: PhantomConst) -> Self::Input { - PostDecodeStageOutput[config] - } - - fn inputs_ty(config: PhantomConst) -> Self::Inputs { + fn inputs_ty(config: C) -> Self::Inputs { ExecuteRetireStageInput[config] } - fn output_ty(config: PhantomConst) -> Self::Output { + fn output_ty(config: C) -> Self::Output { ExecuteRetireStageOutput[config] } - fn max_outputs_per_step( - _config: PhantomConst, - ) -> ::SizeType { + fn to_external_pipe_input_interface_ty(config: C) -> Self::ToExternalPipeInputInterface { + PostDecodeOutputInterface[config] + } + + fn from_external_pipe_output_interface_ty(config: C) -> Self::FromExternalPipeOutputInterface { + RetireToNextPcInterface[config] + } + + fn from_external_pipe_output_item_ty(config: C) -> Self::FromExternalPipeOutputItem { + RetireToNextPcInterfacePerInsn[config] + } + + fn max_outputs_per_step(_config: C) -> ::SizeType { ConstUsize } - fn input_queue_size( - config: PhantomConst, - ) -> ::SizeType { + fn external_pipe_io_width(config: C) -> ::SizeType { + CpuConfigFetchWidth[config] + } + + fn input_queue_size(config: C) -> ::SizeType { CpuConfigRobSize[config] } - fn output_queue_size( - _config: PhantomConst, - ) -> ::SizeType { - ConstUsize + fn output_queue_size(config: C) -> ::SizeType { + CpuConfigFetchWidth[config] + } + + fn cancel_in_progress_for_stage_ref( + cancel: &SimValue>, + ) -> &SimValue> { + &cancel.execute_retire + } + + fn cancel_in_progress_for_stage_mut( + cancel: &mut 
SimValue>, + ) -> &mut SimValue> { + &mut cancel.execute_retire + } + + #[hdl] + fn make_inputs( + input_stages_outputs: &SimValue>, + from_external_pipe_output_item: &SimValue, + ) -> SimValue { + #[hdl(sim)] + ExecuteRetireStageInput::<_> { + post_decode_stage_output: input_stages_outputs, + retire_interface_per_insn: from_external_pipe_output_item, + } } #[hdl] fn run( state: &mut SimValue, inputs: &SimValue, - ) -> SimValue>> { + ) -> SimValue> { + let this_ty = state.ty(); #[hdl(sim)] let Self { config } = state; let config = config.ty(); @@ -1541,10 +2088,10 @@ impl Stage for ExecuteRetireStageState> { config: _, } = post_decode_stage_output; assert_eq!(*id, insn.id, "instruction queuing out of sync"); - let StageOutput { + let StageRunOutput { outputs: outputs_ty, cancel: cancel_ty, - } = Self::stage_output_ty(config); + } = StageRunOutput[config][this_ty]; let mut branch_history = start_branch_history.clone(); let train_branch_predictor = #[hdl(sim)] if let HdlSome(taken) = cond_br_taken { @@ -1671,7 +2218,7 @@ impl Stage for ExecuteRetireStageState> { CallStackOp::Unknown => unreachable!(), } #[hdl(sim)] - StageOutput::<_, _, _> { + StageRunOutput::<_, _> { outputs: outputs_ty.new_sim( #[hdl(sim)] ExecuteRetireStageOutput::<_> { @@ -1694,7 +2241,7 @@ impl Stage for ExecuteRetireStageState> { } } else { #[hdl(sim)] - StageOutput::<_, _, _> { + StageRunOutput::<_, _> { outputs: outputs_ty.new_full_sim([ #[hdl(sim)] ExecuteRetireStageOutput::<_> { @@ -1709,7 +2256,7 @@ impl Stage for ExecuteRetireStageState> { } #[hdl] - fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + fn cancel(state: &mut SimValue, _cancel: &SimValue>) { #[hdl(sim)] let Self { config: _ } = state; } @@ -1794,12 +2341,41 @@ impl SimValueDefault for SimOnly { } } +impl SimValueDefault for PhantomConst { + fn sim_value_default(self) -> SimValue { + self.to_sim_value() + } +} + +impl SimValueDefault for () { + fn sim_value_default(self) -> SimValue { + self.to_sim_value() + } +} + 
+impl SimValueDefault for (T1, T2) { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + (self.0.sim_value_default(), self.1.sim_value_default()) + } +} + impl SimValueDefault for ArrayVec { fn sim_value_default(self) -> SimValue { self.new_sim(self.element().sim_value_default()) } } +impl SimValueDefault for ArrayType { + fn sim_value_default(self) -> SimValue { + SimValue::from_array_elements( + self, + std::iter::repeat_n(self.element().sim_value_default(), self.len()), + ) + } +} + impl SimValueDefault for HdlOption { fn sim_value_default(self) -> SimValue { self.HdlNone().to_sim_value_with_type(self) @@ -2231,6 +2807,8 @@ struct Queue { head: UIntInRangeType, Capacity>, /// exclusive tail: UIntInRangeType, Capacity>, + /// used to disambiguate between a full and an empty queue + eq_head_tail_means_full: Bool, } impl Queue { @@ -2241,21 +2819,27 @@ impl Queue { assert_ne!(self.capacity(), 0); (pos + 1) % self.capacity() } + fn nth_pos_after(self, pos: usize, nth: usize) -> usize { + assert_ne!(self.capacity(), 0); + (pos + nth) % self.capacity() + } fn prev_pos(self, pos: usize) -> usize { assert_ne!(self.capacity(), 0); (pos + self.capacity() - 1) % self.capacity() } fn is_empty(this: &SimValue) -> bool { - this.head == this.tail + this.head == this.tail && !*this.eq_head_tail_means_full } fn is_full(this: &SimValue) -> bool { - let head = *this.head; - let tail = *this.tail; - this.ty().next_pos(head) == tail + this.head == this.tail && *this.eq_head_tail_means_full } fn len(this: &SimValue) -> usize { let capacity = this.ty().capacity(); - (*this.tail + capacity - *this.head) % capacity + if Self::is_full(this) { + capacity + } else { + (*this.tail + capacity - *this.head) % capacity + } } fn space_left(this: &SimValue) -> usize { this.ty().capacity() - Self::len(this) @@ -2263,6 +2847,7 @@ impl Queue { fn clear(this: &mut SimValue) { *this.head = 0; *this.tail = 0; + *this.eq_head_tail_means_full = false; } fn try_push(this: &mut SimValue, 
value: impl ToSimValueWithType) -> Result<(), ()> { if Self::is_full(this) { @@ -2271,6 +2856,7 @@ impl Queue { let head = *this.head; let head = this.ty().next_pos(head); *this.head = head; + *this.eq_head_tail_means_full = true; let data = &mut this.data[head]; *data = value.to_sim_value_with_type(data.ty()); Ok(()) @@ -2284,6 +2870,7 @@ impl Queue { let data = this.data[head].clone(); let head = this.ty().prev_pos(head); *this.head = head; + *this.eq_head_tail_means_full = false; Some(data) } } @@ -2294,6 +2881,11 @@ impl Queue { Some(this.data[*this.tail].clone()) } } + fn peek_iter( + this: &SimValue, + ) -> impl Clone + DoubleEndedIterator> + ExactSizeIterator { + (0..Self::len(this)).map(|nth| this.data[this.ty().nth_pos_after(*this.tail, nth)].clone()) + } fn pop(this: &mut SimValue) -> Option> { if Self::is_empty(this) { None @@ -2301,6 +2893,7 @@ impl Queue { let tail = *this.tail; let data = this.data[tail].clone(); *this.tail = this.ty().next_pos(tail); + *this.eq_head_tail_means_full = false; Some(data) } } @@ -2309,7 +2902,12 @@ impl Queue { impl SimValueDefault for Queue { #[hdl] fn sim_value_default(self) -> SimValue { - let Self { data, head, tail } = self; + let Self { + data, + head, + tail, + eq_head_tail_means_full: _, + } = self; #[hdl(sim)] Queue:: { data: repeat( @@ -2318,6 +2916,7 @@ impl SimValueDefault for Queue ), head: 0usize.to_sim_value_with_type(head), tail: 0usize.to_sim_value_with_type(tail), + eq_head_tail_means_full: false, } } } @@ -2326,483 +2925,937 @@ impl ResetSteps for Queue { #[hdl] fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { #[hdl(sim)] - let Queue:: { data, head, tail } = this; + let Queue:: { + data, + head, + tail, + eq_head_tail_means_full, + } = this; **head = 0; **tail = 0; + **eq_head_tail_means_full = false; ResetSteps::reset_step(data, step) } } #[hdl(no_static)] -struct StageWithQueues> { - state: S, - input_queue: Queue, StageInputQueueSize>, - output_queue: Queue, StageOutputQueueSize>, 
+struct CancelInProgressForStageWithQueues< + C: PhantomConstGet + PhantomConstCpuConfig, + S: Type + Stage, +> { + cancel_state: Bool, + input_queue_to_cancel: UIntInRangeInclusiveType, StageInputQueueSize>, + output_queue_to_cancel: UIntInRangeInclusiveType, StageOutputQueueSize>, } -impl StageWithQueues> { - FIXME -} - -#[hdl(no_static)] -struct CancelInProgress> { - cancel: Cancel, - br_pred_stage_inputs_to_cancel: UIntInRangeInclusive<0, 32>, - br_pred_stage_cancel: Bool, - fetch_decode_stage_inputs_to_cancel: - UIntInRangeInclusiveType, CpuConfigMaxFetchesInFlight>, - fetch_decode_stage_cancel: Bool, - post_decode_stage_inputs_to_cancel: UIntInRangeInclusive<0, 1>, - post_decode_stage_cancel: Bool, - post_decode_stage_outputs_to_cancel: - UIntInRangeInclusiveType, TwiceCpuConfigFetchWidth>, - rename_dispatch_execute_stage_inputs_to_cancel: UIntInRangeInclusive<0, 256>, - rename_dispatch_execute_stage_cancel: Bool, - retire_stage_inputs_to_cancel: UIntInRangeInclusive<0, 1>, - retire_stage_cancel: Bool, - config: C, -} - -impl CancelInProgress> { +impl> CancelInProgressForStageWithQueues { #[hdl] - fn to_fetch_cancel_data( - this: &SimValue, - ) -> SimValue< - HdlOption< - UIntInRangeInclusiveType< - ConstUsize<1>, - CpuConfigMaxFetchesInFlight>, - >, - >, - > { - let NextPcStateOutputs { - to_fetch_cancel_data, - .. 
- } = NextPcStateOutputs[this.config.ty()]; - if *this.fetch_decode_stage_inputs_to_cancel > 0 { - #[hdl(sim)] - to_fetch_cancel_data.HdlSome(*this.fetch_decode_stage_inputs_to_cancel) - } else { - #[hdl(sim)] - to_fetch_cancel_data.HdlNone() + fn nothing_to_cancel(self) -> SimValue { + #[hdl(sim)] + Self { + cancel_state: false, + input_queue_to_cancel: 0usize.to_sim_value_with_type(self.input_queue_to_cancel), + output_queue_to_cancel: 0usize.to_sim_value_with_type(self.output_queue_to_cancel), } } } #[hdl(no_static)] -pub struct StatesAndQueues> { - next_pc_stage_state: NextPcStageState, - br_pred_stage_inputs: Queue, ConstUsize<32>>, - br_pred_stage_state: BrPredStageState, - fetch_decode_stage_inputs: Queue, CpuConfigMaxFetchesInFlight>, - fetch_decode_stage_state: FetchDecodeStageState, - post_decode_stage_inputs: Queue, ConstUsize<1>>, - post_decode_stage_state: PostDecodeStageState, - post_decode_stage_outputs: Queue, CpuConfigFetchWidth>, - rename_dispatch_execute_stage_inputs: Queue, ConstUsize<256>>, - rename_dispatch_execute_stage_state: RenameDispatchExecuteStageState, - retire_stage_inputs: Queue, ConstUsize<1>>, - retire_stage_state: ExecuteRetireStageState, +struct StageWithQueues + PhantomConstCpuConfig, S: Type + Stage> { + input_queue: Queue, StageInputQueueSize>, + state: S, + output_queue: Queue, StageOutputQueueSize>, config: C, } -impl SimValueDefault for StatesAndQueues> { +#[hdl(no_static)] +struct StageWithQueuesInputs< + C: PhantomConstGet + PhantomConstCpuConfig, + S: Type + Stage, +> { + to_external_pipe_input_input_ready: StageExternalPipeIoReady, + to_external_pipe_input_cancel_ready: Bool, + from_external_pipe_output_data: StageFromExternalPipeOutputData, +} + +impl< + C: PhantomConstCpuConfig, + S: Stage< + C, + ToExternalPipeInputInterface = (), + FromExternalPipeOutputInterface = (), + FromExternalPipeOutputItem = (), + >, +> StageWithQueuesInputs +{ + #[hdl] + fn no_external_pipe(self) -> SimValue { + let Self { + 
to_external_pipe_input_input_ready, + to_external_pipe_input_cancel_ready: _, + from_external_pipe_output_data, + } = self; + #[hdl(sim)] + Self { + to_external_pipe_input_input_ready: uint_in_range_inclusive_max( + to_external_pipe_input_input_ready, + ), + to_external_pipe_input_cancel_ready: true, + from_external_pipe_output_data: from_external_pipe_output_data.new_full_sim( + from_external_pipe_output_data + .elements_ty() + .sim_value_default(), + ), + } + } +} + +#[hdl(no_static)] +struct StageWithQueuesOutputs< + C: PhantomConstGet + PhantomConstCpuConfig, + S: Type + Stage, +> { + to_external_pipe_input_input: StageToExternalPipeInputInput, + to_external_pipe_input_cancel: StageToExternalPipeInputCancel, + from_external_pipe_output_ready: StageExternalPipeIoReady, +} + +enum CancelResult { + Done, + InProgress, +} + +impl> SimValueDefault for StageWithQueues { #[hdl] fn sim_value_default(self) -> SimValue { let Self { - next_pc_stage_state, - br_pred_stage_inputs, - br_pred_stage_state, - fetch_decode_stage_inputs, - fetch_decode_stage_state, - post_decode_stage_inputs, - post_decode_stage_state, - post_decode_stage_outputs, - rename_dispatch_execute_stage_inputs, - rename_dispatch_execute_stage_state, - retire_stage_inputs, - retire_stage_state, + input_queue, + state, + output_queue, config, } = self; #[hdl(sim)] Self { - next_pc_stage_state: next_pc_stage_state.sim_value_default(), - br_pred_stage_inputs: br_pred_stage_inputs.sim_value_default(), - br_pred_stage_state: br_pred_stage_state.sim_value_default(), - fetch_decode_stage_inputs: fetch_decode_stage_inputs.sim_value_default(), - fetch_decode_stage_state: fetch_decode_stage_state.sim_value_default(), - post_decode_stage_inputs: post_decode_stage_inputs.sim_value_default(), - post_decode_stage_state: post_decode_stage_state.sim_value_default(), - post_decode_stage_outputs: post_decode_stage_outputs.sim_value_default(), - rename_dispatch_execute_stage_inputs: rename_dispatch_execute_stage_inputs - 
.sim_value_default(), - rename_dispatch_execute_stage_state: rename_dispatch_execute_stage_state - .sim_value_default(), - retire_stage_inputs: retire_stage_inputs.sim_value_default(), - retire_stage_state: retire_stage_state.sim_value_default(), + input_queue: input_queue.sim_value_default(), + state: state.sim_value_default(), + output_queue: output_queue.sim_value_default(), config, } } } -impl ResetSteps for StatesAndQueues> { +impl> ResetSteps for StageWithQueues { #[hdl] fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { #[hdl(sim)] let Self { - next_pc_stage_state, - br_pred_stage_inputs, - br_pred_stage_state, - fetch_decode_stage_inputs, - fetch_decode_stage_state, - post_decode_stage_inputs, - post_decode_stage_state, - post_decode_stage_outputs, - rename_dispatch_execute_stage_inputs, - rename_dispatch_execute_stage_state, - retire_stage_inputs, - retire_stage_state, + input_queue, + state, + output_queue, config: _, } = this; - let next_pc_stage_state = ResetSteps::reset_step(next_pc_stage_state, step); - let br_pred_stage_inputs = ResetSteps::reset_step(br_pred_stage_inputs, step); - let br_pred_stage_state = ResetSteps::reset_step(br_pred_stage_state, step); - let fetch_decode_stage_inputs = ResetSteps::reset_step(fetch_decode_stage_inputs, step); - let fetch_decode_stage_state = ResetSteps::reset_step(fetch_decode_stage_state, step); - let post_decode_stage_inputs = ResetSteps::reset_step(post_decode_stage_inputs, step); - let post_decode_stage_state = ResetSteps::reset_step(post_decode_stage_state, step); - let post_decode_stage_outputs = ResetSteps::reset_step(post_decode_stage_outputs, step); - let rename_dispatch_execute_stage_inputs = - ResetSteps::reset_step(rename_dispatch_execute_stage_inputs, step); - let rename_dispatch_execute_stage_state = - ResetSteps::reset_step(rename_dispatch_execute_stage_state, step); - let retire_stage_inputs = ResetSteps::reset_step(retire_stage_inputs, step); - let retire_stage_state = 
ResetSteps::reset_step(retire_stage_state, step); - next_pc_stage_state - .and(br_pred_stage_inputs) - .and(br_pred_stage_state) - .and(fetch_decode_stage_inputs) - .and(fetch_decode_stage_state) - .and(post_decode_stage_inputs) - .and(post_decode_stage_state) - .and(post_decode_stage_outputs) - .and(rename_dispatch_execute_stage_inputs) - .and(rename_dispatch_execute_stage_state) - .and(retire_stage_inputs) - .and(retire_stage_state) + let input_queue = ResetSteps::reset_step(input_queue, step); + let state = ResetSteps::reset_step(state, step); + let output_queue = ResetSteps::reset_step(output_queue, step); + input_queue.and(state).and(output_queue) } } -impl StatesAndQueues> { - #[hdl] - fn step_no_cancel( - this: &mut SimValue, - inputs: SimValue>>, - ) -> SimValue>>> { - #[hdl(sim)] - let NextPcStateStepInputs::<_> { - to_fetch_fetch_triggered, - to_fetch_cancel_triggered, - from_decode_inner_triggered, - post_decode_output_insns_triggered, - from_retire_inner_triggered, - } = inputs; - assert!(!*to_fetch_cancel_triggered); - #[hdl(sim)] - let Self { - next_pc_stage_state, - br_pred_stage_inputs, - br_pred_stage_state, - fetch_decode_stage_inputs, - fetch_decode_stage_state, - post_decode_stage_inputs, - post_decode_stage_state, - post_decode_stage_outputs, - rename_dispatch_execute_stage_inputs, - rename_dispatch_execute_stage_state, - retire_stage_inputs, - retire_stage_state, - config, - } = this; - let config = config.ty(); - let retval_ty = HdlOption[CancelInProgress[config]]; - let mut retval = #[hdl(sim)] - retval_ty.HdlNone(); - if Queue::capacity(br_pred_stage_inputs) - Queue::len(br_pred_stage_inputs) { - todo!() - } - #[hdl(sim)] - let StageOutput::<_, _, _> { outputs, cancel } = - Stage::run(next_pc_stage_state, &().to_sim_value()); +#[must_use] +enum StageWithQueuesRunResult> { + Success { + /// the number of outputs popped from `S::InputStages`'s output queues this clock cycle. 
+ input_stages_outputs_popped_count: usize, + }, + Cancel { + cancel: SimValue>, + stage_cancel: SimValue>, + sibling_cancel: >::SimValueCancel, + }, +} +impl> StageWithQueues { + fn input_queue_space_left_with_sibling( + this: &SimValue, + sibling: &>::SimValueStageWithQueues, + ) -> usize { + let mut retval = Queue::space_left(&this.input_queue); + if let Some(sibling) = S::SiblingStage::sim_value_stage_with_queues_opt(sibling) { + retval = retval.min(Queue::space_left(&sibling.input_queue)); + } retval } #[hdl] - fn step_cancel( - this: &mut SimValue, - cancel_opt: &mut SimValue>>>, - inputs: SimValue>>, - ) { - #[hdl(sim)] - let NextPcStateStepInputs::<_> { - to_fetch_fetch_triggered, - to_fetch_cancel_triggered, - from_decode_inner_triggered, - post_decode_output_insns_triggered, - from_retire_inner_triggered, - } = inputs; - assert!(!*to_fetch_fetch_triggered); - #[hdl(sim)] - if let HdlSome(_) = from_decode_inner_triggered { - unreachable!(); - } - assert_eq!(**ArrayVec::len_sim(&post_decode_output_insns_triggered), 0); - #[hdl(sim)] - if let HdlSome(_) = from_retire_inner_triggered { - unreachable!(); - } + fn outputs( + this: &SimValue, + cancel: Option<&SimValue>>, + input_stages_output_queues: >::SimValueOutputQueueRefs<'_>, + sibling: &>::SimValueStageWithQueues, + ) -> SimValue> { #[hdl(sim)] let Self { - next_pc_stage_state, - br_pred_stage_inputs, - br_pred_stage_state, - fetch_decode_stage_inputs, - fetch_decode_stage_state, - post_decode_stage_inputs, - post_decode_stage_state, - post_decode_stage_outputs, - rename_dispatch_execute_stage_inputs, - rename_dispatch_execute_stage_state, - retire_stage_inputs, - retire_stage_state, - config: _, + input_queue, + state, + output_queue, + config, } = this; - let cancel = #[hdl(sim)] - match &mut *cancel_opt { - HdlSome(cancel) => cancel, - HdlNone => unreachable!(), - }; - #[hdl(sim)] - if let HdlSome(_) = CancelInProgress::to_fetch_cancel_data(cancel) { - if !*to_fetch_cancel_triggered { - return; + let 
config = config.ty(); + let state_ty = state.ty(); + let ty = StageWithQueuesOutputs[config][state_ty]; + let cancel_ty = ty.to_external_pipe_input_cancel; + if let Some(cancel) = cancel { + let input_queue_to_cancel = + *S::cancel_in_progress_for_stage_ref(cancel).input_queue_to_cancel; + let to_external_pipe_input_cancel = if input_queue_to_cancel > 0 { + #[hdl(sim)] + cancel_ty.HdlSome(input_queue_to_cancel.to_sim_value_with_type(cancel_ty.HdlSome)) + } else { + #[hdl(sim)] + cancel_ty.HdlNone() + }; + #[hdl(sim)] + StageWithQueuesOutputs::<_, _> { + to_external_pipe_input_input: ty.to_external_pipe_input_input.sim_value_default(), + to_external_pipe_input_cancel, + from_external_pipe_output_ready: 0usize + .to_sim_value_with_type(ty.from_external_pipe_output_ready), + } + } else { + let mut to_external_pipe_input_input = + ty.to_external_pipe_input_input.sim_value_default(); + for input in S::InputStages::peek_output_queues( + input_stages_output_queues, + Self::input_queue_space_left_with_sibling(this, sibling), + ) { + let Ok(_) = ArrayVec::try_push_sim(&mut to_external_pipe_input_input, input) else { + break; + }; + } + let outputs_limit = Queue::space_left(output_queue); + let step_limit = + outputs_limit / S::MaxOutputsPerStep::as_usize(S::max_outputs_per_step(config)); + #[hdl(sim)] + StageWithQueuesOutputs::<_, _> { + to_external_pipe_input_input, + to_external_pipe_input_cancel: #[hdl(sim)] + cancel_ty.HdlNone(), + from_external_pipe_output_ready: step_limit + .min(Queue::len(input_queue)) + .min(S::ExternalPipeIoWidth::as_usize(S::external_pipe_io_width( + config, + ))) + .to_sim_value_with_type(ty.from_external_pipe_output_ready), } } + } + #[hdl] + fn cancel( + this: &mut SimValue, + cancel: &mut SimValue>, + inputs: &SimValue>, + last_outputs: &SimValue>, + ) -> CancelResult { #[hdl(sim)] - let CancelInProgress::<_> { - cancel, - br_pred_stage_inputs_to_cancel, - br_pred_stage_cancel, - fetch_decode_stage_inputs_to_cancel, - 
fetch_decode_stage_cancel, - post_decode_stage_inputs_to_cancel, - post_decode_stage_cancel, - post_decode_stage_outputs_to_cancel, - rename_dispatch_execute_stage_inputs_to_cancel, - rename_dispatch_execute_stage_cancel, - retire_stage_inputs_to_cancel, - retire_stage_cancel, + let Self { + input_queue, + state, + output_queue, config: _, - } = cancel; - Stage::cancel(next_pc_stage_state, cancel); - for _ in 0..**br_pred_stage_inputs_to_cancel { - Queue::undo_push(br_pred_stage_inputs).expect("known to be non-empty"); - } - if **br_pred_stage_cancel { - Stage::cancel(br_pred_stage_state, cancel); - } - for _ in 0..**fetch_decode_stage_inputs_to_cancel { - Queue::undo_push(fetch_decode_stage_inputs).expect("known to be non-empty"); - } - if **fetch_decode_stage_cancel { - Stage::cancel(fetch_decode_stage_state, cancel); - } - for _ in 0..**post_decode_stage_inputs_to_cancel { - Queue::undo_push(post_decode_stage_inputs).expect("known to be non-empty"); - } - if **post_decode_stage_cancel { - Stage::cancel(post_decode_stage_state, cancel); - } - for _ in 0..**post_decode_stage_outputs_to_cancel { - Queue::undo_push(post_decode_stage_outputs).expect("known to be non-empty"); - } - for _ in 0..**rename_dispatch_execute_stage_inputs_to_cancel { - Queue::undo_push(rename_dispatch_execute_stage_inputs).expect("known to be non-empty"); - } - if **rename_dispatch_execute_stage_cancel { - Stage::cancel(rename_dispatch_execute_stage_state, cancel); - } - for _ in 0..**retire_stage_inputs_to_cancel { - Queue::undo_push(retire_stage_inputs).expect("known to be non-empty"); - } - if **retire_stage_cancel { - Stage::cancel(retire_stage_state, cancel); - } - *cancel_opt = #[hdl(sim)] - (cancel_opt.ty()).HdlNone(); - } -} - -#[hdl(no_static)] -pub struct NextPcState> { - states_and_queues: StatesAndQueues, - cancel: HdlOption>, -} - -impl SimValueDefault for NextPcState> { - #[hdl] - fn sim_value_default(self) -> SimValue { - let Self { - states_and_queues, - cancel, - } = self; - 
#[hdl(sim)] - Self { - states_and_queues: states_and_queues.sim_value_default(), - cancel: cancel.sim_value_default(), - } - } -} - -impl ResetSteps for NextPcState> { - #[hdl] - fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { - #[hdl(sim)] - let Self { - states_and_queues, - cancel, } = this; - *cancel = #[hdl(sim)] - (cancel.ty()).HdlNone(); - ResetSteps::reset_step(states_and_queues, step) + #[hdl(sim)] + let CancelInProgressForStageWithQueues::<_, _> { + cancel_state, + input_queue_to_cancel, + output_queue_to_cancel, + } = S::cancel_in_progress_for_stage_mut(cancel); + #[hdl(sim)] + let StageWithQueuesInputs::<_, _> { + to_external_pipe_input_input_ready: _, + to_external_pipe_input_cancel_ready, + from_external_pipe_output_data: _, + } = inputs; + #[hdl(sim)] + let StageWithQueuesOutputs::<_, _> { + to_external_pipe_input_input, + to_external_pipe_input_cancel, + from_external_pipe_output_ready, + } = last_outputs; + assert_eq!(**ArrayVec::len_sim(to_external_pipe_input_input), 0); + #[hdl(sim)] + if let HdlNone = to_external_pipe_input_cancel { + unreachable!(); + } + assert_eq!(**from_external_pipe_output_ready, 0); + if !**to_external_pipe_input_cancel_ready { + return CancelResult::InProgress; + } + for _ in 0..std::mem::replace(input_queue_to_cancel, 0) { + let Some(_) = Queue::undo_push(input_queue) else { + unreachable!(); + }; + } + for _ in 0..std::mem::replace(output_queue_to_cancel, 0) { + let Some(_) = Queue::undo_push(output_queue) else { + unreachable!(); + }; + } + if std::mem::replace(cancel_state, false) { + S::cancel(state, &cancel.cancel); + } + CancelResult::Done + } + #[hdl] + fn make_cancel_all( + this: &SimValue, + ) -> SimValue> { + #[hdl(sim)] + let Self { + input_queue, + state, + output_queue, + config, + } = this; + let CancelInProgressForStageWithQueues { + cancel_state: _, + input_queue_to_cancel, + output_queue_to_cancel, + } = CancelInProgressForStageWithQueues[config.ty()][state.ty()]; + #[hdl(sim)] + 
CancelInProgressForStageWithQueues::<_, _> { + cancel_state: true, + input_queue_to_cancel: Queue::len(input_queue) + .to_sim_value_with_type(input_queue_to_cancel), + output_queue_to_cancel: Queue::len(output_queue) + .to_sim_value_with_type(output_queue_to_cancel), + } + } + /// `sibling_already_ran` should be `true` if both there is a sibling and + /// that sibling's `run()` was already called this clock cycle. + #[hdl] + fn run( + this: &mut SimValue, + inputs: &SimValue>, + last_outputs: &SimValue>, + mut input_stages_output_queues: >::SimValueOutputQueueMuts<'_>, + sibling_already_ran: bool, + sibling: &>::SimValueStageWithQueues, + sibling_inputs: &>::SimValueStageWithQueuesInputs, + ) -> StageWithQueuesRunResult { + #[hdl(sim)] + let Self { + input_queue, + state, + output_queue, + config, + } = this; + let config = config.ty(); + #[hdl(sim)] + let StageWithQueuesInputs::<_, _> { + to_external_pipe_input_input_ready, + to_external_pipe_input_cancel_ready: _, + from_external_pipe_output_data, + } = inputs; + let to_external_pipe_input_input_ready = if let Some(sibling_inputs) = + S::SiblingStage::sim_value_stage_with_queues_inputs_opt(sibling_inputs) + { + // check sibling to keep in lock-step the feeding of items into the input queues + assert_ne!( + S::HAS_EXTERNAL_PIPE, + >::StageOrSomething::HAS_EXTERNAL_PIPE, + ); + if S::HAS_EXTERNAL_PIPE { + // ignore sibling_inputs.to_external_pipe_input_input_ready + **to_external_pipe_input_input_ready + } else { + // ignore inputs.to_external_pipe_input_input_ready + *sibling_inputs.to_external_pipe_input_input_ready + } + } else { + **to_external_pipe_input_input_ready + }; + #[hdl(sim)] + let StageWithQueuesOutputs::<_, _> { + to_external_pipe_input_input, + to_external_pipe_input_cancel, + from_external_pipe_output_ready, + } = last_outputs; + #[hdl(sim)] + if let HdlSome(_) = to_external_pipe_input_cancel { + unreachable!(); + } + let mut input_stages_outputs_popped_count = 0; + for outputs in 
ArrayVec::elements_sim_ref(to_external_pipe_input_input) + .iter() + .take(to_external_pipe_input_input_ready) + { + input_stages_outputs_popped_count += 1; + // only pop in one sibling + if !sibling_already_ran { + let Some(_) = S::InputStages::pop_output_queues( + S::InputStages::reborrow_output_queue_muts(&mut input_stages_output_queues), + ) else { + unreachable!(); + }; + } + let Ok(_) = Queue::try_push(input_queue, outputs) else { + unreachable!(); + }; + } + for ext_pipe_output in ArrayVec::elements_sim_ref(from_external_pipe_output_data) + .iter() + .take(**from_external_pipe_output_ready) + { + let Some(outputs) = Queue::pop(input_queue) else { + unreachable!(); + }; + #[hdl(sim)] + let StageRunOutput::<_, _> { outputs, cancel } = + S::run(state, &S::make_inputs(&outputs, ext_pipe_output)); + for output in ArrayVec::elements_sim_ref(&outputs) { + let Ok(_) = Queue::try_push(output_queue, output) else { + unreachable!(); + }; + } + // handle cancelling only after handling all outputs so the outputs aren't canceled + #[hdl(sim)] + if let HdlSome(cancel) = cancel { + // ignore the rest of the input_queue and from_external_pipe_output_data, + // it doesn't matter that they're getting ignored since we're + // cancelling all inputs anyway. + let cancel_count = Queue::len(input_queue); + let sibling_cancel = S::SiblingStage::make_sim_value_cancel( + S::SiblingStage::sim_value_stage_with_queues_opt(sibling).map(|sibling| { + // this logic assumes both this stage and the sibling stage always output + // one item for every input item when no cancels are generated. + // this logic also assumes inputs are fed in lock step into + // this stage's input queue and the sibling stage's input queue, + // and that outputs are removed in lock step from + // this stage's output queue and the sibling stage's output queue. 
+ let sibling_cancel_count = if sibling_already_ran { + // both this stage and its sibling already pushed the same items to + // their input queues, so they are in lock-step and can use the + // same cancel count. + cancel_count + } else { + // this stage pushed input_stages_outputs_popped_count additional items to its + // input queue, but the sibling hasn't so subtract off those additional items + cancel_count - input_stages_outputs_popped_count + }; + let CancelInProgressForStageWithQueues { + cancel_state: _, + input_queue_to_cancel, + output_queue_to_cancel, + } = CancelInProgressForStageWithQueues[config.ty()][sibling.ty().state]; + let sibling_input_queue_len = Queue::len(&sibling.input_queue); + #[hdl(sim)] + CancelInProgressForStageWithQueues::<_, _> { + // cancel the state if we cancel the whole input queue + cancel_state: sibling_cancel_count >= sibling_input_queue_len, + input_queue_to_cancel: sibling_cancel_count + .min(sibling_input_queue_len) + .to_sim_value_with_type(input_queue_to_cancel), + output_queue_to_cancel: sibling_cancel_count + .saturating_sub(sibling_input_queue_len) + .to_sim_value_with_type(output_queue_to_cancel), + } + }), + ); + let CancelInProgressForStageWithQueues { + cancel_state: _, + input_queue_to_cancel, + output_queue_to_cancel, + } = CancelInProgressForStageWithQueues[config.ty()][state.ty()]; + return StageWithQueuesRunResult::Cancel { + cancel, + stage_cancel: #[hdl(sim)] + CancelInProgressForStageWithQueues::<_, _> { + cancel_state: false, + input_queue_to_cancel: cancel_count + .to_sim_value_with_type(input_queue_to_cancel), + output_queue_to_cancel: 0usize + .to_sim_value_with_type(output_queue_to_cancel), + }, + sibling_cancel, + }; + } + } + StageWithQueuesRunResult::Success { + input_stages_outputs_popped_count, + } } } #[hdl(no_static)] -struct NextPcStateOutputs> { - to_fetch_fetch_data: HdlOption, - to_fetch_cancel_data: - HdlOption, CpuConfigMaxFetchesInFlight>>, - from_decode_inner_ready: Bool, - 
post_decode_output_insns: ArrayVec>, - from_retire_inner_ready: Bool, +struct CancelInProgress + PhantomConstCpuConfig> { + cancel: Cancel, + next_pc: CancelInProgressForStageWithQueues>, + br_pred: CancelInProgressForStageWithQueues>, + fetch_decode: CancelInProgressForStageWithQueues>, + post_decode: CancelInProgressForStageWithQueues>, + execute_retire: CancelInProgressForStageWithQueues>, config: C, } #[hdl(no_static)] -struct NextPcStateStepInputs> { - to_fetch_fetch_triggered: Bool, - to_fetch_cancel_triggered: Bool, - from_decode_inner_triggered: HdlOption>, - post_decode_output_insns_triggered: ArrayVec>, - from_retire_inner_triggered: HdlOption>, +struct AllStages + PhantomConstCpuConfig> { + next_pc: StageWithQueues>, + br_pred: StageWithQueues>, + fetch_decode: StageWithQueues>, + post_decode: StageWithQueues>, + execute_retire: StageWithQueues>, + config: C, } -impl NextPcState> { +impl SimValueDefault for AllStages { #[hdl] - fn outputs(this: &SimValue) -> SimValue>> { + fn sim_value_default(self) -> SimValue { + let Self { + next_pc, + br_pred, + fetch_decode, + post_decode, + execute_retire, + config, + } = self; + #[hdl(sim)] + Self { + next_pc: next_pc.sim_value_default(), + br_pred: br_pred.sim_value_default(), + fetch_decode: fetch_decode.sim_value_default(), + post_decode: post_decode.sim_value_default(), + execute_retire: execute_retire.sim_value_default(), + config, + } + } +} + +impl ResetSteps for AllStages { + #[hdl] + fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { #[hdl(sim)] let Self { - states_and_queues, - cancel, - } = this; - #[hdl(sim)] - let StatesAndQueues::<_> { - next_pc_stage_state: _, - br_pred_stage_inputs: _, - br_pred_stage_state: _, - fetch_decode_stage_inputs, - fetch_decode_stage_state: _, - post_decode_stage_inputs, - post_decode_stage_state: _, - post_decode_stage_outputs, - rename_dispatch_execute_stage_inputs: _, - rename_dispatch_execute_stage_state: _, - retire_stage_inputs, - retire_stage_state: 
_, - config, - } = states_and_queues; - let config = config.ty(); - let NextPcStateOutputs { - to_fetch_fetch_data: _, - to_fetch_cancel_data: to_fetch_cancel_data_ty, - from_decode_inner_ready: _, - post_decode_output_insns: post_decode_output_insns_ty, - from_retire_inner_ready: _, + next_pc, + br_pred, + fetch_decode, + post_decode, + execute_retire, config: _, - } = NextPcStateOutputs[config]; + } = this; + let next_pc = ResetSteps::reset_step(next_pc, step); + let br_pred = ResetSteps::reset_step(br_pred, step); + let fetch_decode = ResetSteps::reset_step(fetch_decode, step); + let post_decode = ResetSteps::reset_step(post_decode, step); + let execute_retire = ResetSteps::reset_step(execute_retire, step); + next_pc + .and(br_pred) + .and(fetch_decode) + .and(post_decode) + .and(execute_retire) + } +} + +#[hdl(no_static)] +struct AllStagesOutputs + PhantomConstCpuConfig> { + next_pc: StageWithQueuesOutputs>, + br_pred: StageWithQueuesOutputs>, + fetch_decode: StageWithQueuesOutputs>, + post_decode: StageWithQueuesOutputs>, + execute_retire: StageWithQueuesOutputs>, + config: C, +} + +#[hdl(no_static)] +struct AllStagesInputs + PhantomConstCpuConfig> { + next_pc: StageWithQueuesInputs>, + br_pred: StageWithQueuesInputs>, + fetch_decode: StageWithQueuesInputs>, + post_decode: StageWithQueuesInputs>, + execute_retire: StageWithQueuesInputs>, + config: C, +} + +impl AllStages { + #[hdl] + fn outputs( + this: &SimValue, + cancel: Option<&SimValue>>, + ) -> SimValue> { #[hdl(sim)] - if let HdlSome(cancel) = cancel { - #[hdl(sim)] - NextPcStateOutputs::<_> { - to_fetch_fetch_data: #[hdl(sim)] - HdlNone(), - to_fetch_cancel_data: CancelInProgress::to_fetch_cancel_data(cancel), - from_decode_inner_ready: false, - post_decode_output_insns: post_decode_output_insns_ty.sim_value_default(), - from_retire_inner_ready: false, - config, - } - } else { - let to_fetch_fetch_data = if let Some(data) = Queue::peek(fetch_decode_stage_inputs) { - #[hdl(sim)] - HdlSome( - #[hdl(sim)] 
- NextPcToFetchInterfaceInner { - start_pc: data.start_pc, - fetch_block_id: data.fetch_block_id, - }, - ) - } else { - #[hdl(sim)] - HdlNone() - }; - let mut post_decode_output_insns = - post_decode_output_insns_ty.new_sim(WipDecodedInsn.sim_value_default()); - let mut post_decode_stage_outputs = post_decode_stage_outputs.clone(); - while let Some(post_decode_stage_output) = Queue::pop(&mut post_decode_stage_outputs) { - #[hdl(sim)] - let PostDecodeStageOutput::<_> { - insn, - next_pc: _, - btb_entry_index: _, - start_branch_history: _, - start_call_stack: _, - branch_predictor_index: _, - config: _, - } = post_decode_stage_output; - ArrayVec::try_push_sim(&mut post_decode_output_insns, insn).expect("known to fit"); - } - #[hdl(sim)] - NextPcStateOutputs::<_> { - to_fetch_fetch_data, - to_fetch_cancel_data: to_fetch_cancel_data_ty.HdlNone(), - from_decode_inner_ready: !Queue::is_full(post_decode_stage_inputs), - post_decode_output_insns, - from_retire_inner_ready: !Queue::is_full(retire_stage_inputs), - config, - } + let Self { + next_pc, + br_pred, + fetch_decode, + post_decode, + execute_retire, + config, + } = this; + let config = config.ty(); + #[hdl(sim)] + AllStagesOutputs::<_> { + next_pc: StageWithQueues::outputs(next_pc, cancel, (), &()), + br_pred: StageWithQueues::outputs(br_pred, cancel, &next_pc.output_queue, fetch_decode), + fetch_decode: StageWithQueues::outputs( + fetch_decode, + cancel, + &next_pc.output_queue, + br_pred, + ), + post_decode: StageWithQueues::outputs( + post_decode, + cancel, + (&fetch_decode.output_queue, &br_pred.output_queue), + &(), + ), + execute_retire: StageWithQueues::outputs( + execute_retire, + cancel, + &post_decode.output_queue, + &(), + ), + config, } } #[hdl] - fn step( + fn cancel( this: &mut SimValue, - inputs: SimValue>>, + cancel: &mut SimValue>, + inputs: &SimValue>, + last_outputs: &SimValue>, + ) -> CancelResult { + #[hdl(sim)] + let Self { + next_pc, + br_pred, + fetch_decode, + post_decode, + execute_retire, 
+ config: _, + } = this; + let next_pc = + StageWithQueues::cancel(next_pc, cancel, &inputs.next_pc, &last_outputs.next_pc); + let br_pred = + StageWithQueues::cancel(br_pred, cancel, &inputs.br_pred, &last_outputs.br_pred); + let fetch_decode = StageWithQueues::cancel( + fetch_decode, + cancel, + &inputs.fetch_decode, + &last_outputs.fetch_decode, + ); + let post_decode = StageWithQueues::cancel( + post_decode, + cancel, + &inputs.post_decode, + &last_outputs.post_decode, + ); + let execute_retire = StageWithQueues::cancel( + execute_retire, + cancel, + &inputs.execute_retire, + &last_outputs.execute_retire, + ); + match (next_pc, br_pred, fetch_decode, post_decode, execute_retire) { + ( + CancelResult::Done, + CancelResult::Done, + CancelResult::Done, + CancelResult::Done, + CancelResult::Done, + ) => CancelResult::Done, + _ => CancelResult::InProgress, + } + } + #[hdl] + fn run( + this: &mut SimValue, + inputs: &SimValue>, + last_outputs: &SimValue>, + ) -> Result<(), SimValue>> { + #[hdl(sim)] + let Self { + next_pc, + br_pred, + fetch_decode, + post_decode, + execute_retire, + config, + } = this; + let config = config.ty(); + let cancel_ty = CancelInProgress[config]; + match StageWithQueues::run( + execute_retire, + &inputs.execute_retire, + &last_outputs.execute_retire, + &mut post_decode.output_queue, + false, + &(), + &(), + ) { + StageWithQueuesRunResult::Cancel { + cancel, + stage_cancel, + sibling_cancel: (), + } => { + return Err( + #[hdl(sim)] + CancelInProgress::<_> { + cancel, + next_pc: StageWithQueues::make_cancel_all(next_pc), + br_pred: StageWithQueues::make_cancel_all(br_pred), + fetch_decode: StageWithQueues::make_cancel_all(fetch_decode), + post_decode: StageWithQueues::make_cancel_all(post_decode), + execute_retire: stage_cancel, + config, + }, + ); + } + StageWithQueuesRunResult::Success { + input_stages_outputs_popped_count: _, + } => {} + } + match StageWithQueues::run( + post_decode, + &inputs.post_decode, + &last_outputs.post_decode, + 
(&mut fetch_decode.output_queue, &mut br_pred.output_queue), + false, + &(), + &(), + ) { + StageWithQueuesRunResult::Cancel { + cancel, + stage_cancel, + sibling_cancel: (), + } => { + return Err( + #[hdl(sim)] + CancelInProgress::<_> { + cancel, + next_pc: StageWithQueues::make_cancel_all(next_pc), + br_pred: StageWithQueues::make_cancel_all(br_pred), + fetch_decode: StageWithQueues::make_cancel_all(fetch_decode), + post_decode: stage_cancel, + execute_retire: cancel_ty.execute_retire.nothing_to_cancel(), + config, + }, + ); + } + StageWithQueuesRunResult::Success { + input_stages_outputs_popped_count: _, + } => {} + } + let next_pc_popped_count = match StageWithQueues::run( + fetch_decode, + &inputs.fetch_decode, + &last_outputs.fetch_decode, + &mut next_pc.output_queue, + false, + br_pred, + &inputs.br_pred, + ) { + StageWithQueuesRunResult::Cancel { + cancel, + stage_cancel, + sibling_cancel, + } => { + return Err( + #[hdl(sim)] + CancelInProgress::<_> { + cancel, + next_pc: StageWithQueues::make_cancel_all(next_pc), + br_pred: sibling_cancel, + fetch_decode: stage_cancel, + post_decode: cancel_ty.post_decode.nothing_to_cancel(), + execute_retire: cancel_ty.execute_retire.nothing_to_cancel(), + config, + }, + ); + } + StageWithQueuesRunResult::Success { + input_stages_outputs_popped_count, + } => input_stages_outputs_popped_count, + }; + match StageWithQueues::run( + br_pred, + &inputs.br_pred, + &last_outputs.br_pred, + &mut next_pc.output_queue, + true, + fetch_decode, + &inputs.fetch_decode, + ) { + StageWithQueuesRunResult::Cancel { + cancel, + stage_cancel, + sibling_cancel, + } => { + return Err( + #[hdl(sim)] + CancelInProgress::<_> { + cancel, + next_pc: StageWithQueues::make_cancel_all(next_pc), + br_pred: stage_cancel, + fetch_decode: sibling_cancel, + post_decode: cancel_ty.post_decode.nothing_to_cancel(), + execute_retire: cancel_ty.execute_retire.nothing_to_cancel(), + config, + }, + ); + } + StageWithQueuesRunResult::Success { + 
input_stages_outputs_popped_count, + } => { + assert_eq!(next_pc_popped_count, input_stages_outputs_popped_count); + } + } + match StageWithQueues::run( + next_pc, + &inputs.next_pc, + &last_outputs.next_pc, + (), + false, + &(), + &(), + ) { + StageWithQueuesRunResult::Cancel { + cancel, + stage_cancel, + sibling_cancel: (), + } => { + return Err( + #[hdl(sim)] + CancelInProgress::<_> { + cancel, + next_pc: stage_cancel, + br_pred: cancel_ty.br_pred.nothing_to_cancel(), + fetch_decode: cancel_ty.fetch_decode.nothing_to_cancel(), + post_decode: cancel_ty.post_decode.nothing_to_cancel(), + execute_retire: cancel_ty.execute_retire.nothing_to_cancel(), + config, + }, + ); + } + StageWithQueuesRunResult::Success { + input_stages_outputs_popped_count: _, + } => {} + } + for _ in 0..config.get().fetch_width.get() { + let Some(execute_retire_output) = Queue::pop(&mut execute_retire.output_queue) else { + break; + }; + #[hdl(sim)] + let ExecuteRetireStageOutput::<_> { + train_branch_predictor, + config: _, + } = &execute_retire_output; + #[hdl(sim)] + if let HdlSome(train_branch_predictor) = train_branch_predictor { + BrPredStageState::train_branch_predictor( + &mut br_pred.state, + train_branch_predictor, + ); + // for now we only retire one conditional branch per clock cycle + // TODO: maybe improve later? 
+ break; + } + } + Ok(()) + } +} + +#[hdl(no_static)] +pub struct NextPcState + PhantomConstCpuConfig> { + all_stages: AllStages, + cancel: HdlOption>, + config: C, +} + +impl SimValueDefault for NextPcState { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + all_stages, + cancel, + config, + } = self; + #[hdl(sim)] + Self { + all_stages: all_stages.sim_value_default(), + cancel: cancel.sim_value_default(), + config, + } + } +} + +impl ResetSteps for NextPcState { + #[hdl] + fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { + all_stages, + cancel, + config: _, + } = this; + *cancel = #[hdl(sim)] + (cancel.ty()).HdlNone(); + ResetSteps::reset_step(all_stages, step) + } +} + +impl NextPcState { + #[hdl] + fn outputs(this: &SimValue) -> SimValue> { + #[hdl(sim)] + let Self { + all_stages, + cancel, + config: _, + } = this; + let cancel = #[hdl(sim)] + match cancel { + HdlSome(cancel) => Some(cancel), + HdlNone => None, + }; + AllStages::outputs(all_stages, cancel) + } + #[hdl] + fn run( + this: &mut SimValue, + inputs: &SimValue>, + last_outputs: &SimValue>, ) { #[hdl(sim)] let Self { - states_and_queues, + all_stages, cancel, + config: _, } = this; #[hdl(sim)] - if let HdlSome(_) = &cancel { - StatesAndQueues::step_cancel(states_and_queues, cancel, inputs); + if let HdlSome(cancel_in_progress) = &mut *cancel { + match AllStages::cancel(all_stages, cancel_in_progress, inputs, last_outputs) { + CancelResult::Done => { + *cancel = #[hdl(sim)] + (cancel.ty()).HdlNone(); + } + CancelResult::InProgress => {} + } } else { - *cancel = StatesAndQueues::step_no_cancel(states_and_queues, inputs); + match AllStages::run(all_stages, inputs, last_outputs) { + Ok(()) => {} + Err(cancel_in_progress) => { + *cancel = #[hdl(sim)] + (cancel.ty()).HdlSome(cancel_in_progress); + } + } } } } +fn uint_in_range_inclusive_max( + ty: UIntInRangeInclusiveType, +) -> SimValue> { + End::as_usize(ty.end()).to_sim_value_with_type(ty) +} + 
#[hdl_module(extern)] pub fn next_pc(config: PhantomConst) { #[hdl] @@ -2833,6 +3886,7 @@ pub fn next_pc(config: PhantomConst) { state_expr: Expr>>, ) { let mut state = sim.read(state_expr).await; + let config = state.config.ty(); for step in 0usize.. { sim.write(state_expr, state).await; sim.wait_for_clock_edge(cd.clk).await; @@ -2844,70 +3898,104 @@ pub fn next_pc(config: PhantomConst) { } } loop { - #[hdl(sim)] - let NextPcStateOutputs::<_> { - to_fetch_fetch_data, - to_fetch_cancel_data, - from_decode_inner_ready, - post_decode_output_insns, - from_retire_inner_ready, - config: _, - } = NextPcState::outputs(&state); + let outputs = NextPcState::outputs(&state); + let to_fetch_fetch_data = #[hdl(sim)] + if let HdlSome(v) = + ArrayVec::into_opt_sim(&outputs.fetch_decode.to_external_pipe_input_input) + { + #[hdl(sim)] + HdlSome( + #[hdl(sim)] + NextPcToFetchInterfaceInner { + start_pc: v.start_pc, + fetch_block_id: v.fetch_block_id, + }, + ) + } else { + #[hdl(sim)] + HdlNone() + }; sim.write(to_fetch.fetch.data, to_fetch_fetch_data).await; - sim.write(to_fetch.cancel.data, to_fetch_cancel_data).await; - sim.write(from_decode.inner.ready, from_decode_inner_ready) - .await; - sim.write(post_decode_output.insns, post_decode_output_insns) - .await; - sim.write(from_retire.inner.ready, from_retire_inner_ready) - .await; + sim.write( + to_fetch.cancel.data, + &outputs.fetch_decode.to_external_pipe_input_cancel, + ) + .await; + sim.write( + from_decode.inner.ready, + *outputs.fetch_decode.from_external_pipe_output_ready != 0, + ) + .await; + sim.write( + post_decode_output.insns, + ArrayVec::map_sim( + &outputs.execute_retire.to_external_pipe_input_input, + WipDecodedInsn.sim_value_default(), + |_, v| v.insn.clone(), + ), + ) + .await; + sim.write( + from_retire.inner.ready, + *outputs.execute_retire.from_external_pipe_output_ready + >= config.get().fetch_width.get(), + ) + .await; sim.write(state_expr, state).await; sim.wait_for_clock_edge(cd.clk).await; state = 
sim.read_past(state_expr, cd.clk).await; - let to_fetch_fetch_triggered = + let AllStagesInputs { + next_pc, + br_pred, + fetch_decode, + post_decode, + execute_retire, + config: _, + } = AllStagesInputs[config]; + let fetch_ready: SimValue> = sim + .read_past(to_fetch.fetch.ready, cd.clk) + .await + .cast_to_static::>() + .cast_to(fetch_decode.to_external_pipe_input_input_ready); + let fetch_cancel_ready = sim.read_past(to_fetch.cancel.ready, cd.clk).await; + let decode_data = sim.read_past(from_decode.inner.data, cd.clk).await; + let post_decode_output_ready = sim.read_past(post_decode_output.ready, cd.clk).await; + let retire_data = #[hdl(sim)] - if let HdlSome(_) = sim.read_past(to_fetch.fetch.data, cd.clk).await { - *sim.read_past(to_fetch.fetch.ready, cd.clk).await - } else { - false - }; - let to_fetch_cancel_triggered = - #[hdl(sim)] - if let HdlSome(_) = sim.read_past(to_fetch.cancel.data, cd.clk).await { - *sim.read_past(to_fetch.cancel.ready, cd.clk).await - } else { - false - }; - let from_decode_inner_triggered = - if *sim.read_past(from_decode.inner.ready, cd.clk).await { - sim.read_past(from_decode.inner.data, cd.clk).await - } else { + if let HdlSome(data) = sim.read_past(from_retire.inner.data, cd.clk).await { #[hdl(sim)] - (from_decode.ty().inner.data).HdlNone() - }; - let mut post_decode_output_insns_triggered = - sim.read_past(post_decode_output.insns, cd.clk).await; - ArrayVec::truncate_sim( - &mut post_decode_output_insns_triggered, - *sim.read_past(post_decode_output.ready, cd.clk).await, - ); - let from_retire_inner_triggered = - if *sim.read_past(from_retire.inner.ready, cd.clk).await { - sim.read_past(from_retire.inner.data, cd.clk).await + let RetireToNextPcInterfaceInner::<_> { insns, config: _ } = data; + insns } else { - #[hdl(sim)] - (from_retire.ty().inner.data).HdlNone() + execute_retire + .from_external_pipe_output_data + .sim_value_default() }; - NextPcState::step( + NextPcState::run( &mut state, - #[hdl(sim)] - 
NextPcStateStepInputs::<_> { - to_fetch_fetch_triggered, - to_fetch_cancel_triggered, - from_decode_inner_triggered, - post_decode_output_insns_triggered, - from_retire_inner_triggered, + &#[hdl(sim)] + AllStagesInputs::<_> { + next_pc: next_pc.no_external_pipe(), + br_pred: br_pred.no_external_pipe(), + fetch_decode: #[hdl(sim)] + StageWithQueuesInputs::<_, _> { + to_external_pipe_input_input_ready: fetch_ready, + to_external_pipe_input_cancel_ready: fetch_cancel_ready, + from_external_pipe_output_data: ArrayVec::from_opt_sim( + decode_data, + DecodeToPostDecodeInterfaceInner[config].sim_value_default(), + ), + }, + post_decode: post_decode.no_external_pipe(), + execute_retire: #[hdl(sim)] + StageWithQueuesInputs::<_, _> { + to_external_pipe_input_input_ready: post_decode_output_ready, + to_external_pipe_input_cancel_ready: true, + from_external_pipe_output_data: retire_data, + }, + config, }, + &outputs, ); } } diff --git a/crates/cpu/src/util/array_vec.rs b/crates/cpu/src/util/array_vec.rs index 0f6fc6f..71275b1 100644 --- a/crates/cpu/src/util/array_vec.rs +++ b/crates/cpu/src/util/array_vec.rs @@ -190,6 +190,35 @@ impl ArrayVec { mapped_array_vec } #[hdl] + pub fn map_sim( + this: impl ToSimValue, + uninit_element: impl ToSimValue, + mut f: impl FnMut(usize, SimValue) -> SimValue, + ) -> SimValue> { + let this = this.into_sim_value(); + let uninit_element = uninit_element.into_sim_value(); + let ty = this.ty().mapped_ty(uninit_element.ty()); + #[hdl(sim)] + let Self { elements, len } = this; + #[hdl(sim)] + ArrayVec::<_, _> { + elements: SimValue::from_array_elements( + ty.elements, + SimValue::into_value(elements) + .into_iter() + .enumerate() + .map(|(index, element)| { + if index < *len { + f(index, element) + } else { + uninit_element.clone() + } + }), + ), + len, + } + } + #[hdl] pub fn as_array_of_options(this: impl ToExpr) -> Expr, N>> { let this = this.to_expr(); #[hdl] @@ -217,3 +246,34 @@ where as ExprIndex>::expr_index(&this.elements, index) } } + 
+impl ArrayVec> { + #[hdl] + pub fn from_opt_sim( + opt: impl ToSimValue>, + uninit_element: impl ToSimValueWithType, + ) -> SimValue { + let opt = opt.into_sim_value(); + let ty = ArrayVec[opt.ty().HdlSome][ConstUsize]; + #[hdl(sim)] + match opt { + HdlSome(v) => ty.new_full_sim([v]), + HdlNone => ty.new_sim(uninit_element), + } + } + #[hdl] + pub fn into_opt_sim(this: impl ToSimValue) -> SimValue> { + let this = this.into_sim_value(); + #[hdl(sim)] + let Self { elements, len } = this; + let [element] = SimValue::into_value(elements); + let ty = HdlOption[element.ty()]; + if *len == 0 { + #[hdl(sim)] + ty.HdlNone() + } else { + #[hdl(sim)] + ty.HdlSome(element) + } + } +} diff --git a/crates/cpu/tests/next_pc.rs b/crates/cpu/tests/next_pc.rs index 9af026d..7181f51 100644 --- a/crates/cpu/tests/next_pc.rs +++ b/crates/cpu/tests/next_pc.rs @@ -78,7 +78,7 @@ const DEMO_ILLEGAL_INSN_TRAP: u64 = 0xFF000000u64; #[hdl] struct FetchPipeQueueEntry { - fetch_pc: UInt<64>, + start_pc: UInt<64>, cycles_left: UInt<8>, fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>, } @@ -88,7 +88,7 @@ impl FetchPipeQueueEntry { fn default_sim(self) -> SimValue { #[hdl(sim)] FetchPipeQueueEntry { - fetch_pc: 0u64, + start_pc: 0u64, cycles_left: 0u8, fetch_block_id: 0u8, } @@ -129,7 +129,8 @@ fn mock_fetch_pipe(config: PhantomConst) { sim.resettable( cd, async |mut sim| { - sim.write(from_fetch.inner.ready, false).await; + sim.write(from_fetch.fetch.ready, false).await; + sim.write(from_fetch.cancel.ready, false).await; sim.write( to_post_decode.inner.data, to_post_decode.ty().inner.data.HdlNone(), @@ -179,21 +180,21 @@ fn mock_fetch_pipe(config: PhantomConst) { if let Some(front) = queue.front().filter(|v| v.cycles_left.as_int() == 0) { #[hdl(sim)] let FetchPipeQueueEntry { - fetch_pc, + start_pc, cycles_left: _, fetch_block_id, } = front; - let fetch_pc = fetch_pc.as_int(); - let fetch_end = - (fetch_pc + 1).next_multiple_of(config.get().fetch_width_in_bytes() as u64); + let start_pc = 
start_pc.as_int(); + let end_pc = + (start_pc + 1).next_multiple_of(config.get().fetch_width_in_bytes() as u64); let insns = to_post_decode.ty().inner.data.HdlSome.insns; let zeroed_insn = UInt[insns.element().canonical().bit_width()] .zero() .cast_bits_to(insns.element()); let mut insns = insns.new_sim(zeroed_insn); - let mut expected_pc = fetch_pc; + let mut expected_pc = start_pc; // TODO: handle instructions that go past the end of a fetch block - for (pc, insn) in mock_insns.fetch_block(fetch_pc..fetch_end) { + for (pc, insn) in mock_insns.fetch_block(start_pc..end_pc) { let next_pc = pc + insn.byte_len(); if pc != expected_pc { break; @@ -226,7 +227,7 @@ fn mock_fetch_pipe(config: PhantomConst) { WipDecodedInsn { fetch_block_id, id: next_id.cast_to_static::>(), - pc: fetch_pc, + pc: start_pc, size_in_bytes: 0u8.cast_to_static::>(), kind: WipDecodedInsnKind.Interrupt(DEMO_ILLEGAL_INSN_TRAP), }, @@ -250,8 +251,9 @@ fn mock_fetch_pipe(config: PhantomConst) { ) .await; } - sim.write(from_fetch.inner.ready, queue.len() < FETCH_PIPE_QUEUE_SIZE) + sim.write(from_fetch.fetch.ready, queue.len() < FETCH_PIPE_QUEUE_SIZE) .await; + sim.write(from_fetch.cancel.ready, true).await; sim.wait_for_clock_edge(cd.clk).await; if sim.read_past_bool(to_post_decode.inner.ready, cd.clk).await { #[hdl(sim)] @@ -264,25 +266,31 @@ fn mock_fetch_pipe(config: PhantomConst) { entry.cycles_left = (entry.cycles_left.as_int() - 1u8).to_sim_value(); } } - if !sim.read_past_bool(from_fetch.inner.ready, cd.clk).await { - continue; - } + // handle cancels before pushing new fetch op #[hdl(sim)] - if let HdlSome(inner) = sim.read_past(from_fetch.inner.data, cd.clk).await { - #[hdl(sim)] - let NextPcToFetchInterfaceInner { - next_fetch_pc, - fetch_block_id, - in_progress_fetches_to_cancel, - } = &inner; + if let HdlSome(in_progress_fetches_to_cancel) = + sim.read_past(from_fetch.cancel.data, cd.clk).await + { // cancel in-progress fetches from newest to oldest - for _ in 
0..in_progress_fetches_to_cancel.as_int() { + for _ in 0..*in_progress_fetches_to_cancel { let _ = queue.pop_back(); } + } + if !sim.read_past_bool(from_fetch.fetch.ready, cd.clk).await { + continue; + } + // handle pushing new fetch op after handling cancels + #[hdl(sim)] + if let HdlSome(inner) = sim.read_past(from_fetch.fetch.data, cd.clk).await { + #[hdl(sim)] + let NextPcToFetchInterfaceInner { + start_pc, + fetch_block_id, + } = &inner; queue.push_back( #[hdl(sim)] FetchPipeQueueEntry { - fetch_pc: next_fetch_pc, + start_pc, cycles_left: FetchPipeQueueEntry::get_next_delay(delay_sequence_index), fetch_block_id, },