fix queueing errors

This commit is contained in:
Jacob Lifshay 2025-12-15 00:47:53 -08:00
parent 8ab63f3c6a
commit 84e4fde512
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
3 changed files with 20332 additions and 15680 deletions

View file

@ -24,9 +24,9 @@ use fayalite::{
prelude::*, prelude::*,
sim::value::SimOnlyValueTrait, sim::value::SimOnlyValueTrait,
ty::StaticType, ty::StaticType,
util::ready_valid::ReadyValid, util::{DebugAsDisplay, ready_valid::ReadyValid},
}; };
use std::borrow::Cow; use std::{borrow::Cow, fmt};
pub const FETCH_BLOCK_ID_WIDTH: usize = FetchBlockIdInt::BITS as usize; pub const FETCH_BLOCK_ID_WIDTH: usize = FetchBlockIdInt::BITS as usize;
type FetchBlockIdInt = u8; type FetchBlockIdInt = u8;
@ -246,6 +246,7 @@ pub struct RetireToNextPcInterfaceInner<C: PhantomConstGet<CpuConfig>> {
/// branch instruction is mis-speculated. /// branch instruction is mis-speculated.
pub struct RetireToNextPcInterface<C: PhantomConstGet<CpuConfig>> { pub struct RetireToNextPcInterface<C: PhantomConstGet<CpuConfig>> {
pub inner: ReadyValid<RetireToNextPcInterfaceInner<C>>, pub inner: ReadyValid<RetireToNextPcInterfaceInner<C>>,
pub next_insn_ids: ArrayVec<UInt<12>, CpuConfigRobSize<C>>,
} }
#[hdl(no_static)] #[hdl(no_static)]
@ -311,8 +312,8 @@ struct StageRunOutput<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig, S:
trait Stages<C: PhantomConstCpuConfig>: Type { trait Stages<C: PhantomConstCpuConfig>: Type {
type Outputs: Type + SimValueDefault; type Outputs: Type + SimValueDefault;
type SimValueOutputQueueRefs<'a>: 'a + Copy; type SimValueOutputQueueRefs<'a>: 'a + Copy + fmt::Debug;
type SimValueOutputQueueMuts<'a>: 'a; type SimValueOutputQueueMuts<'a>: 'a + fmt::Debug;
fn outputs_ty(config: C) -> Self::Outputs; fn outputs_ty(config: C) -> Self::Outputs;
fn reborrow_output_queues_as_refs<'a>( fn reborrow_output_queues_as_refs<'a>(
output_queues: &'a Self::SimValueOutputQueueMuts<'_>, output_queues: &'a Self::SimValueOutputQueueMuts<'_>,
@ -329,6 +330,7 @@ trait Stages<C: PhantomConstCpuConfig>: Type {
output_queues: Self::SimValueOutputQueueMuts<'_>, output_queues: Self::SimValueOutputQueueMuts<'_>,
) -> Option<SimValue<Self::Outputs>>; ) -> Option<SimValue<Self::Outputs>>;
fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V); fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V);
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String;
} }
impl<C: PhantomConstCpuConfig> Stages<C> for () { impl<C: PhantomConstCpuConfig> Stages<C> for () {
@ -377,14 +379,30 @@ impl<C: PhantomConstCpuConfig> Stages<C> for () {
#[hdl(sim)] #[hdl(sim)]
let () = this; let () = this;
} }
/// Formats the outputs of the `()` implementation of `Stages` for debugging.
///
/// There is nothing to describe, so this always returns the literal string `"()"`.
#[hdl]
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String {
// destructure the unit value; it carries no data to print
#[hdl(sim)]
let () = outputs;
String::from("()")
}
} }
impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 { impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 {
type Outputs = S1::Output; type Outputs = S1::Output;
type SimValueOutputQueueRefs<'a> = type SimValueOutputQueueRefs<'a> = &'a SimValue<
&'a SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>; Queue<
type SimValueOutputQueueMuts<'a> = StageOutput<C, S1>,
&'a mut SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>; StageOutputQueueSize<C, S1>,
StageWithQueuesOutputQueueName<StageName<C, S1>>,
>,
>;
type SimValueOutputQueueMuts<'a> = &'a mut SimValue<
Queue<
StageOutput<C, S1>,
StageOutputQueueSize<C, S1>,
StageWithQueuesOutputQueueName<StageName<C, S1>>,
>,
>;
fn outputs_ty(config: C) -> Self::Outputs { fn outputs_ty(config: C) -> Self::Outputs {
S1::output_ty(config) S1::output_ty(config)
} }
@ -402,7 +420,7 @@ impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 {
output_queues: Self::SimValueOutputQueueRefs<'_>, output_queues: Self::SimValueOutputQueueRefs<'_>,
max_peek_len: usize, max_peek_len: usize,
) -> impl Iterator<Item = SimValue<Self::Outputs>> { ) -> impl Iterator<Item = SimValue<Self::Outputs>> {
Queue::peek_iter(output_queues).take(max_peek_len) Queue::peek_iter(output_queues).take(max_peek_len).cloned()
} }
fn pop_output_queues( fn pop_output_queues(
output_queues: Self::SimValueOutputQueueMuts<'_>, output_queues: Self::SimValueOutputQueueMuts<'_>,
@ -412,17 +430,45 @@ impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 {
fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V) { fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V) {
visitor.visit(this); visitor.visit(this);
} }
/// Formats the outputs for debugging.
///
/// For a single stage the outputs are just that stage's output, so this
/// forwards directly to `S1::dump_output_item`.
#[hdl]
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String {
S1::dump_output_item(outputs)
}
} }
impl<C: PhantomConstCpuConfig, S1: Stage<C>, S2: Stage<C>> Stages<C> for (S1, S2) { impl<C: PhantomConstCpuConfig, S1: Stage<C>, S2: Stage<C>> Stages<C> for (S1, S2) {
type Outputs = (S1::Output, S2::Output); type Outputs = (S1::Output, S2::Output);
type SimValueOutputQueueRefs<'a> = ( type SimValueOutputQueueRefs<'a> = (
&'a SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>, &'a SimValue<
&'a SimValue<Queue<StageOutput<C, S2>, StageOutputQueueSize<C, S2>>>, Queue<
StageOutput<C, S1>,
StageOutputQueueSize<C, S1>,
StageWithQueuesOutputQueueName<StageName<C, S1>>,
>,
>,
&'a SimValue<
Queue<
StageOutput<C, S2>,
StageOutputQueueSize<C, S2>,
StageWithQueuesOutputQueueName<StageName<C, S2>>,
>,
>,
); );
type SimValueOutputQueueMuts<'a> = ( type SimValueOutputQueueMuts<'a> = (
&'a mut SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>, &'a mut SimValue<
&'a mut SimValue<Queue<StageOutput<C, S2>, StageOutputQueueSize<C, S2>>>, Queue<
StageOutput<C, S1>,
StageOutputQueueSize<C, S1>,
StageWithQueuesOutputQueueName<StageName<C, S1>>,
>,
>,
&'a mut SimValue<
Queue<
StageOutput<C, S2>,
StageOutputQueueSize<C, S2>,
StageWithQueuesOutputQueueName<StageName<C, S2>>,
>,
>,
); );
fn outputs_ty(config: C) -> Self::Outputs { fn outputs_ty(config: C) -> Self::Outputs {
(S1::output_ty(config), S2::output_ty(config)) (S1::output_ty(config), S2::output_ty(config))
@ -474,6 +520,14 @@ impl<C: PhantomConstCpuConfig, S1: Stage<C>, S2: Stage<C>> Stages<C> for (S1, S2
visitor.visit(s1); visitor.visit(s1);
visitor.visit(s2); visitor.visit(s2);
} }
/// Formats the outputs of a pair of stages for debugging.
///
/// Each half is rendered by its own stage's `dump_output_item` and the two
/// results are joined as `(<first>, <second>)`.
#[hdl]
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String {
    #[hdl(sim)]
    let (first, second) = outputs;
    format!(
        "({}, {})",
        S1::dump_output_item(first),
        S2::dump_output_item(second),
    )
}
} }
trait StagesVisitSimValueRef<C: PhantomConstCpuConfig> { trait StagesVisitSimValueRef<C: PhantomConstCpuConfig> {
@ -596,6 +650,7 @@ trait Stage<C: PhantomConstCpuConfig>: Type + SimValueDefault + ResetSteps {
type InputQueueSize: Size; type InputQueueSize: Size;
type OutputQueueSize: Size; type OutputQueueSize: Size;
const HAS_EXTERNAL_PIPE: bool; const HAS_EXTERNAL_PIPE: bool;
const NAME: &'static str;
fn inputs_ty(config: C) -> Self::Inputs; fn inputs_ty(config: C) -> Self::Inputs;
fn output_ty(config: C) -> Self::Output; fn output_ty(config: C) -> Self::Output;
@ -620,6 +675,8 @@ trait Stage<C: PhantomConstCpuConfig>: Type + SimValueDefault + ResetSteps {
from_external_pipe_output_item: &SimValue<Self::FromExternalPipeOutputItem>, from_external_pipe_output_item: &SimValue<Self::FromExternalPipeOutputItem>,
) -> SimValue<Self::Inputs>; ) -> SimValue<Self::Inputs>;
fn dump_output_item(item: &SimValue<Self::Output>) -> String;
/// see [`StageRunOutput`] for docs on output /// see [`StageRunOutput`] for docs on output
fn run( fn run(
state: &mut SimValue<Self>, state: &mut SimValue<Self>,
@ -635,7 +692,7 @@ macro_rules! hdl_type_alias_with_generics {
#[ty = $ty:expr] #[ty = $ty:expr]
$vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty; $vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty;
) => { ) => {
$vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type; $vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<($C, $Arg)>>::Type;
$vis struct $WithoutGenerics {} $vis struct $WithoutGenerics {}
@ -668,7 +725,7 @@ macro_rules! hdl_type_alias_with_generics {
#[size = $size:expr] #[size = $size:expr]
$vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty; $vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty;
) => { ) => {
$vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type; $vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<($C, $Arg)>>::Type;
$vis struct $WithoutGenerics {} $vis struct $WithoutGenerics {}
@ -734,6 +791,12 @@ hdl_type_alias_with_generics! {
type StageFromExternalPipeOutputItem<C: PhantomConstCpuConfig, T: Stage<C>> = <T as Stage<C>>::FromExternalPipeOutputItem; type StageFromExternalPipeOutputItem<C: PhantomConstCpuConfig, T: Stage<C>> = <T as Stage<C>>::FromExternalPipeOutputItem;
} }
// Declares `StageName<C, T>` as an alias for `PhantomConst<str>`.
// The `#[ty = ...]` closure ignores its arguments and builds the value from
// the stage's `NAME` associated constant.
hdl_type_alias_with_generics! {
#[without_generics = StageNameWithoutGenerics, StageNameWithStage]
#[ty = |_config, _stage| PhantomConst::new(<T as Stage<C>>::NAME)]
type StageName<C: PhantomConstCpuConfig, T: Stage<C>> = PhantomConst<str>;
}
hdl_type_alias_with_generics! { hdl_type_alias_with_generics! {
#[without_generics = StageMaxOutputsPerStepWithoutGenerics, StageMaxOutputsPerStepWithStage] #[without_generics = StageMaxOutputsPerStepWithoutGenerics, StageMaxOutputsPerStepWithStage]
#[size = |config: C, _stage| T::max_outputs_per_step(config)] #[size = |config: C, _stage| T::max_outputs_per_step(config)]
@ -880,9 +943,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
type FromExternalPipeOutputItem = (); type FromExternalPipeOutputItem = ();
type MaxOutputsPerStep = ConstUsize<1>; type MaxOutputsPerStep = ConstUsize<1>;
type ExternalPipeIoWidth = ConstUsize<1>; type ExternalPipeIoWidth = ConstUsize<1>;
type InputQueueSize = ConstUsize<1>; type InputQueueSize = ConstUsize<2>;
type OutputQueueSize = ConstUsize<1>; type OutputQueueSize = ConstUsize<2>;
const HAS_EXTERNAL_PIPE: bool = false; const HAS_EXTERNAL_PIPE: bool = false;
const NAME: &'static str = "next_pc";
fn inputs_ty(_config: C) -> Self::Inputs { fn inputs_ty(_config: C) -> Self::Inputs {
() ()
@ -945,6 +1009,24 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
() ()
} }
/// Formats one next-pc stage output item for debugging.
///
/// Only the fetch block id and the start pc are shown, both in `0x`-prefixed
/// hexadecimal: `fid=<id> pc=<pc>`. All other fields are ignored.
#[hdl]
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
    #[hdl(sim)]
    let NextPcStageOutput::<_> {
        start_pc,
        next_start_pc: _,
        btb_entry: _,
        fetch_block_id,
        start_call_stack: _,
        config: _,
    } = item;
    let fid = fetch_block_id.as_int();
    let pc = start_pc.as_int();
    format!("fid={fid:#x} pc={pc:#x}")
}
#[hdl] #[hdl]
fn run( fn run(
state: &mut SimValue<Self>, state: &mut SimValue<Self>,
@ -1114,6 +1196,8 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
#[hdl(no_static)] #[hdl(no_static)]
struct BrPredStageOutput<C: PhantomConstGet<CpuConfig>> { struct BrPredStageOutput<C: PhantomConstGet<CpuConfig>> {
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
start_pc: UInt<64>,
start_branch_history: UInt<6>, start_branch_history: UInt<6>,
branch_predictor_index: HdlOption<UIntInRange<0, { BRANCH_PREDICTOR_SIZE }>>, branch_predictor_index: HdlOption<UIntInRange<0, { BRANCH_PREDICTOR_SIZE }>>,
config: C, config: C,
@ -1124,6 +1208,8 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for BrPredStageOutput<C> {
fn sim_value_default(self) -> SimValue<Self> { fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)] #[hdl(sim)]
Self { Self {
fetch_block_id: self.fetch_block_id.zero(),
start_pc: 0u64,
start_branch_history: self.start_branch_history.zero(), start_branch_history: self.start_branch_history.zero(),
branch_predictor_index: #[hdl(sim)] branch_predictor_index: #[hdl(sim)]
HdlNone(), HdlNone(),
@ -1203,9 +1289,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
type FromExternalPipeOutputItem = (); type FromExternalPipeOutputItem = ();
type MaxOutputsPerStep = ConstUsize<1>; type MaxOutputsPerStep = ConstUsize<1>;
type ExternalPipeIoWidth = ConstUsize<1>; type ExternalPipeIoWidth = ConstUsize<1>;
type InputQueueSize = ConstUsize<1>; type InputQueueSize = ConstUsize<2>;
type OutputQueueSize = CpuConfigMaxFetchesInFlight<C>; type OutputQueueSize = CpuConfigMaxFetchesInFlight<C>;
const HAS_EXTERNAL_PIPE: bool = false; const HAS_EXTERNAL_PIPE: bool = false;
const NAME: &'static str = "br_pred";
fn inputs_ty(config: C) -> Self::Inputs { fn inputs_ty(config: C) -> Self::Inputs {
NextPcStageOutput[config] NextPcStageOutput[config]
@ -1265,6 +1352,23 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
input_stages_outputs.clone() input_stages_outputs.clone()
} }
/// Formats one branch-predictor stage output item for debugging.
///
/// Only the fetch block id and the start pc are shown, both in `0x`-prefixed
/// hexadecimal: `fid=<id> pc=<pc>`. All other fields are ignored.
#[hdl]
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
    #[hdl(sim)]
    let BrPredStageOutput::<_> {
        fetch_block_id,
        start_pc,
        start_branch_history: _,
        branch_predictor_index: _,
        config: _,
    } = item;
    let fid = fetch_block_id.as_int();
    let pc = start_pc.as_int();
    format!("fid={fid:#x} pc={pc:#x}")
}
#[hdl] #[hdl]
fn run( fn run(
state: &mut SimValue<Self>, state: &mut SimValue<Self>,
@ -1277,7 +1381,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
start_pc, start_pc,
next_start_pc: _, next_start_pc: _,
btb_entry, btb_entry,
fetch_block_id: _, fetch_block_id,
start_call_stack, start_call_stack,
config: _, config: _,
} = inputs; } = inputs;
@ -1343,6 +1447,8 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
} }
let output = #[hdl(sim)] let output = #[hdl(sim)]
BrPredStageOutput::<_> { BrPredStageOutput::<_> {
fetch_block_id,
start_pc,
start_branch_history, start_branch_history,
branch_predictor_index, branch_predictor_index,
config, config,
@ -1450,8 +1556,9 @@ impl<C: PhantomConstCpuConfig> Stage<C> for FetchDecodeStageState<C> {
type MaxOutputsPerStep = ConstUsize<1>; type MaxOutputsPerStep = ConstUsize<1>;
type ExternalPipeIoWidth = ConstUsize<1>; type ExternalPipeIoWidth = ConstUsize<1>;
type InputQueueSize = CpuConfigMaxFetchesInFlight<C>; type InputQueueSize = CpuConfigMaxFetchesInFlight<C>;
type OutputQueueSize = ConstUsize<1>; type OutputQueueSize = ConstUsize<2>;
const HAS_EXTERNAL_PIPE: bool = true; const HAS_EXTERNAL_PIPE: bool = true;
const NAME: &'static str = "fetch_decode";
fn inputs_ty(config: C) -> Self::Inputs { fn inputs_ty(config: C) -> Self::Inputs {
FetchDecodeStageOutput[config] FetchDecodeStageOutput[config]
@ -1513,6 +1620,44 @@ impl<C: PhantomConstCpuConfig> Stage<C> for FetchDecodeStageState<C> {
} }
} }
/// Formats one fetch/decode stage output item for debugging.
///
/// The result has the shape `fid=<hex> pc=<hex> [id=<id> pc=<hex>, ...]`:
/// the fetch block id and start pc of the block, followed by one bracketed
/// entry per decoded instruction.
#[hdl]
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
#[hdl(sim)]
let FetchDecodeStageOutput::<_> {
next_pc_stage_output,
decode_output,
} = item;
// the block-level fetch block id and start pc come from the next-pc stage output
#[hdl(sim)]
let NextPcStageOutput::<_> {
start_pc,
next_start_pc: _,
btb_entry: _,
fetch_block_id,
start_call_stack: _,
config: _,
} = next_pc_stage_output;
#[hdl(sim)]
let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output;
// build one `id=... pc=...` entry per decoded instruction
let mut items = vec![];
for insn in ArrayVec::elements_sim_ref(insns) {
#[hdl(sim)]
let WipDecodedInsn {
fetch_block_id: _,
id,
pc,
size_in_bytes: _,
kind: _,
} = insn;
items.push(format!("id={id} pc={:#x}", pc.as_int()));
}
format!(
"fid={:#x} pc={:#x} [{}]",
fetch_block_id.as_int(),
start_pc.as_int(),
items.join(", "),
)
}
#[hdl] #[hdl]
fn run( fn run(
state: &mut SimValue<Self>, state: &mut SimValue<Self>,
@ -1522,6 +1667,36 @@ impl<C: PhantomConstCpuConfig> Stage<C> for FetchDecodeStageState<C> {
#[hdl(sim)] #[hdl(sim)]
let Self { config } = state; let Self { config } = state;
let config = config.ty(); let config = config.ty();
#[hdl(sim)]
let FetchDecodeStageOutput::<_> {
next_pc_stage_output,
decode_output,
} = inputs;
#[hdl(sim)]
let NextPcStageOutput::<_> {
start_pc,
next_start_pc: _,
btb_entry: _,
fetch_block_id,
start_call_stack: _,
config: _,
} = next_pc_stage_output;
#[hdl(sim)]
let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output;
for (i, insn) in ArrayVec::elements_sim_ref(insns).iter().enumerate() {
#[hdl(sim)]
let WipDecodedInsn {
fetch_block_id: insn_fetch_block_id,
id: _,
pc: insn_pc,
size_in_bytes: _,
kind: _,
} = insn;
assert_eq!(insn_fetch_block_id, fetch_block_id);
if i == 0 {
assert_eq!(insn_pc, start_pc);
}
}
let StageRunOutput { outputs, cancel } = StageRunOutput[config][this_ty]; let StageRunOutput { outputs, cancel } = StageRunOutput[config][this_ty];
#[hdl(sim)] #[hdl(sim)]
StageRunOutput::<_, _> { StageRunOutput::<_, _> {
@ -1618,9 +1793,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
type FromExternalPipeOutputItem = (); type FromExternalPipeOutputItem = ();
type MaxOutputsPerStep = CpuConfigFetchWidth<C>; type MaxOutputsPerStep = CpuConfigFetchWidth<C>;
type ExternalPipeIoWidth = ConstUsize<1>; type ExternalPipeIoWidth = ConstUsize<1>;
type InputQueueSize = ConstUsize<1>; type InputQueueSize = ConstUsize<2>;
type OutputQueueSize = TwiceCpuConfigFetchWidth<C>; type OutputQueueSize = TwiceCpuConfigFetchWidth<C>;
const HAS_EXTERNAL_PIPE: bool = false; const HAS_EXTERNAL_PIPE: bool = false;
const NAME: &'static str = "post_decode";
fn inputs_ty(config: C) -> Self::Inputs { fn inputs_ty(config: C) -> Self::Inputs {
PostDecodeStageInput[config] PostDecodeStageInput[config]
@ -1686,6 +1862,23 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
} }
} }
/// Formats one post-decode stage output item for debugging.
///
/// Reads the item's `insn` field and renders it as `fid=<hex> id=<id> pc=<hex>`;
/// the instruction's size and kind are ignored.
#[hdl]
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
    #[hdl(sim)]
    let WipDecodedInsn {
        fetch_block_id,
        id,
        pc,
        size_in_bytes: _,
        kind: _,
    } = &item.insn;
    let fid = fetch_block_id.as_int();
    let pc = pc.as_int();
    format!("fid={fid:#x} id={id} pc={pc:#x}")
}
#[hdl] #[hdl]
fn run( fn run(
state: &mut SimValue<Self>, state: &mut SimValue<Self>,
@ -1718,10 +1911,14 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output; let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output;
#[hdl(sim)] #[hdl(sim)]
let BrPredStageOutput::<_> { let BrPredStageOutput::<_> {
start_pc: br_pred_start_pc,
fetch_block_id: br_pred_fetch_block_id,
start_branch_history, start_branch_history,
branch_predictor_index, branch_predictor_index,
config: _, config: _,
} = br_pred_stage_output; } = br_pred_stage_output;
assert_eq!(start_pc, br_pred_start_pc);
assert_eq!(fetch_block_id, br_pred_fetch_block_id);
assert_ne!( assert_ne!(
**ArrayVec::len_sim(&insns), **ArrayVec::len_sim(&insns),
0, 0,
@ -2039,6 +2236,9 @@ struct ExecuteRetireStageState<C: PhantomConstGet<CpuConfig> + PhantomConstCpuCo
#[hdl(no_static)] #[hdl(no_static)]
struct ExecuteRetireStageOutput<C: PhantomConstGet<CpuConfig>> { struct ExecuteRetireStageOutput<C: PhantomConstGet<CpuConfig>> {
train_branch_predictor: HdlOption<TrainBranchPredictor>, train_branch_predictor: HdlOption<TrainBranchPredictor>,
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
id: UInt<12>,
pc: UInt<64>,
config: C, config: C,
} }
@ -2047,12 +2247,18 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for ExecuteRetireStageOutput<C> {
fn sim_value_default(self) -> SimValue<Self> { fn sim_value_default(self) -> SimValue<Self> {
let Self { let Self {
train_branch_predictor, train_branch_predictor,
fetch_block_id,
id,
pc: _,
config, config,
} = self; } = self;
#[hdl(sim)] #[hdl(sim)]
Self { Self {
train_branch_predictor: #[hdl(sim)] train_branch_predictor: #[hdl(sim)]
train_branch_predictor.HdlNone(), train_branch_predictor.HdlNone(),
fetch_block_id: fetch_block_id.zero(),
id: id.zero(),
pc: 0u64,
config, config,
} }
} }
@ -2089,6 +2295,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
type InputQueueSize = CpuConfigRobSize<C>; type InputQueueSize = CpuConfigRobSize<C>;
type OutputQueueSize = CpuConfigFetchWidth<C>; type OutputQueueSize = CpuConfigFetchWidth<C>;
const HAS_EXTERNAL_PIPE: bool = true; const HAS_EXTERNAL_PIPE: bool = true;
const NAME: &'static str = "execute_retire";
fn inputs_ty(config: C) -> Self::Inputs { fn inputs_ty(config: C) -> Self::Inputs {
ExecuteRetireStageInput[config] ExecuteRetireStageInput[config]
@ -2150,6 +2357,23 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
} }
} }
/// Formats one execute/retire stage output item for debugging.
///
/// Renders `fid=<hex> id=<id> pc=<hex>`; the branch-predictor training data
/// and the config are ignored.
#[hdl]
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
    #[hdl(sim)]
    let ExecuteRetireStageOutput::<_> {
        train_branch_predictor: _,
        fetch_block_id,
        id,
        pc,
        config: _,
    } = item;
    let fid = fetch_block_id.as_int();
    let pc = pc.as_int();
    format!("fid={fid:#x} id={id} pc={pc:#x}")
}
#[hdl] #[hdl]
fn run( fn run(
state: &mut SimValue<Self>, state: &mut SimValue<Self>,
@ -2318,6 +2542,9 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
#[hdl(sim)] #[hdl(sim)]
ExecuteRetireStageOutput::<_> { ExecuteRetireStageOutput::<_> {
train_branch_predictor, train_branch_predictor,
fetch_block_id: &insn.fetch_block_id,
id,
pc: insn.pc,
config, config,
}, },
), ),
@ -2341,6 +2568,9 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
#[hdl(sim)] #[hdl(sim)]
ExecuteRetireStageOutput::<_> { ExecuteRetireStageOutput::<_> {
train_branch_predictor, train_branch_predictor,
fetch_block_id: &insn.fetch_block_id,
id,
pc: insn.pc,
config, config,
}, },
]), ]),
@ -2896,7 +3126,7 @@ impl ResetSteps for BranchTargetBuffer {
} }
#[hdl] #[hdl]
struct Queue<T, Capacity: Size> { struct Queue<T, Capacity: Size, Name: PhantomConstGet<str>> {
data: ArrayType<T, Capacity>, data: ArrayType<T, Capacity>,
/// inclusive /// inclusive
start: UIntInRangeType<ConstUsize<0>, Capacity>, start: UIntInRangeType<ConstUsize<0>, Capacity>,
@ -2904,9 +3134,18 @@ struct Queue<T, Capacity: Size> {
end: UIntInRangeType<ConstUsize<0>, Capacity>, end: UIntInRangeType<ConstUsize<0>, Capacity>,
/// used to disambiguate between a full and an empty queue /// used to disambiguate between a full and an empty queue
eq_start_end_means_full: Bool, eq_start_end_means_full: Bool,
name: Name,
} }
impl<T: Type, Capacity: Size> Queue<T, Capacity> { impl<T: Type, Capacity: Size, Name: Type + PhantomConstGet<str>> Queue<T, Capacity, Name> {
/// Prints a trace line for a queue operation to stdout.
///
/// `fn_name` is the name of the operation being traced and `data` is the
/// element involved; the queue's name is included so that different queues
/// can be told apart in the output.
fn debug_op(self, fn_name: &str, data: &SimValue<T>) {
    let queue_name = self.name;
    println!("Queue::<_, _, {queue_name:?}>::{fn_name}: {data:#?}");
}
/// Prints the queue's name and all of its current items to stdout.
///
/// Items are visited in `peek_iter` order and each one is rendered with
/// `dump_item`; the queue itself is not modified.
fn dump(this: &SimValue<Self>, dump_item: impl Fn(&SimValue<T>) -> String) {
    let name = this.name.ty().get();
    // wrap each rendered string so `{:#?}` shows it verbatim
    let items: Vec<_> = Self::peek_iter(this)
        .map(|item| DebugAsDisplay(dump_item(item)))
        .collect();
    println!("Queue {name}: {items:#?}");
}
fn capacity(self) -> usize { fn capacity(self) -> usize {
self.data.len() self.data.len()
} }
@ -2951,8 +3190,11 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
let end = *this.end; let end = *this.end;
*this.end = this.ty().next_pos(end); *this.end = this.ty().next_pos(end);
*this.eq_start_end_means_full = true; *this.eq_start_end_means_full = true;
let this_ty = this.ty();
let data = &mut this.data[end]; let data = &mut this.data[end];
*data = dbg!(value.to_sim_value_with_type(data.ty())); let value = value.to_sim_value_with_type(data.ty());
this_ty.debug_op("push", &value);
*data = value;
Ok(()) Ok(())
} }
} }
@ -2963,6 +3205,7 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
let end = this.ty().prev_pos(*this.end); let end = this.ty().prev_pos(*this.end);
*this.end = end; *this.end = end;
let data = this.data[end].clone(); let data = this.data[end].clone();
this.ty().debug_op("undo_push", &data);
*this.eq_start_end_means_full = false; *this.eq_start_end_means_full = false;
Some(data) Some(data)
} }
@ -2976,9 +3219,8 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
} }
fn peek_iter( fn peek_iter(
this: &SimValue<Self>, this: &SimValue<Self>,
) -> impl Clone + DoubleEndedIterator<Item = SimValue<T>> + ExactSizeIterator { ) -> impl Clone + DoubleEndedIterator<Item = &SimValue<T>> + ExactSizeIterator {
(0..Self::len(this)) (0..Self::len(this)).map(|nth| &this.data[this.ty().nth_pos_after(*this.start, nth)])
.map(|nth| dbg!(this.data[this.ty().nth_pos_after(*this.start, nth)].clone()))
} }
fn pop(this: &mut SimValue<Self>) -> Option<SimValue<T>> { fn pop(this: &mut SimValue<Self>) -> Option<SimValue<T>> {
if Self::is_empty(this) { if Self::is_empty(this) {
@ -2987,13 +3229,14 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
let start = *this.start; let start = *this.start;
*this.start = this.ty().next_pos(start); *this.start = this.ty().next_pos(start);
let data = this.data[start].clone(); let data = this.data[start].clone();
this.ty().debug_op("pop", &data);
*this.eq_start_end_means_full = false; *this.eq_start_end_means_full = false;
Some(data) Some(data)
} }
} }
} }
impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity> { impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity, PhantomConst<str>> {
#[hdl] #[hdl]
fn sim_value_default(self) -> SimValue<Self> { fn sim_value_default(self) -> SimValue<Self> {
let Self { let Self {
@ -3001,9 +3244,10 @@ impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity>
start, start,
end, end,
eq_start_end_means_full: _, eq_start_end_means_full: _,
name,
} = self; } = self;
#[hdl(sim)] #[hdl(sim)]
Queue::<T, Capacity> { Queue::<T, Capacity, _> {
data: repeat( data: repeat(
data.element().sim_value_default(), data.element().sim_value_default(),
Capacity::from_usize(data.len()), Capacity::from_usize(data.len()),
@ -3011,19 +3255,21 @@ impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity>
start: 0usize.to_sim_value_with_type(start), start: 0usize.to_sim_value_with_type(start),
end: 0usize.to_sim_value_with_type(end), end: 0usize.to_sim_value_with_type(end),
eq_start_end_means_full: false, eq_start_end_means_full: false,
name,
} }
} }
} }
impl<T: SimValueDefault, Capacity: Size> ResetSteps for Queue<T, Capacity> { impl<T: SimValueDefault, Capacity: Size> ResetSteps for Queue<T, Capacity, PhantomConst<str>> {
#[hdl] #[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus { fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)] #[hdl(sim)]
let Queue::<T, Capacity> { let Queue::<T, Capacity, _> {
data, data,
start, start,
end, end,
eq_start_end_means_full, eq_start_end_means_full,
name: _,
} = this; } = this;
**start = 0; **start = 0;
**end = 0; **end = 0;
@ -3054,11 +3300,25 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> CancelInProgressForStageWithQueues<C
} }
} }
// Name of a stage's input queue: the given name with `.input_queue` appended.
#[hdl(get(|name| PhantomConst::new_deref(format!("{name}.input_queue"))))]
type StageWithQueuesInputQueueName<Name: PhantomConstGet<str>> = PhantomConst<str>;
// Name of a stage's output queue: the given name with `.output_queue` appended.
#[hdl(get(|name| PhantomConst::new_deref(format!("{name}.output_queue"))))]
type StageWithQueuesOutputQueueName<Name: PhantomConstGet<str>> = PhantomConst<str>;
#[hdl(no_static)] #[hdl(no_static)]
struct StageWithQueues<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig, S: Type + Stage<C>> { struct StageWithQueues<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig, S: Type + Stage<C>> {
input_queue: Queue<StageInputStagesOutputs<C, S>, StageInputQueueSize<C, S>>, input_queue: Queue<
StageInputStagesOutputs<C, S>,
StageInputQueueSize<C, S>,
StageWithQueuesInputQueueName<StageName<C, S>>,
>,
state: S, state: S,
output_queue: Queue<StageOutput<C, S>, StageOutputQueueSize<C, S>>, output_queue: Queue<
StageOutput<C, S>,
StageOutputQueueSize<C, S>,
StageWithQueuesOutputQueueName<StageName<C, S>>,
>,
config: C, config: C,
} }
@ -3169,6 +3429,18 @@ enum StageWithQueuesRunResult<C: PhantomConstCpuConfig, S: Stage<C>> {
} }
impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> { impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
/// Prints the contents of this stage's input queue and output queue via `Queue::dump`.
///
/// Input-queue items are rendered with the input stages' `dump_output_items`,
/// output-queue items with this stage's own `dump_output_item`.
#[hdl]
fn dump_queues(this: &SimValue<Self>) {
#[hdl(sim)]
let Self {
input_queue,
state: _,
output_queue,
config: _,
} = this;
Queue::dump(input_queue, S::InputStages::dump_output_items);
Queue::dump(output_queue, S::dump_output_item);
}
fn input_queue_space_left_with_sibling( fn input_queue_space_left_with_sibling(
this: &SimValue<Self>, this: &SimValue<Self>,
sibling: &<S::SiblingStage as SiblingStageOrUnit<C, S>>::SimValueStageWithQueues, sibling: &<S::SiblingStage as SiblingStageOrUnit<C, S>>::SimValueStageWithQueues,
@ -3341,7 +3613,7 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
state, state,
output_queue, output_queue,
config, config,
} = this; } = &mut *this;
let config = config.ty(); let config = config.ty();
#[hdl(sim)] #[hdl(sim)]
let StageWithQueuesInputs::<_, _> { let StageWithQueuesInputs::<_, _> {
@ -3659,11 +3931,39 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
} }
} }
#[hdl] #[hdl]
/// Inspects the front of the execute/retire output queue without removing anything.
///
/// Looks at up to `fetch_width` items and returns `(retire_count, training)`:
/// * if an item carries branch-predictor training data, scanning stops there;
///   `retire_count` includes that item and `training` is a clone of its data.
/// * otherwise `retire_count` is the number of items looked at and `training` is `None`.
fn get_execute_retire_output(
this: &SimValue<Self>,
) -> (usize, Option<SimValue<TrainBranchPredictor>>) {
let config = this.config.ty();
let mut retire_count = 0usize;
for execute_retire_output in
Queue::peek_iter(&this.execute_retire.output_queue).take(config.get().fetch_width.get())
{
// count the item before checking it, so an early return includes it
retire_count += 1;
#[hdl(sim)]
let ExecuteRetireStageOutput::<_> {
train_branch_predictor,
fetch_block_id: _,
id: _,
pc: _,
config: _,
} = &execute_retire_output;
#[hdl(sim)]
if let HdlSome(train_branch_predictor) = train_branch_predictor {
// for now we only retire one conditional branch per clock cycle
// TODO: maybe improve later?
return (retire_count, Some(train_branch_predictor.clone()));
}
}
(retire_count, None)
}
#[hdl]
fn run( fn run(
this: &mut SimValue<Self>, this: &mut SimValue<Self>,
inputs: &SimValue<AllStagesInputs<C>>, inputs: &SimValue<AllStagesInputs<C>>,
last_outputs: &SimValue<AllStagesOutputs<C>>, last_outputs: &SimValue<AllStagesOutputs<C>>,
) -> Result<(), SimValue<CancelInProgress<C>>> { ) -> Result<(), SimValue<CancelInProgress<C>>> {
let (retire_count, _) = Self::get_execute_retire_output(this);
#[hdl(sim)] #[hdl(sim)]
let Self { let Self {
next_pc, next_pc,
@ -3672,9 +3972,16 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
post_decode, post_decode,
execute_retire, execute_retire,
config, config,
} = this; } = &mut *this;
let config = config.ty(); let config = config.ty();
let cancel_ty = CancelInProgress[config]; let cancel_ty = CancelInProgress[config];
for _ in 0..retire_count {
// items were handled in the previous clock cycle,
// but are removed only now so you can see them for debugging
let Some(_) = Queue::pop(&mut execute_retire.output_queue) else {
unreachable!();
};
}
match StageWithQueues::run( match StageWithQueues::run(
execute_retire, execute_retire,
&inputs.execute_retire, &inputs.execute_retire,
@ -3832,28 +4139,33 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
input_stages_outputs_popped_count: _, input_stages_outputs_popped_count: _,
} => {} } => {}
} }
for _ in 0..config.get().fetch_width.get() { match Self::get_execute_retire_output(this) {
let Some(execute_retire_output) = Queue::pop(&mut execute_retire.output_queue) else { (_, Some(train_branch_predictor)) => BrPredStageState::train_branch_predictor(
break; &mut this.br_pred.state,
}; &train_branch_predictor,
#[hdl(sim)] ),
let ExecuteRetireStageOutput::<_> { (_, None) => {}
train_branch_predictor,
config: _,
} = &execute_retire_output;
#[hdl(sim)]
if let HdlSome(train_branch_predictor) = train_branch_predictor {
BrPredStageState::train_branch_predictor(
&mut br_pred.state,
train_branch_predictor,
);
// for now we only retire one conditional branch per clock cycle
// TODO: maybe improve later?
break;
}
} }
Ok(()) Ok(())
} }
/// Prints a `Dump Queues:` header followed by the input and output queues of
/// every stage, in the order next_pc, br_pred, fetch_decode, post_decode,
/// execute_retire.
#[hdl]
fn dump_queues(this: &SimValue<Self>) {
#[hdl(sim)]
let Self {
next_pc,
br_pred,
fetch_decode,
post_decode,
execute_retire,
config: _,
} = this;
println!("Dump Queues:");
StageWithQueues::dump_queues(next_pc);
StageWithQueues::dump_queues(br_pred);
StageWithQueues::dump_queues(fetch_decode);
StageWithQueues::dump_queues(post_decode);
StageWithQueues::dump_queues(execute_retire);
}
} }
#[hdl(no_static)] #[hdl(no_static)]
@ -4038,6 +4350,14 @@ pub fn next_pc(config: PhantomConst<CpuConfig>) {
sim.write(state_expr, state).await; sim.write(state_expr, state).await;
sim.wait_for_clock_edge(cd.clk).await; sim.wait_for_clock_edge(cd.clk).await;
state = sim.read_past(state_expr, cd.clk).await; state = sim.read_past(state_expr, cd.clk).await;
AllStages::dump_queues(&state.all_stages);
let next_retire_insn_ids = sim.read_past(from_retire.next_insn_ids, cd.clk).await;
let next_retire_insn_ids = ArrayVec::elements_sim_ref(&next_retire_insn_ids);
let expected_next_retire_insn_ids = Vec::from_iter(
Queue::peek_iter(&state.all_stages.execute_retire.input_queue)
.map(|v| v.insn.id.clone()),
);
assert_eq!(next_retire_insn_ids, expected_next_retire_insn_ids);
let AllStagesInputs { let AllStagesInputs {
next_pc, next_pc,
br_pred, br_pred,
@ -4058,7 +4378,16 @@ pub fn next_pc(config: PhantomConst<CpuConfig>) {
#[hdl(sim)] #[hdl(sim)]
if let HdlSome(data) = sim.read_past(from_retire.inner.data, cd.clk).await { if let HdlSome(data) = sim.read_past(from_retire.inner.data, cd.clk).await {
#[hdl(sim)] #[hdl(sim)]
let RetireToNextPcInterfaceInner::<_> { insns, config: _ } = data; let RetireToNextPcInterfaceInner::<_> {
mut insns,
config: _,
} = data;
if !sim.read_past_bool(from_retire.inner.ready, cd.clk).await {
// since we can have `outputs.execute_retire.from_external_pipe_output_ready > 0`
// without `from_retire.inner.ready` being set, make sure we don't retire any instructions in that case
ArrayVec::truncate_sim(&mut insns, 0);
}
println!("from retire: {:#?}", ArrayVec::elements_sim_ref(&insns));
insns insns
} else { } else {
execute_retire execute_retire
@ -4148,7 +4477,8 @@ mod tests {
#[test] #[test]
fn test_queue() { fn test_queue() {
let mut queue: SimValue<Queue<UInt<8>, ConstUsize<8>>> = Queue::TYPE.sim_value_default(); let mut queue: SimValue<Queue<UInt<8>, ConstUsize<8>, PhantomConst<str>>> =
Queue::TYPE.sim_value_default();
let mut reference_queue = VecDeque::new(); let mut reference_queue = VecDeque::new();
let mut tested_full = false; let mut tested_full = false;
let mut tested_empty = false; let mut tested_empty = false;

File diff suppressed because it is too large Load diff

View file

@ -12,7 +12,11 @@ use cpu::{
unit::UnitKind, unit::UnitKind,
util::array_vec::ArrayVec, util::array_vec::ArrayVec,
}; };
use fayalite::{prelude::*, sim::vcd::VcdWriterDecls, util::RcWriter}; use fayalite::{
prelude::*,
sim::vcd::VcdWriterDecls,
util::{DebugAsDisplay, RcWriter},
};
use std::{ use std::{
cell::Cell, cell::Cell,
collections::{BTreeMap, BTreeSet, VecDeque}, collections::{BTreeMap, BTreeSet, VecDeque},
@ -565,13 +569,31 @@ impl MockExecuteState {
#[hdl] #[hdl]
fn try_retire( fn try_retire(
&mut self, &mut self,
) -> Option<Result<SimValue<RetireToNextPcInterfacePerInsn<PhantomConst<CpuConfig>>>, String>> ) -> Option<(
{ SimValue<RetireToNextPcInterfacePerInsn<PhantomConst<CpuConfig>>>,
Result<(), String>,
)> {
if self.queue.front()?.cycles_left.as_int() != 0 { if self.queue.front()?.cycles_left.as_int() != 0 {
return None; return None;
} }
let entry = self.queue.pop_front()?; let entry = self.queue.pop_front()?;
Some(self.do_retire(entry)) let id = entry.insn.id.clone();
Some(match self.do_retire(entry) {
Ok(v) => (v, Ok(())),
Err(e) => (
#[hdl(sim)]
RetireToNextPcInterfacePerInsn::<_> {
id,
next_pc: u64::from_be_bytes(*b"ErrError"),
call_stack_op: #[hdl(sim)]
CallStackOp::None(),
cond_br_taken: #[hdl(sim)]
HdlNone(),
config: self.config,
},
Err(e),
),
})
} }
fn space_available(&self) -> usize { fn space_available(&self) -> usize {
EXECUTE_RETIRE_PIPE_QUEUE_SIZE.saturating_sub(self.queue.len()) EXECUTE_RETIRE_PIPE_QUEUE_SIZE.saturating_sub(self.queue.len())
@ -621,6 +643,11 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
retire_output.ty().inner.data.HdlNone(), retire_output.ty().inner.data.HdlNone(),
) )
.await; .await;
sim.write(
retire_output.next_insn_ids,
retire_output.next_insn_ids.ty().new_sim(0_hdl_u12),
)
.await;
sim.write( sim.write(
queue_debug, queue_debug,
queue_debug queue_debug
@ -672,30 +699,21 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
let mut sim_queue = queue_debug let mut sim_queue = queue_debug
.ty() .ty()
.new_sim(ExecuteRetirePipeQueueEntry.default_sim()); .new_sim(ExecuteRetirePipeQueueEntry.default_sim());
let mut next_insn_ids = retire_output.next_insn_ids.ty().new_sim(0_hdl_u12);
for entry in &state.queue { for entry in &state.queue {
ArrayVec::try_push_sim(&mut sim_queue, entry) ArrayVec::try_push_sim(&mut sim_queue, entry)
.ok() .ok()
.expect("queue is known to be small enough"); .expect("queue is known to be small enough");
let _ = ArrayVec::try_push_sim(&mut next_insn_ids, &entry.insn.id);
} }
sim.write(queue_debug, sim_queue).await; sim.write(queue_debug, sim_queue).await;
sim.write(retire_output.next_insn_ids, next_insn_ids).await;
let mut retiring = retire_vec_ty.new_sim(&empty_retire_insn); let mut retiring = retire_vec_ty.new_sim(&empty_retire_insn);
let mut peek_state = state.clone(); let mut peek_state = state.clone();
while let Some(peek_retire) = peek_state.try_retire() { while let Some((peek_retire, result)) = peek_state.try_retire() {
if peek_retire.is_err() && **ArrayVec::len_sim(&retiring) > 0 { if result.is_err() && **ArrayVec::len_sim(&retiring) > 0 {
break; break;
} }
let peek_retire = peek_retire.unwrap_or_else(|_| {
#[hdl(sim)]
RetireToNextPcInterfacePerInsn::<_> {
id: 0_hdl_u12,
next_pc: u64::from_be_bytes(*b"ErrError"),
call_stack_op: #[hdl(sim)]
CallStackOp::None(),
cond_br_taken: #[hdl(sim)]
HdlNone(),
config,
}
});
let Ok(_) = ArrayVec::try_push_sim(&mut retiring, peek_retire) else { let Ok(_) = ArrayVec::try_push_sim(&mut retiring, peek_retire) else {
break; break;
}; };
@ -723,11 +741,22 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
) )
.await; .await;
sim.wait_for_clock_edge(cd.clk).await; sim.wait_for_clock_edge(cd.clk).await;
println!(
"Dump mock execute retire pipe queue: {:#?}",
Vec::from_iter(state.queue.iter().map(|v| {
DebugAsDisplay(format!(
"fid={:#x} id={} pc={:#x}",
v.insn.fetch_block_id.as_int(),
v.insn.id,
v.insn.pc.as_int(),
))
}))
);
if sim.read_past_bool(retire_output.inner.ready, cd.clk).await { if sim.read_past_bool(retire_output.inner.ready, cd.clk).await {
for _ in 0..**ArrayVec::len_sim(&retiring) { for _ in 0..**ArrayVec::len_sim(&retiring) {
match state.try_retire() { match state.try_retire() {
Some(Ok(_)) => {} Some((_, Ok(_))) => {}
Some(Err(e)) => panic!("retire error: {e}"), Some((_, Err(e))) => panic!("retire error: {e}"),
None => unreachable!(), None => unreachable!(),
} }
} }
@ -737,7 +766,7 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
&mut new_insns, &mut new_insns,
*sim.read_past(from_post_decode.ready, cd.clk).await, *sim.read_past(from_post_decode.ready, cd.clk).await,
); );
for insn in ArrayVec::elements_sim_ref(&new_insns) { for insn in dbg!(ArrayVec::elements_sim_ref(&new_insns)) {
state.start(insn, delay_sequence_index); state.start(insn, delay_sequence_index);
} }
} }
@ -803,7 +832,7 @@ fn test_next_pc() {
config.fetch_width = NonZeroUsize::new(2).unwrap(); config.fetch_width = NonZeroUsize::new(2).unwrap();
let m = dut(PhantomConst::new_sized(config)); let m = dut(PhantomConst::new_sized(config));
let mut sim = Simulation::new(m); let mut sim = Simulation::new(m);
let mut writer = RcWriter::default(); let writer = RcWriter::default();
sim.add_trace_writer(VcdWriterDecls::new(writer.clone())); sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
struct DumpVcdOnDrop { struct DumpVcdOnDrop {
writer: Option<RcWriter>, writer: Option<RcWriter>,
@ -823,6 +852,7 @@ fn test_next_pc() {
sim.write_reset(sim.io().cd.rst, true); sim.write_reset(sim.io().cd.rst, true);
for _cycle in 0..300 { for _cycle in 0..300 {
sim.advance_time(SimDuration::from_nanos(500)); sim.advance_time(SimDuration::from_nanos(500));
println!("clock tick");
sim.write_clock(sim.io().cd.clk, true); sim.write_clock(sim.io().cd.clk, true);
sim.advance_time(SimDuration::from_nanos(500)); sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, false); sim.write_clock(sim.io().cd.clk, false);