fix queueing errors
This commit is contained in:
parent
8ab63f3c6a
commit
84e4fde512
3 changed files with 20332 additions and 15680 deletions
|
|
@ -24,9 +24,9 @@ use fayalite::{
|
||||||
prelude::*,
|
prelude::*,
|
||||||
sim::value::SimOnlyValueTrait,
|
sim::value::SimOnlyValueTrait,
|
||||||
ty::StaticType,
|
ty::StaticType,
|
||||||
util::ready_valid::ReadyValid,
|
util::{DebugAsDisplay, ready_valid::ReadyValid},
|
||||||
};
|
};
|
||||||
use std::borrow::Cow;
|
use std::{borrow::Cow, fmt};
|
||||||
|
|
||||||
pub const FETCH_BLOCK_ID_WIDTH: usize = FetchBlockIdInt::BITS as usize;
|
pub const FETCH_BLOCK_ID_WIDTH: usize = FetchBlockIdInt::BITS as usize;
|
||||||
type FetchBlockIdInt = u8;
|
type FetchBlockIdInt = u8;
|
||||||
|
|
@ -246,6 +246,7 @@ pub struct RetireToNextPcInterfaceInner<C: PhantomConstGet<CpuConfig>> {
|
||||||
/// branch instruction is mis-speculated.
|
/// branch instruction is mis-speculated.
|
||||||
pub struct RetireToNextPcInterface<C: PhantomConstGet<CpuConfig>> {
|
pub struct RetireToNextPcInterface<C: PhantomConstGet<CpuConfig>> {
|
||||||
pub inner: ReadyValid<RetireToNextPcInterfaceInner<C>>,
|
pub inner: ReadyValid<RetireToNextPcInterfaceInner<C>>,
|
||||||
|
pub next_insn_ids: ArrayVec<UInt<12>, CpuConfigRobSize<C>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[hdl(no_static)]
|
#[hdl(no_static)]
|
||||||
|
|
@ -311,8 +312,8 @@ struct StageRunOutput<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig, S:
|
||||||
|
|
||||||
trait Stages<C: PhantomConstCpuConfig>: Type {
|
trait Stages<C: PhantomConstCpuConfig>: Type {
|
||||||
type Outputs: Type + SimValueDefault;
|
type Outputs: Type + SimValueDefault;
|
||||||
type SimValueOutputQueueRefs<'a>: 'a + Copy;
|
type SimValueOutputQueueRefs<'a>: 'a + Copy + fmt::Debug;
|
||||||
type SimValueOutputQueueMuts<'a>: 'a;
|
type SimValueOutputQueueMuts<'a>: 'a + fmt::Debug;
|
||||||
fn outputs_ty(config: C) -> Self::Outputs;
|
fn outputs_ty(config: C) -> Self::Outputs;
|
||||||
fn reborrow_output_queues_as_refs<'a>(
|
fn reborrow_output_queues_as_refs<'a>(
|
||||||
output_queues: &'a Self::SimValueOutputQueueMuts<'_>,
|
output_queues: &'a Self::SimValueOutputQueueMuts<'_>,
|
||||||
|
|
@ -329,6 +330,7 @@ trait Stages<C: PhantomConstCpuConfig>: Type {
|
||||||
output_queues: Self::SimValueOutputQueueMuts<'_>,
|
output_queues: Self::SimValueOutputQueueMuts<'_>,
|
||||||
) -> Option<SimValue<Self::Outputs>>;
|
) -> Option<SimValue<Self::Outputs>>;
|
||||||
fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V);
|
fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V);
|
||||||
|
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C: PhantomConstCpuConfig> Stages<C> for () {
|
impl<C: PhantomConstCpuConfig> Stages<C> for () {
|
||||||
|
|
@ -377,14 +379,30 @@ impl<C: PhantomConstCpuConfig> Stages<C> for () {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
let () = this;
|
let () = this;
|
||||||
}
|
}
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let () = outputs;
|
||||||
|
String::from("()")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 {
|
impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 {
|
||||||
type Outputs = S1::Output;
|
type Outputs = S1::Output;
|
||||||
type SimValueOutputQueueRefs<'a> =
|
type SimValueOutputQueueRefs<'a> = &'a SimValue<
|
||||||
&'a SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>;
|
Queue<
|
||||||
type SimValueOutputQueueMuts<'a> =
|
StageOutput<C, S1>,
|
||||||
&'a mut SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>;
|
StageOutputQueueSize<C, S1>,
|
||||||
|
StageWithQueuesOutputQueueName<StageName<C, S1>>,
|
||||||
|
>,
|
||||||
|
>;
|
||||||
|
type SimValueOutputQueueMuts<'a> = &'a mut SimValue<
|
||||||
|
Queue<
|
||||||
|
StageOutput<C, S1>,
|
||||||
|
StageOutputQueueSize<C, S1>,
|
||||||
|
StageWithQueuesOutputQueueName<StageName<C, S1>>,
|
||||||
|
>,
|
||||||
|
>;
|
||||||
fn outputs_ty(config: C) -> Self::Outputs {
|
fn outputs_ty(config: C) -> Self::Outputs {
|
||||||
S1::output_ty(config)
|
S1::output_ty(config)
|
||||||
}
|
}
|
||||||
|
|
@ -402,7 +420,7 @@ impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 {
|
||||||
output_queues: Self::SimValueOutputQueueRefs<'_>,
|
output_queues: Self::SimValueOutputQueueRefs<'_>,
|
||||||
max_peek_len: usize,
|
max_peek_len: usize,
|
||||||
) -> impl Iterator<Item = SimValue<Self::Outputs>> {
|
) -> impl Iterator<Item = SimValue<Self::Outputs>> {
|
||||||
Queue::peek_iter(output_queues).take(max_peek_len)
|
Queue::peek_iter(output_queues).take(max_peek_len).cloned()
|
||||||
}
|
}
|
||||||
fn pop_output_queues(
|
fn pop_output_queues(
|
||||||
output_queues: Self::SimValueOutputQueueMuts<'_>,
|
output_queues: Self::SimValueOutputQueueMuts<'_>,
|
||||||
|
|
@ -412,17 +430,45 @@ impl<C: PhantomConstCpuConfig, S1: Stage<C>> Stages<C> for S1 {
|
||||||
fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V) {
|
fn visit_sim_value_ref<V: StagesVisitSimValueRef<C>>(this: &SimValue<Self>, visitor: &mut V) {
|
||||||
visitor.visit(this);
|
visitor.visit(this);
|
||||||
}
|
}
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String {
|
||||||
|
S1::dump_output_item(outputs)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C: PhantomConstCpuConfig, S1: Stage<C>, S2: Stage<C>> Stages<C> for (S1, S2) {
|
impl<C: PhantomConstCpuConfig, S1: Stage<C>, S2: Stage<C>> Stages<C> for (S1, S2) {
|
||||||
type Outputs = (S1::Output, S2::Output);
|
type Outputs = (S1::Output, S2::Output);
|
||||||
type SimValueOutputQueueRefs<'a> = (
|
type SimValueOutputQueueRefs<'a> = (
|
||||||
&'a SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>,
|
&'a SimValue<
|
||||||
&'a SimValue<Queue<StageOutput<C, S2>, StageOutputQueueSize<C, S2>>>,
|
Queue<
|
||||||
|
StageOutput<C, S1>,
|
||||||
|
StageOutputQueueSize<C, S1>,
|
||||||
|
StageWithQueuesOutputQueueName<StageName<C, S1>>,
|
||||||
|
>,
|
||||||
|
>,
|
||||||
|
&'a SimValue<
|
||||||
|
Queue<
|
||||||
|
StageOutput<C, S2>,
|
||||||
|
StageOutputQueueSize<C, S2>,
|
||||||
|
StageWithQueuesOutputQueueName<StageName<C, S2>>,
|
||||||
|
>,
|
||||||
|
>,
|
||||||
);
|
);
|
||||||
type SimValueOutputQueueMuts<'a> = (
|
type SimValueOutputQueueMuts<'a> = (
|
||||||
&'a mut SimValue<Queue<StageOutput<C, S1>, StageOutputQueueSize<C, S1>>>,
|
&'a mut SimValue<
|
||||||
&'a mut SimValue<Queue<StageOutput<C, S2>, StageOutputQueueSize<C, S2>>>,
|
Queue<
|
||||||
|
StageOutput<C, S1>,
|
||||||
|
StageOutputQueueSize<C, S1>,
|
||||||
|
StageWithQueuesOutputQueueName<StageName<C, S1>>,
|
||||||
|
>,
|
||||||
|
>,
|
||||||
|
&'a mut SimValue<
|
||||||
|
Queue<
|
||||||
|
StageOutput<C, S2>,
|
||||||
|
StageOutputQueueSize<C, S2>,
|
||||||
|
StageWithQueuesOutputQueueName<StageName<C, S2>>,
|
||||||
|
>,
|
||||||
|
>,
|
||||||
);
|
);
|
||||||
fn outputs_ty(config: C) -> Self::Outputs {
|
fn outputs_ty(config: C) -> Self::Outputs {
|
||||||
(S1::output_ty(config), S2::output_ty(config))
|
(S1::output_ty(config), S2::output_ty(config))
|
||||||
|
|
@ -474,6 +520,14 @@ impl<C: PhantomConstCpuConfig, S1: Stage<C>, S2: Stage<C>> Stages<C> for (S1, S2
|
||||||
visitor.visit(s1);
|
visitor.visit(s1);
|
||||||
visitor.visit(s2);
|
visitor.visit(s2);
|
||||||
}
|
}
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_items(outputs: &SimValue<Self::Outputs>) -> String {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let (s1, s2) = outputs;
|
||||||
|
let s1 = S1::dump_output_item(s1);
|
||||||
|
let s2 = S2::dump_output_item(s2);
|
||||||
|
format!("({s1}, {s2})")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trait StagesVisitSimValueRef<C: PhantomConstCpuConfig> {
|
trait StagesVisitSimValueRef<C: PhantomConstCpuConfig> {
|
||||||
|
|
@ -596,6 +650,7 @@ trait Stage<C: PhantomConstCpuConfig>: Type + SimValueDefault + ResetSteps {
|
||||||
type InputQueueSize: Size;
|
type InputQueueSize: Size;
|
||||||
type OutputQueueSize: Size;
|
type OutputQueueSize: Size;
|
||||||
const HAS_EXTERNAL_PIPE: bool;
|
const HAS_EXTERNAL_PIPE: bool;
|
||||||
|
const NAME: &'static str;
|
||||||
|
|
||||||
fn inputs_ty(config: C) -> Self::Inputs;
|
fn inputs_ty(config: C) -> Self::Inputs;
|
||||||
fn output_ty(config: C) -> Self::Output;
|
fn output_ty(config: C) -> Self::Output;
|
||||||
|
|
@ -620,6 +675,8 @@ trait Stage<C: PhantomConstCpuConfig>: Type + SimValueDefault + ResetSteps {
|
||||||
from_external_pipe_output_item: &SimValue<Self::FromExternalPipeOutputItem>,
|
from_external_pipe_output_item: &SimValue<Self::FromExternalPipeOutputItem>,
|
||||||
) -> SimValue<Self::Inputs>;
|
) -> SimValue<Self::Inputs>;
|
||||||
|
|
||||||
|
fn dump_output_item(item: &SimValue<Self::Output>) -> String;
|
||||||
|
|
||||||
/// see [`StageRunOutput`] for docs on output
|
/// see [`StageRunOutput`] for docs on output
|
||||||
fn run(
|
fn run(
|
||||||
state: &mut SimValue<Self>,
|
state: &mut SimValue<Self>,
|
||||||
|
|
@ -635,7 +692,7 @@ macro_rules! hdl_type_alias_with_generics {
|
||||||
#[ty = $ty:expr]
|
#[ty = $ty:expr]
|
||||||
$vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty;
|
$vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty;
|
||||||
) => {
|
) => {
|
||||||
$vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type;
|
$vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<($C, $Arg)>>::Type;
|
||||||
|
|
||||||
$vis struct $WithoutGenerics {}
|
$vis struct $WithoutGenerics {}
|
||||||
|
|
||||||
|
|
@ -668,7 +725,7 @@ macro_rules! hdl_type_alias_with_generics {
|
||||||
#[size = $size:expr]
|
#[size = $size:expr]
|
||||||
$vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty;
|
$vis:vis type $Type:ident<$C:ident: $PhantomConstCpuConfig:ident, $Arg:ident: $Trait:ident<$TraitC:ident>> = $Target:ty;
|
||||||
) => {
|
) => {
|
||||||
$vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<$C>>::Type;
|
$vis type $Type<$C, $Arg> = <$Target as fayalite::phantom_const::ReturnSelfUnchanged<($C, $Arg)>>::Type;
|
||||||
|
|
||||||
$vis struct $WithoutGenerics {}
|
$vis struct $WithoutGenerics {}
|
||||||
|
|
||||||
|
|
@ -734,6 +791,12 @@ hdl_type_alias_with_generics! {
|
||||||
type StageFromExternalPipeOutputItem<C: PhantomConstCpuConfig, T: Stage<C>> = <T as Stage<C>>::FromExternalPipeOutputItem;
|
type StageFromExternalPipeOutputItem<C: PhantomConstCpuConfig, T: Stage<C>> = <T as Stage<C>>::FromExternalPipeOutputItem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hdl_type_alias_with_generics! {
|
||||||
|
#[without_generics = StageNameWithoutGenerics, StageNameWithStage]
|
||||||
|
#[ty = |_config, _stage| PhantomConst::new(<T as Stage<C>>::NAME)]
|
||||||
|
type StageName<C: PhantomConstCpuConfig, T: Stage<C>> = PhantomConst<str>;
|
||||||
|
}
|
||||||
|
|
||||||
hdl_type_alias_with_generics! {
|
hdl_type_alias_with_generics! {
|
||||||
#[without_generics = StageMaxOutputsPerStepWithoutGenerics, StageMaxOutputsPerStepWithStage]
|
#[without_generics = StageMaxOutputsPerStepWithoutGenerics, StageMaxOutputsPerStepWithStage]
|
||||||
#[size = |config: C, _stage| T::max_outputs_per_step(config)]
|
#[size = |config: C, _stage| T::max_outputs_per_step(config)]
|
||||||
|
|
@ -880,9 +943,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
|
||||||
type FromExternalPipeOutputItem = ();
|
type FromExternalPipeOutputItem = ();
|
||||||
type MaxOutputsPerStep = ConstUsize<1>;
|
type MaxOutputsPerStep = ConstUsize<1>;
|
||||||
type ExternalPipeIoWidth = ConstUsize<1>;
|
type ExternalPipeIoWidth = ConstUsize<1>;
|
||||||
type InputQueueSize = ConstUsize<1>;
|
type InputQueueSize = ConstUsize<2>;
|
||||||
type OutputQueueSize = ConstUsize<1>;
|
type OutputQueueSize = ConstUsize<2>;
|
||||||
const HAS_EXTERNAL_PIPE: bool = false;
|
const HAS_EXTERNAL_PIPE: bool = false;
|
||||||
|
const NAME: &'static str = "next_pc";
|
||||||
|
|
||||||
fn inputs_ty(_config: C) -> Self::Inputs {
|
fn inputs_ty(_config: C) -> Self::Inputs {
|
||||||
()
|
()
|
||||||
|
|
@ -945,6 +1009,24 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
|
||||||
()
|
()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let NextPcStageOutput::<_> {
|
||||||
|
start_pc,
|
||||||
|
next_start_pc: _,
|
||||||
|
btb_entry: _,
|
||||||
|
fetch_block_id,
|
||||||
|
start_call_stack: _,
|
||||||
|
config: _,
|
||||||
|
} = item;
|
||||||
|
format!(
|
||||||
|
"fid={:#x} pc={:#x}",
|
||||||
|
fetch_block_id.as_int(),
|
||||||
|
start_pc.as_int(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn run(
|
fn run(
|
||||||
state: &mut SimValue<Self>,
|
state: &mut SimValue<Self>,
|
||||||
|
|
@ -1114,6 +1196,8 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
|
||||||
|
|
||||||
#[hdl(no_static)]
|
#[hdl(no_static)]
|
||||||
struct BrPredStageOutput<C: PhantomConstGet<CpuConfig>> {
|
struct BrPredStageOutput<C: PhantomConstGet<CpuConfig>> {
|
||||||
|
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
|
||||||
|
start_pc: UInt<64>,
|
||||||
start_branch_history: UInt<6>,
|
start_branch_history: UInt<6>,
|
||||||
branch_predictor_index: HdlOption<UIntInRange<0, { BRANCH_PREDICTOR_SIZE }>>,
|
branch_predictor_index: HdlOption<UIntInRange<0, { BRANCH_PREDICTOR_SIZE }>>,
|
||||||
config: C,
|
config: C,
|
||||||
|
|
@ -1124,6 +1208,8 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for BrPredStageOutput<C> {
|
||||||
fn sim_value_default(self) -> SimValue<Self> {
|
fn sim_value_default(self) -> SimValue<Self> {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
Self {
|
Self {
|
||||||
|
fetch_block_id: self.fetch_block_id.zero(),
|
||||||
|
start_pc: 0u64,
|
||||||
start_branch_history: self.start_branch_history.zero(),
|
start_branch_history: self.start_branch_history.zero(),
|
||||||
branch_predictor_index: #[hdl(sim)]
|
branch_predictor_index: #[hdl(sim)]
|
||||||
HdlNone(),
|
HdlNone(),
|
||||||
|
|
@ -1203,9 +1289,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
|
||||||
type FromExternalPipeOutputItem = ();
|
type FromExternalPipeOutputItem = ();
|
||||||
type MaxOutputsPerStep = ConstUsize<1>;
|
type MaxOutputsPerStep = ConstUsize<1>;
|
||||||
type ExternalPipeIoWidth = ConstUsize<1>;
|
type ExternalPipeIoWidth = ConstUsize<1>;
|
||||||
type InputQueueSize = ConstUsize<1>;
|
type InputQueueSize = ConstUsize<2>;
|
||||||
type OutputQueueSize = CpuConfigMaxFetchesInFlight<C>;
|
type OutputQueueSize = CpuConfigMaxFetchesInFlight<C>;
|
||||||
const HAS_EXTERNAL_PIPE: bool = false;
|
const HAS_EXTERNAL_PIPE: bool = false;
|
||||||
|
const NAME: &'static str = "br_pred";
|
||||||
|
|
||||||
fn inputs_ty(config: C) -> Self::Inputs {
|
fn inputs_ty(config: C) -> Self::Inputs {
|
||||||
NextPcStageOutput[config]
|
NextPcStageOutput[config]
|
||||||
|
|
@ -1265,6 +1352,23 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
|
||||||
input_stages_outputs.clone()
|
input_stages_outputs.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let BrPredStageOutput::<_> {
|
||||||
|
fetch_block_id,
|
||||||
|
start_pc,
|
||||||
|
start_branch_history: _,
|
||||||
|
branch_predictor_index: _,
|
||||||
|
config: _,
|
||||||
|
} = item;
|
||||||
|
format!(
|
||||||
|
"fid={:#x} pc={:#x}",
|
||||||
|
fetch_block_id.as_int(),
|
||||||
|
start_pc.as_int(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn run(
|
fn run(
|
||||||
state: &mut SimValue<Self>,
|
state: &mut SimValue<Self>,
|
||||||
|
|
@ -1277,7 +1381,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
|
||||||
start_pc,
|
start_pc,
|
||||||
next_start_pc: _,
|
next_start_pc: _,
|
||||||
btb_entry,
|
btb_entry,
|
||||||
fetch_block_id: _,
|
fetch_block_id,
|
||||||
start_call_stack,
|
start_call_stack,
|
||||||
config: _,
|
config: _,
|
||||||
} = inputs;
|
} = inputs;
|
||||||
|
|
@ -1343,6 +1447,8 @@ impl<C: PhantomConstCpuConfig> Stage<C> for BrPredStageState<C> {
|
||||||
}
|
}
|
||||||
let output = #[hdl(sim)]
|
let output = #[hdl(sim)]
|
||||||
BrPredStageOutput::<_> {
|
BrPredStageOutput::<_> {
|
||||||
|
fetch_block_id,
|
||||||
|
start_pc,
|
||||||
start_branch_history,
|
start_branch_history,
|
||||||
branch_predictor_index,
|
branch_predictor_index,
|
||||||
config,
|
config,
|
||||||
|
|
@ -1450,8 +1556,9 @@ impl<C: PhantomConstCpuConfig> Stage<C> for FetchDecodeStageState<C> {
|
||||||
type MaxOutputsPerStep = ConstUsize<1>;
|
type MaxOutputsPerStep = ConstUsize<1>;
|
||||||
type ExternalPipeIoWidth = ConstUsize<1>;
|
type ExternalPipeIoWidth = ConstUsize<1>;
|
||||||
type InputQueueSize = CpuConfigMaxFetchesInFlight<C>;
|
type InputQueueSize = CpuConfigMaxFetchesInFlight<C>;
|
||||||
type OutputQueueSize = ConstUsize<1>;
|
type OutputQueueSize = ConstUsize<2>;
|
||||||
const HAS_EXTERNAL_PIPE: bool = true;
|
const HAS_EXTERNAL_PIPE: bool = true;
|
||||||
|
const NAME: &'static str = "fetch_decode";
|
||||||
|
|
||||||
fn inputs_ty(config: C) -> Self::Inputs {
|
fn inputs_ty(config: C) -> Self::Inputs {
|
||||||
FetchDecodeStageOutput[config]
|
FetchDecodeStageOutput[config]
|
||||||
|
|
@ -1513,6 +1620,44 @@ impl<C: PhantomConstCpuConfig> Stage<C> for FetchDecodeStageState<C> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let FetchDecodeStageOutput::<_> {
|
||||||
|
next_pc_stage_output,
|
||||||
|
decode_output,
|
||||||
|
} = item;
|
||||||
|
#[hdl(sim)]
|
||||||
|
let NextPcStageOutput::<_> {
|
||||||
|
start_pc,
|
||||||
|
next_start_pc: _,
|
||||||
|
btb_entry: _,
|
||||||
|
fetch_block_id,
|
||||||
|
start_call_stack: _,
|
||||||
|
config: _,
|
||||||
|
} = next_pc_stage_output;
|
||||||
|
#[hdl(sim)]
|
||||||
|
let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output;
|
||||||
|
let mut items = vec![];
|
||||||
|
for insn in ArrayVec::elements_sim_ref(insns) {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let WipDecodedInsn {
|
||||||
|
fetch_block_id: _,
|
||||||
|
id,
|
||||||
|
pc,
|
||||||
|
size_in_bytes: _,
|
||||||
|
kind: _,
|
||||||
|
} = insn;
|
||||||
|
items.push(format!("id={id} pc={:#x}", pc.as_int()));
|
||||||
|
}
|
||||||
|
format!(
|
||||||
|
"fid={:#x} pc={:#x} [{}]",
|
||||||
|
fetch_block_id.as_int(),
|
||||||
|
start_pc.as_int(),
|
||||||
|
items.join(", "),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn run(
|
fn run(
|
||||||
state: &mut SimValue<Self>,
|
state: &mut SimValue<Self>,
|
||||||
|
|
@ -1522,6 +1667,36 @@ impl<C: PhantomConstCpuConfig> Stage<C> for FetchDecodeStageState<C> {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
let Self { config } = state;
|
let Self { config } = state;
|
||||||
let config = config.ty();
|
let config = config.ty();
|
||||||
|
#[hdl(sim)]
|
||||||
|
let FetchDecodeStageOutput::<_> {
|
||||||
|
next_pc_stage_output,
|
||||||
|
decode_output,
|
||||||
|
} = inputs;
|
||||||
|
#[hdl(sim)]
|
||||||
|
let NextPcStageOutput::<_> {
|
||||||
|
start_pc,
|
||||||
|
next_start_pc: _,
|
||||||
|
btb_entry: _,
|
||||||
|
fetch_block_id,
|
||||||
|
start_call_stack: _,
|
||||||
|
config: _,
|
||||||
|
} = next_pc_stage_output;
|
||||||
|
#[hdl(sim)]
|
||||||
|
let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output;
|
||||||
|
for (i, insn) in ArrayVec::elements_sim_ref(insns).iter().enumerate() {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let WipDecodedInsn {
|
||||||
|
fetch_block_id: insn_fetch_block_id,
|
||||||
|
id: _,
|
||||||
|
pc: insn_pc,
|
||||||
|
size_in_bytes: _,
|
||||||
|
kind: _,
|
||||||
|
} = insn;
|
||||||
|
assert_eq!(insn_fetch_block_id, fetch_block_id);
|
||||||
|
if i == 0 {
|
||||||
|
assert_eq!(insn_pc, start_pc);
|
||||||
|
}
|
||||||
|
}
|
||||||
let StageRunOutput { outputs, cancel } = StageRunOutput[config][this_ty];
|
let StageRunOutput { outputs, cancel } = StageRunOutput[config][this_ty];
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
StageRunOutput::<_, _> {
|
StageRunOutput::<_, _> {
|
||||||
|
|
@ -1618,9 +1793,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
|
||||||
type FromExternalPipeOutputItem = ();
|
type FromExternalPipeOutputItem = ();
|
||||||
type MaxOutputsPerStep = CpuConfigFetchWidth<C>;
|
type MaxOutputsPerStep = CpuConfigFetchWidth<C>;
|
||||||
type ExternalPipeIoWidth = ConstUsize<1>;
|
type ExternalPipeIoWidth = ConstUsize<1>;
|
||||||
type InputQueueSize = ConstUsize<1>;
|
type InputQueueSize = ConstUsize<2>;
|
||||||
type OutputQueueSize = TwiceCpuConfigFetchWidth<C>;
|
type OutputQueueSize = TwiceCpuConfigFetchWidth<C>;
|
||||||
const HAS_EXTERNAL_PIPE: bool = false;
|
const HAS_EXTERNAL_PIPE: bool = false;
|
||||||
|
const NAME: &'static str = "post_decode";
|
||||||
|
|
||||||
fn inputs_ty(config: C) -> Self::Inputs {
|
fn inputs_ty(config: C) -> Self::Inputs {
|
||||||
PostDecodeStageInput[config]
|
PostDecodeStageInput[config]
|
||||||
|
|
@ -1686,6 +1862,23 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let WipDecodedInsn {
|
||||||
|
fetch_block_id,
|
||||||
|
id,
|
||||||
|
pc,
|
||||||
|
size_in_bytes: _,
|
||||||
|
kind: _,
|
||||||
|
} = &item.insn;
|
||||||
|
format!(
|
||||||
|
"fid={:#x} id={id} pc={:#x}",
|
||||||
|
fetch_block_id.as_int(),
|
||||||
|
pc.as_int(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn run(
|
fn run(
|
||||||
state: &mut SimValue<Self>,
|
state: &mut SimValue<Self>,
|
||||||
|
|
@ -1718,10 +1911,14 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
|
||||||
let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output;
|
let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output;
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
let BrPredStageOutput::<_> {
|
let BrPredStageOutput::<_> {
|
||||||
|
start_pc: br_pred_start_pc,
|
||||||
|
fetch_block_id: br_pred_fetch_block_id,
|
||||||
start_branch_history,
|
start_branch_history,
|
||||||
branch_predictor_index,
|
branch_predictor_index,
|
||||||
config: _,
|
config: _,
|
||||||
} = br_pred_stage_output;
|
} = br_pred_stage_output;
|
||||||
|
assert_eq!(start_pc, br_pred_start_pc);
|
||||||
|
assert_eq!(fetch_block_id, br_pred_fetch_block_id);
|
||||||
assert_ne!(
|
assert_ne!(
|
||||||
**ArrayVec::len_sim(&insns),
|
**ArrayVec::len_sim(&insns),
|
||||||
0,
|
0,
|
||||||
|
|
@ -2039,6 +2236,9 @@ struct ExecuteRetireStageState<C: PhantomConstGet<CpuConfig> + PhantomConstCpuCo
|
||||||
#[hdl(no_static)]
|
#[hdl(no_static)]
|
||||||
struct ExecuteRetireStageOutput<C: PhantomConstGet<CpuConfig>> {
|
struct ExecuteRetireStageOutput<C: PhantomConstGet<CpuConfig>> {
|
||||||
train_branch_predictor: HdlOption<TrainBranchPredictor>,
|
train_branch_predictor: HdlOption<TrainBranchPredictor>,
|
||||||
|
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
|
||||||
|
id: UInt<12>,
|
||||||
|
pc: UInt<64>,
|
||||||
config: C,
|
config: C,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2047,12 +2247,18 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for ExecuteRetireStageOutput<C> {
|
||||||
fn sim_value_default(self) -> SimValue<Self> {
|
fn sim_value_default(self) -> SimValue<Self> {
|
||||||
let Self {
|
let Self {
|
||||||
train_branch_predictor,
|
train_branch_predictor,
|
||||||
|
fetch_block_id,
|
||||||
|
id,
|
||||||
|
pc: _,
|
||||||
config,
|
config,
|
||||||
} = self;
|
} = self;
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
Self {
|
Self {
|
||||||
train_branch_predictor: #[hdl(sim)]
|
train_branch_predictor: #[hdl(sim)]
|
||||||
train_branch_predictor.HdlNone(),
|
train_branch_predictor.HdlNone(),
|
||||||
|
fetch_block_id: fetch_block_id.zero(),
|
||||||
|
id: id.zero(),
|
||||||
|
pc: 0u64,
|
||||||
config,
|
config,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2089,6 +2295,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
|
||||||
type InputQueueSize = CpuConfigRobSize<C>;
|
type InputQueueSize = CpuConfigRobSize<C>;
|
||||||
type OutputQueueSize = CpuConfigFetchWidth<C>;
|
type OutputQueueSize = CpuConfigFetchWidth<C>;
|
||||||
const HAS_EXTERNAL_PIPE: bool = true;
|
const HAS_EXTERNAL_PIPE: bool = true;
|
||||||
|
const NAME: &'static str = "execute_retire";
|
||||||
|
|
||||||
fn inputs_ty(config: C) -> Self::Inputs {
|
fn inputs_ty(config: C) -> Self::Inputs {
|
||||||
ExecuteRetireStageInput[config]
|
ExecuteRetireStageInput[config]
|
||||||
|
|
@ -2150,6 +2357,23 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[hdl]
|
||||||
|
fn dump_output_item(item: &SimValue<Self::Output>) -> String {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let ExecuteRetireStageOutput::<_> {
|
||||||
|
train_branch_predictor: _,
|
||||||
|
fetch_block_id,
|
||||||
|
id,
|
||||||
|
pc,
|
||||||
|
config: _,
|
||||||
|
} = item;
|
||||||
|
format!(
|
||||||
|
"fid={:#x} id={id} pc={:#x}",
|
||||||
|
fetch_block_id.as_int(),
|
||||||
|
pc.as_int(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn run(
|
fn run(
|
||||||
state: &mut SimValue<Self>,
|
state: &mut SimValue<Self>,
|
||||||
|
|
@ -2318,6 +2542,9 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
ExecuteRetireStageOutput::<_> {
|
ExecuteRetireStageOutput::<_> {
|
||||||
train_branch_predictor,
|
train_branch_predictor,
|
||||||
|
fetch_block_id: &insn.fetch_block_id,
|
||||||
|
id,
|
||||||
|
pc: insn.pc,
|
||||||
config,
|
config,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
|
@ -2341,6 +2568,9 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
ExecuteRetireStageOutput::<_> {
|
ExecuteRetireStageOutput::<_> {
|
||||||
train_branch_predictor,
|
train_branch_predictor,
|
||||||
|
fetch_block_id: &insn.fetch_block_id,
|
||||||
|
id,
|
||||||
|
pc: insn.pc,
|
||||||
config,
|
config,
|
||||||
},
|
},
|
||||||
]),
|
]),
|
||||||
|
|
@ -2896,7 +3126,7 @@ impl ResetSteps for BranchTargetBuffer {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[hdl]
|
#[hdl]
|
||||||
struct Queue<T, Capacity: Size> {
|
struct Queue<T, Capacity: Size, Name: PhantomConstGet<str>> {
|
||||||
data: ArrayType<T, Capacity>,
|
data: ArrayType<T, Capacity>,
|
||||||
/// inclusive
|
/// inclusive
|
||||||
start: UIntInRangeType<ConstUsize<0>, Capacity>,
|
start: UIntInRangeType<ConstUsize<0>, Capacity>,
|
||||||
|
|
@ -2904,9 +3134,18 @@ struct Queue<T, Capacity: Size> {
|
||||||
end: UIntInRangeType<ConstUsize<0>, Capacity>,
|
end: UIntInRangeType<ConstUsize<0>, Capacity>,
|
||||||
/// used to disambiguate between a full and an empty queue
|
/// used to disambiguate between a full and an empty queue
|
||||||
eq_start_end_means_full: Bool,
|
eq_start_end_means_full: Bool,
|
||||||
|
name: Name,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Type, Capacity: Size> Queue<T, Capacity> {
|
impl<T: Type, Capacity: Size, Name: Type + PhantomConstGet<str>> Queue<T, Capacity, Name> {
|
||||||
|
fn debug_op(self, fn_name: &str, data: &SimValue<T>) {
|
||||||
|
println!("Queue::<_, _, {:?}>::{fn_name}: {data:#?}", self.name);
|
||||||
|
}
|
||||||
|
fn dump(this: &SimValue<Self>, dump_item: impl Fn(&SimValue<T>) -> String) {
|
||||||
|
let name = this.name.ty().get();
|
||||||
|
let items = Vec::from_iter(Self::peek_iter(this).map(|v| DebugAsDisplay(dump_item(&v))));
|
||||||
|
println!("Queue {name}: {items:#?}");
|
||||||
|
}
|
||||||
fn capacity(self) -> usize {
|
fn capacity(self) -> usize {
|
||||||
self.data.len()
|
self.data.len()
|
||||||
}
|
}
|
||||||
|
|
@ -2951,8 +3190,11 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
|
||||||
let end = *this.end;
|
let end = *this.end;
|
||||||
*this.end = this.ty().next_pos(end);
|
*this.end = this.ty().next_pos(end);
|
||||||
*this.eq_start_end_means_full = true;
|
*this.eq_start_end_means_full = true;
|
||||||
|
let this_ty = this.ty();
|
||||||
let data = &mut this.data[end];
|
let data = &mut this.data[end];
|
||||||
*data = dbg!(value.to_sim_value_with_type(data.ty()));
|
let value = value.to_sim_value_with_type(data.ty());
|
||||||
|
this_ty.debug_op("push", &value);
|
||||||
|
*data = value;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2963,6 +3205,7 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
|
||||||
let end = this.ty().prev_pos(*this.end);
|
let end = this.ty().prev_pos(*this.end);
|
||||||
*this.end = end;
|
*this.end = end;
|
||||||
let data = this.data[end].clone();
|
let data = this.data[end].clone();
|
||||||
|
this.ty().debug_op("undo_push", &data);
|
||||||
*this.eq_start_end_means_full = false;
|
*this.eq_start_end_means_full = false;
|
||||||
Some(data)
|
Some(data)
|
||||||
}
|
}
|
||||||
|
|
@ -2976,9 +3219,8 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
|
||||||
}
|
}
|
||||||
fn peek_iter(
|
fn peek_iter(
|
||||||
this: &SimValue<Self>,
|
this: &SimValue<Self>,
|
||||||
) -> impl Clone + DoubleEndedIterator<Item = SimValue<T>> + ExactSizeIterator {
|
) -> impl Clone + DoubleEndedIterator<Item = &SimValue<T>> + ExactSizeIterator {
|
||||||
(0..Self::len(this))
|
(0..Self::len(this)).map(|nth| &this.data[this.ty().nth_pos_after(*this.start, nth)])
|
||||||
.map(|nth| dbg!(this.data[this.ty().nth_pos_after(*this.start, nth)].clone()))
|
|
||||||
}
|
}
|
||||||
fn pop(this: &mut SimValue<Self>) -> Option<SimValue<T>> {
|
fn pop(this: &mut SimValue<Self>) -> Option<SimValue<T>> {
|
||||||
if Self::is_empty(this) {
|
if Self::is_empty(this) {
|
||||||
|
|
@ -2987,13 +3229,14 @@ impl<T: Type, Capacity: Size> Queue<T, Capacity> {
|
||||||
let start = *this.start;
|
let start = *this.start;
|
||||||
*this.start = this.ty().next_pos(start);
|
*this.start = this.ty().next_pos(start);
|
||||||
let data = this.data[start].clone();
|
let data = this.data[start].clone();
|
||||||
|
this.ty().debug_op("pop", &data);
|
||||||
*this.eq_start_end_means_full = false;
|
*this.eq_start_end_means_full = false;
|
||||||
Some(data)
|
Some(data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity> {
|
impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity, PhantomConst<str>> {
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn sim_value_default(self) -> SimValue<Self> {
|
fn sim_value_default(self) -> SimValue<Self> {
|
||||||
let Self {
|
let Self {
|
||||||
|
|
@ -3001,9 +3244,10 @@ impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity>
|
||||||
start,
|
start,
|
||||||
end,
|
end,
|
||||||
eq_start_end_means_full: _,
|
eq_start_end_means_full: _,
|
||||||
|
name,
|
||||||
} = self;
|
} = self;
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
Queue::<T, Capacity> {
|
Queue::<T, Capacity, _> {
|
||||||
data: repeat(
|
data: repeat(
|
||||||
data.element().sim_value_default(),
|
data.element().sim_value_default(),
|
||||||
Capacity::from_usize(data.len()),
|
Capacity::from_usize(data.len()),
|
||||||
|
|
@ -3011,19 +3255,21 @@ impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity>
|
||||||
start: 0usize.to_sim_value_with_type(start),
|
start: 0usize.to_sim_value_with_type(start),
|
||||||
end: 0usize.to_sim_value_with_type(end),
|
end: 0usize.to_sim_value_with_type(end),
|
||||||
eq_start_end_means_full: false,
|
eq_start_end_means_full: false,
|
||||||
|
name,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: SimValueDefault, Capacity: Size> ResetSteps for Queue<T, Capacity> {
|
impl<T: SimValueDefault, Capacity: Size> ResetSteps for Queue<T, Capacity, PhantomConst<str>> {
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
|
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
let Queue::<T, Capacity> {
|
let Queue::<T, Capacity, _> {
|
||||||
data,
|
data,
|
||||||
start,
|
start,
|
||||||
end,
|
end,
|
||||||
eq_start_end_means_full,
|
eq_start_end_means_full,
|
||||||
|
name: _,
|
||||||
} = this;
|
} = this;
|
||||||
**start = 0;
|
**start = 0;
|
||||||
**end = 0;
|
**end = 0;
|
||||||
|
|
@ -3054,11 +3300,25 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> CancelInProgressForStageWithQueues<C
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[hdl(get(|name| PhantomConst::new_deref(format!("{name}.input_queue"))))]
|
||||||
|
type StageWithQueuesInputQueueName<Name: PhantomConstGet<str>> = PhantomConst<str>;
|
||||||
|
|
||||||
|
#[hdl(get(|name| PhantomConst::new_deref(format!("{name}.output_queue"))))]
|
||||||
|
type StageWithQueuesOutputQueueName<Name: PhantomConstGet<str>> = PhantomConst<str>;
|
||||||
|
|
||||||
#[hdl(no_static)]
|
#[hdl(no_static)]
|
||||||
struct StageWithQueues<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig, S: Type + Stage<C>> {
|
struct StageWithQueues<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig, S: Type + Stage<C>> {
|
||||||
input_queue: Queue<StageInputStagesOutputs<C, S>, StageInputQueueSize<C, S>>,
|
input_queue: Queue<
|
||||||
|
StageInputStagesOutputs<C, S>,
|
||||||
|
StageInputQueueSize<C, S>,
|
||||||
|
StageWithQueuesInputQueueName<StageName<C, S>>,
|
||||||
|
>,
|
||||||
state: S,
|
state: S,
|
||||||
output_queue: Queue<StageOutput<C, S>, StageOutputQueueSize<C, S>>,
|
output_queue: Queue<
|
||||||
|
StageOutput<C, S>,
|
||||||
|
StageOutputQueueSize<C, S>,
|
||||||
|
StageWithQueuesOutputQueueName<StageName<C, S>>,
|
||||||
|
>,
|
||||||
config: C,
|
config: C,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3169,6 +3429,18 @@ enum StageWithQueuesRunResult<C: PhantomConstCpuConfig, S: Stage<C>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
|
impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
|
||||||
|
#[hdl]
|
||||||
|
fn dump_queues(this: &SimValue<Self>) {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let Self {
|
||||||
|
input_queue,
|
||||||
|
state: _,
|
||||||
|
output_queue,
|
||||||
|
config: _,
|
||||||
|
} = this;
|
||||||
|
Queue::dump(input_queue, S::InputStages::dump_output_items);
|
||||||
|
Queue::dump(output_queue, S::dump_output_item);
|
||||||
|
}
|
||||||
fn input_queue_space_left_with_sibling(
|
fn input_queue_space_left_with_sibling(
|
||||||
this: &SimValue<Self>,
|
this: &SimValue<Self>,
|
||||||
sibling: &<S::SiblingStage as SiblingStageOrUnit<C, S>>::SimValueStageWithQueues,
|
sibling: &<S::SiblingStage as SiblingStageOrUnit<C, S>>::SimValueStageWithQueues,
|
||||||
|
|
@ -3341,7 +3613,7 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
|
||||||
state,
|
state,
|
||||||
output_queue,
|
output_queue,
|
||||||
config,
|
config,
|
||||||
} = this;
|
} = &mut *this;
|
||||||
let config = config.ty();
|
let config = config.ty();
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
let StageWithQueuesInputs::<_, _> {
|
let StageWithQueuesInputs::<_, _> {
|
||||||
|
|
@ -3659,11 +3931,39 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[hdl]
|
#[hdl]
|
||||||
|
fn get_execute_retire_output(
|
||||||
|
this: &SimValue<Self>,
|
||||||
|
) -> (usize, Option<SimValue<TrainBranchPredictor>>) {
|
||||||
|
let config = this.config.ty();
|
||||||
|
let mut retire_count = 0usize;
|
||||||
|
for execute_retire_output in
|
||||||
|
Queue::peek_iter(&this.execute_retire.output_queue).take(config.get().fetch_width.get())
|
||||||
|
{
|
||||||
|
retire_count += 1;
|
||||||
|
#[hdl(sim)]
|
||||||
|
let ExecuteRetireStageOutput::<_> {
|
||||||
|
train_branch_predictor,
|
||||||
|
fetch_block_id: _,
|
||||||
|
id: _,
|
||||||
|
pc: _,
|
||||||
|
config: _,
|
||||||
|
} = &execute_retire_output;
|
||||||
|
#[hdl(sim)]
|
||||||
|
if let HdlSome(train_branch_predictor) = train_branch_predictor {
|
||||||
|
// for now we only retire one conditional branch per clock cycle
|
||||||
|
// TODO: maybe improve later?
|
||||||
|
return (retire_count, Some(train_branch_predictor.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(retire_count, None)
|
||||||
|
}
|
||||||
|
#[hdl]
|
||||||
fn run(
|
fn run(
|
||||||
this: &mut SimValue<Self>,
|
this: &mut SimValue<Self>,
|
||||||
inputs: &SimValue<AllStagesInputs<C>>,
|
inputs: &SimValue<AllStagesInputs<C>>,
|
||||||
last_outputs: &SimValue<AllStagesOutputs<C>>,
|
last_outputs: &SimValue<AllStagesOutputs<C>>,
|
||||||
) -> Result<(), SimValue<CancelInProgress<C>>> {
|
) -> Result<(), SimValue<CancelInProgress<C>>> {
|
||||||
|
let (retire_count, _) = Self::get_execute_retire_output(this);
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
let Self {
|
let Self {
|
||||||
next_pc,
|
next_pc,
|
||||||
|
|
@ -3672,9 +3972,16 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
|
||||||
post_decode,
|
post_decode,
|
||||||
execute_retire,
|
execute_retire,
|
||||||
config,
|
config,
|
||||||
} = this;
|
} = &mut *this;
|
||||||
let config = config.ty();
|
let config = config.ty();
|
||||||
let cancel_ty = CancelInProgress[config];
|
let cancel_ty = CancelInProgress[config];
|
||||||
|
for _ in 0..retire_count {
|
||||||
|
// items were handled in the previous clock cycle,
|
||||||
|
// but are removed only now so you can see them for debugging
|
||||||
|
let Some(_) = Queue::pop(&mut execute_retire.output_queue) else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
}
|
||||||
match StageWithQueues::run(
|
match StageWithQueues::run(
|
||||||
execute_retire,
|
execute_retire,
|
||||||
&inputs.execute_retire,
|
&inputs.execute_retire,
|
||||||
|
|
@ -3832,28 +4139,33 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
|
||||||
input_stages_outputs_popped_count: _,
|
input_stages_outputs_popped_count: _,
|
||||||
} => {}
|
} => {}
|
||||||
}
|
}
|
||||||
for _ in 0..config.get().fetch_width.get() {
|
match Self::get_execute_retire_output(this) {
|
||||||
let Some(execute_retire_output) = Queue::pop(&mut execute_retire.output_queue) else {
|
(_, Some(train_branch_predictor)) => BrPredStageState::train_branch_predictor(
|
||||||
break;
|
&mut this.br_pred.state,
|
||||||
};
|
&train_branch_predictor,
|
||||||
#[hdl(sim)]
|
),
|
||||||
let ExecuteRetireStageOutput::<_> {
|
(_, None) => {}
|
||||||
train_branch_predictor,
|
|
||||||
config: _,
|
|
||||||
} = &execute_retire_output;
|
|
||||||
#[hdl(sim)]
|
|
||||||
if let HdlSome(train_branch_predictor) = train_branch_predictor {
|
|
||||||
BrPredStageState::train_branch_predictor(
|
|
||||||
&mut br_pred.state,
|
|
||||||
train_branch_predictor,
|
|
||||||
);
|
|
||||||
// for now we only retire one conditional branch per clock cycle
|
|
||||||
// TODO: maybe improve later?
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
#[hdl]
|
||||||
|
fn dump_queues(this: &SimValue<Self>) {
|
||||||
|
#[hdl(sim)]
|
||||||
|
let Self {
|
||||||
|
next_pc,
|
||||||
|
br_pred,
|
||||||
|
fetch_decode,
|
||||||
|
post_decode,
|
||||||
|
execute_retire,
|
||||||
|
config: _,
|
||||||
|
} = this;
|
||||||
|
println!("Dump Queues:");
|
||||||
|
StageWithQueues::dump_queues(next_pc);
|
||||||
|
StageWithQueues::dump_queues(br_pred);
|
||||||
|
StageWithQueues::dump_queues(fetch_decode);
|
||||||
|
StageWithQueues::dump_queues(post_decode);
|
||||||
|
StageWithQueues::dump_queues(execute_retire);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[hdl(no_static)]
|
#[hdl(no_static)]
|
||||||
|
|
@ -4038,6 +4350,14 @@ pub fn next_pc(config: PhantomConst<CpuConfig>) {
|
||||||
sim.write(state_expr, state).await;
|
sim.write(state_expr, state).await;
|
||||||
sim.wait_for_clock_edge(cd.clk).await;
|
sim.wait_for_clock_edge(cd.clk).await;
|
||||||
state = sim.read_past(state_expr, cd.clk).await;
|
state = sim.read_past(state_expr, cd.clk).await;
|
||||||
|
AllStages::dump_queues(&state.all_stages);
|
||||||
|
let next_retire_insn_ids = sim.read_past(from_retire.next_insn_ids, cd.clk).await;
|
||||||
|
let next_retire_insn_ids = ArrayVec::elements_sim_ref(&next_retire_insn_ids);
|
||||||
|
let expected_next_retire_insn_ids = Vec::from_iter(
|
||||||
|
Queue::peek_iter(&state.all_stages.execute_retire.input_queue)
|
||||||
|
.map(|v| v.insn.id.clone()),
|
||||||
|
);
|
||||||
|
assert_eq!(next_retire_insn_ids, expected_next_retire_insn_ids);
|
||||||
let AllStagesInputs {
|
let AllStagesInputs {
|
||||||
next_pc,
|
next_pc,
|
||||||
br_pred,
|
br_pred,
|
||||||
|
|
@ -4058,7 +4378,16 @@ pub fn next_pc(config: PhantomConst<CpuConfig>) {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
if let HdlSome(data) = sim.read_past(from_retire.inner.data, cd.clk).await {
|
if let HdlSome(data) = sim.read_past(from_retire.inner.data, cd.clk).await {
|
||||||
#[hdl(sim)]
|
#[hdl(sim)]
|
||||||
let RetireToNextPcInterfaceInner::<_> { insns, config: _ } = data;
|
let RetireToNextPcInterfaceInner::<_> {
|
||||||
|
mut insns,
|
||||||
|
config: _,
|
||||||
|
} = data;
|
||||||
|
if !sim.read_past_bool(from_retire.inner.ready, cd.clk).await {
|
||||||
|
// since we can have `outputs.execute_retire.from_external_pipe_output_ready > 0`
|
||||||
|
// without `from_retire.inner.ready` being set, make sure we don't retire any instructions in that case
|
||||||
|
ArrayVec::truncate_sim(&mut insns, 0);
|
||||||
|
}
|
||||||
|
println!("from retire: {:#?}", ArrayVec::elements_sim_ref(&insns));
|
||||||
insns
|
insns
|
||||||
} else {
|
} else {
|
||||||
execute_retire
|
execute_retire
|
||||||
|
|
@ -4148,7 +4477,8 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_queue() {
|
fn test_queue() {
|
||||||
let mut queue: SimValue<Queue<UInt<8>, ConstUsize<8>>> = Queue::TYPE.sim_value_default();
|
let mut queue: SimValue<Queue<UInt<8>, ConstUsize<8>, PhantomConst<str>>> =
|
||||||
|
Queue::TYPE.sim_value_default();
|
||||||
let mut reference_queue = VecDeque::new();
|
let mut reference_queue = VecDeque::new();
|
||||||
let mut tested_full = false;
|
let mut tested_full = false;
|
||||||
let mut tested_empty = false;
|
let mut tested_empty = false;
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -12,7 +12,11 @@ use cpu::{
|
||||||
unit::UnitKind,
|
unit::UnitKind,
|
||||||
util::array_vec::ArrayVec,
|
util::array_vec::ArrayVec,
|
||||||
};
|
};
|
||||||
use fayalite::{prelude::*, sim::vcd::VcdWriterDecls, util::RcWriter};
|
use fayalite::{
|
||||||
|
prelude::*,
|
||||||
|
sim::vcd::VcdWriterDecls,
|
||||||
|
util::{DebugAsDisplay, RcWriter},
|
||||||
|
};
|
||||||
use std::{
|
use std::{
|
||||||
cell::Cell,
|
cell::Cell,
|
||||||
collections::{BTreeMap, BTreeSet, VecDeque},
|
collections::{BTreeMap, BTreeSet, VecDeque},
|
||||||
|
|
@ -565,13 +569,31 @@ impl MockExecuteState {
|
||||||
#[hdl]
|
#[hdl]
|
||||||
fn try_retire(
|
fn try_retire(
|
||||||
&mut self,
|
&mut self,
|
||||||
) -> Option<Result<SimValue<RetireToNextPcInterfacePerInsn<PhantomConst<CpuConfig>>>, String>>
|
) -> Option<(
|
||||||
{
|
SimValue<RetireToNextPcInterfacePerInsn<PhantomConst<CpuConfig>>>,
|
||||||
|
Result<(), String>,
|
||||||
|
)> {
|
||||||
if self.queue.front()?.cycles_left.as_int() != 0 {
|
if self.queue.front()?.cycles_left.as_int() != 0 {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let entry = self.queue.pop_front()?;
|
let entry = self.queue.pop_front()?;
|
||||||
Some(self.do_retire(entry))
|
let id = entry.insn.id.clone();
|
||||||
|
Some(match self.do_retire(entry) {
|
||||||
|
Ok(v) => (v, Ok(())),
|
||||||
|
Err(e) => (
|
||||||
|
#[hdl(sim)]
|
||||||
|
RetireToNextPcInterfacePerInsn::<_> {
|
||||||
|
id,
|
||||||
|
next_pc: u64::from_be_bytes(*b"ErrError"),
|
||||||
|
call_stack_op: #[hdl(sim)]
|
||||||
|
CallStackOp::None(),
|
||||||
|
cond_br_taken: #[hdl(sim)]
|
||||||
|
HdlNone(),
|
||||||
|
config: self.config,
|
||||||
|
},
|
||||||
|
Err(e),
|
||||||
|
),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
fn space_available(&self) -> usize {
|
fn space_available(&self) -> usize {
|
||||||
EXECUTE_RETIRE_PIPE_QUEUE_SIZE.saturating_sub(self.queue.len())
|
EXECUTE_RETIRE_PIPE_QUEUE_SIZE.saturating_sub(self.queue.len())
|
||||||
|
|
@ -621,6 +643,11 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
|
||||||
retire_output.ty().inner.data.HdlNone(),
|
retire_output.ty().inner.data.HdlNone(),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
sim.write(
|
||||||
|
retire_output.next_insn_ids,
|
||||||
|
retire_output.next_insn_ids.ty().new_sim(0_hdl_u12),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
sim.write(
|
sim.write(
|
||||||
queue_debug,
|
queue_debug,
|
||||||
queue_debug
|
queue_debug
|
||||||
|
|
@ -672,30 +699,21 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
|
||||||
let mut sim_queue = queue_debug
|
let mut sim_queue = queue_debug
|
||||||
.ty()
|
.ty()
|
||||||
.new_sim(ExecuteRetirePipeQueueEntry.default_sim());
|
.new_sim(ExecuteRetirePipeQueueEntry.default_sim());
|
||||||
|
let mut next_insn_ids = retire_output.next_insn_ids.ty().new_sim(0_hdl_u12);
|
||||||
for entry in &state.queue {
|
for entry in &state.queue {
|
||||||
ArrayVec::try_push_sim(&mut sim_queue, entry)
|
ArrayVec::try_push_sim(&mut sim_queue, entry)
|
||||||
.ok()
|
.ok()
|
||||||
.expect("queue is known to be small enough");
|
.expect("queue is known to be small enough");
|
||||||
|
let _ = ArrayVec::try_push_sim(&mut next_insn_ids, &entry.insn.id);
|
||||||
}
|
}
|
||||||
sim.write(queue_debug, sim_queue).await;
|
sim.write(queue_debug, sim_queue).await;
|
||||||
|
sim.write(retire_output.next_insn_ids, next_insn_ids).await;
|
||||||
let mut retiring = retire_vec_ty.new_sim(&empty_retire_insn);
|
let mut retiring = retire_vec_ty.new_sim(&empty_retire_insn);
|
||||||
let mut peek_state = state.clone();
|
let mut peek_state = state.clone();
|
||||||
while let Some(peek_retire) = peek_state.try_retire() {
|
while let Some((peek_retire, result)) = peek_state.try_retire() {
|
||||||
if peek_retire.is_err() && **ArrayVec::len_sim(&retiring) > 0 {
|
if result.is_err() && **ArrayVec::len_sim(&retiring) > 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let peek_retire = peek_retire.unwrap_or_else(|_| {
|
|
||||||
#[hdl(sim)]
|
|
||||||
RetireToNextPcInterfacePerInsn::<_> {
|
|
||||||
id: 0_hdl_u12,
|
|
||||||
next_pc: u64::from_be_bytes(*b"ErrError"),
|
|
||||||
call_stack_op: #[hdl(sim)]
|
|
||||||
CallStackOp::None(),
|
|
||||||
cond_br_taken: #[hdl(sim)]
|
|
||||||
HdlNone(),
|
|
||||||
config,
|
|
||||||
}
|
|
||||||
});
|
|
||||||
let Ok(_) = ArrayVec::try_push_sim(&mut retiring, peek_retire) else {
|
let Ok(_) = ArrayVec::try_push_sim(&mut retiring, peek_retire) else {
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
|
|
@ -723,11 +741,22 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
sim.wait_for_clock_edge(cd.clk).await;
|
sim.wait_for_clock_edge(cd.clk).await;
|
||||||
|
println!(
|
||||||
|
"Dump mock execute retire pipe queue: {:#?}",
|
||||||
|
Vec::from_iter(state.queue.iter().map(|v| {
|
||||||
|
DebugAsDisplay(format!(
|
||||||
|
"fid={:#x} id={} pc={:#x}",
|
||||||
|
v.insn.fetch_block_id.as_int(),
|
||||||
|
v.insn.id,
|
||||||
|
v.insn.pc.as_int(),
|
||||||
|
))
|
||||||
|
}))
|
||||||
|
);
|
||||||
if sim.read_past_bool(retire_output.inner.ready, cd.clk).await {
|
if sim.read_past_bool(retire_output.inner.ready, cd.clk).await {
|
||||||
for _ in 0..**ArrayVec::len_sim(&retiring) {
|
for _ in 0..**ArrayVec::len_sim(&retiring) {
|
||||||
match state.try_retire() {
|
match state.try_retire() {
|
||||||
Some(Ok(_)) => {}
|
Some((_, Ok(_))) => {}
|
||||||
Some(Err(e)) => panic!("retire error: {e}"),
|
Some((_, Err(e))) => panic!("retire error: {e}"),
|
||||||
None => unreachable!(),
|
None => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -737,7 +766,7 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
|
||||||
&mut new_insns,
|
&mut new_insns,
|
||||||
*sim.read_past(from_post_decode.ready, cd.clk).await,
|
*sim.read_past(from_post_decode.ready, cd.clk).await,
|
||||||
);
|
);
|
||||||
for insn in ArrayVec::elements_sim_ref(&new_insns) {
|
for insn in dbg!(ArrayVec::elements_sim_ref(&new_insns)) {
|
||||||
state.start(insn, delay_sequence_index);
|
state.start(insn, delay_sequence_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -803,7 +832,7 @@ fn test_next_pc() {
|
||||||
config.fetch_width = NonZeroUsize::new(2).unwrap();
|
config.fetch_width = NonZeroUsize::new(2).unwrap();
|
||||||
let m = dut(PhantomConst::new_sized(config));
|
let m = dut(PhantomConst::new_sized(config));
|
||||||
let mut sim = Simulation::new(m);
|
let mut sim = Simulation::new(m);
|
||||||
let mut writer = RcWriter::default();
|
let writer = RcWriter::default();
|
||||||
sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
|
sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
|
||||||
struct DumpVcdOnDrop {
|
struct DumpVcdOnDrop {
|
||||||
writer: Option<RcWriter>,
|
writer: Option<RcWriter>,
|
||||||
|
|
@ -823,6 +852,7 @@ fn test_next_pc() {
|
||||||
sim.write_reset(sim.io().cd.rst, true);
|
sim.write_reset(sim.io().cd.rst, true);
|
||||||
for _cycle in 0..300 {
|
for _cycle in 0..300 {
|
||||||
sim.advance_time(SimDuration::from_nanos(500));
|
sim.advance_time(SimDuration::from_nanos(500));
|
||||||
|
println!("clock tick");
|
||||||
sim.write_clock(sim.io().cd.clk, true);
|
sim.write_clock(sim.io().cd.clk, true);
|
||||||
sim.advance_time(SimDuration::from_nanos(500));
|
sim.advance_time(SimDuration::from_nanos(500));
|
||||||
sim.write_clock(sim.io().cd.clk, false);
|
sim.write_clock(sim.io().cd.clk, false);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue