Compare commits

..

1 commit

Author SHA1 Message Date
bf2cb688c7
implement register fences and use for L2 reg file writes and when running out of L2 reg file output regs
All checks were successful
/ test (pull_request) Successful in 6m25s
fixes deadlock when running rename_execute_retire_save_restore_gprs
2026-05-21 17:23:57 -07:00
6 changed files with 198708 additions and 95595 deletions

View file

@ -636,11 +636,15 @@ type SimOnlyMOpInUnitState = SimOnly<MOpInUnitState>;
// Debug snapshot of a `RobEntry` expressed as an `#[hdl]` struct.
// `RobEntry::debug_state` builds one of these by converting each `RobEntry` field to its
// HDL-typed counterpart; `SimValueDefault::sim_value_default` builds the all-clear value
// (flags `false`, options `HdlNone`, `mop`/`unit_index` zeroed).
//
// NOTE(review): this listing is a compare view with the +/- markers stripped. The
// `all_prior_mops_are_finished` line below is presumably the pre-change name of
// `all_prior_mops_finished_and_or_caused_cancel` (later exhaustive `RobEntry { .. }`
// destructurings list only the longer name) -- confirm against the checked-out file.
#[hdl(no_static)]
struct RobEntryDebugState<C: PhantomConstGet<CpuConfig>> {
/// See [`RobEntry::is_register_fence`]
is_register_fence: Bool,
/// See [`RobEntry::is_register_fence`]
done_waiting_for_register_fences: Bool,
// The renamed µOp this entry tracks (copied as-is from `RobEntry::mop`).
mop: MOpInstance<RenamedMOp<C>>,
// `RobEntry::unit_index` (a `usize`) converted to a ranged HDL integer.
unit_index: UIntInRangeType<ConstUsize<0>, CpuConfigUnitCount<C>>,
// `SimOnlyMOpInUnitState` is an alias for `SimOnly<MOpInUnitState>`.
mop_in_unit_state: SimOnlyMOpInUnitState,
is_speculative: Bool,
all_prior_mops_are_finished: Bool,
all_prior_mops_finished_and_or_caused_cancel: Bool,
// `HdlNone` until the corresponding `Option` in `RobEntry` is populated.
output: HdlOption<NextPcPredictorOp<C>>,
caused_cancel: HdlOption<UnitCausedCancel<C>>,
}
@ -650,20 +654,24 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for RobEntryDebugState<C> {
fn sim_value_default(self) -> SimValue<Self> {
let Self {
mop,
is_register_fence: _,
done_waiting_for_register_fences: _,
unit_index,
mop_in_unit_state: _,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output,
caused_cancel,
} = self;
#[hdl(sim)]
Self {
mop: zeroed(mop),
is_register_fence: false,
done_waiting_for_register_fences: false,
unit_index: zeroed(unit_index),
mop_in_unit_state: SimOnlyValue::default(),
is_speculative: false,
all_prior_mops_are_finished: false,
all_prior_mops_finished_and_or_caused_cancel: false,
output: #[hdl(sim)]
output.HdlNone(),
caused_cancel: #[hdl(sim)]
@ -674,23 +682,34 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for RobEntryDebugState<C> {
/// Bookkeeping for one renamed µOp; created by [`Self::new`] and mirrored for debugging by
/// `RobEntry::debug_state`.
//
// NOTE(review): this listing is a compare view with the +/- markers stripped. The
// `all_prior_mops_are_finished` line below is presumably the pre-change name of
// `all_prior_mops_finished_and_or_caused_cancel` (later exhaustive `RobEntry { .. }`
// destructurings list only the longer name) -- confirm against the checked-out file.
#[derive(Debug)]
struct RobEntry<C: PhantomConstCpuConfig> {
/// Block this and all later µOps until all prior µOps are finished and didn't cause a cancel.
/// Not to be confused with memory fences.
is_register_fence: bool,
/// See [`Self::is_register_fence`]
///
/// Starts out `false` in [`Self::new`].
done_waiting_for_register_fences: bool,
/// The renamed µOp this entry tracks.
mop: SimValue<MOpInstance<RenamedMOp<C>>>,
/// Index of the unit this µOp was assigned to.
unit_index: usize,
/// Progress of the µOp inside its unit; starts as `MOpInUnitState::NotYetEnqueued`.
mop_in_unit_state: MOpInUnitState,
/// Starts out `true` in [`Self::new`].
is_speculative: bool,
all_prior_mops_are_finished: bool,
all_prior_mops_finished_and_or_caused_cancel: bool,
/// `None` until an output is recorded for this µOp.
output: Option<SimValue<NextPcPredictorOp<C>>>,
/// `None` unless this µOp caused a cancel.
caused_cancel: Option<SimValue<UnitCausedCancel<C>>>,
}
impl<C: PhantomConstCpuConfig> RobEntry<C> {
fn new(mop: SimValue<MOpInstance<RenamedMOp<C>>>, unit_index: usize) -> Self {
fn new(
mop: SimValue<MOpInstance<RenamedMOp<C>>>,
unit_index: usize,
is_register_fence: bool,
) -> Self {
Self {
mop,
is_register_fence,
done_waiting_for_register_fences: false,
unit_index,
mop_in_unit_state: MOpInUnitState::NotYetEnqueued,
is_speculative: true,
all_prior_mops_are_finished: false,
all_prior_mops_finished_and_or_caused_cancel: false,
output: None,
caused_cancel: None,
}
@ -710,22 +729,26 @@ impl<C: PhantomConstCpuConfig> RobEntry<C> {
#[hdl]
fn debug_state(&self, config: C) -> SimValue<RobEntryDebugState<C>> {
let Self {
is_register_fence,
done_waiting_for_register_fences,
mop,
unit_index,
mop_in_unit_state,
is_speculative,
all_prior_mops_are_finished,
all_prior_mops_finished_and_or_caused_cancel,
output,
caused_cancel,
} = self;
let ret_ty = RobEntryDebugState[config];
#[hdl(sim)]
RobEntryDebugState::<C> {
is_register_fence,
done_waiting_for_register_fences,
mop,
unit_index: unit_index.into_sim_value_with_type(ret_ty.unit_index),
mop_in_unit_state: SimOnlyValue::new(*mop_in_unit_state),
is_speculative,
all_prior_mops_are_finished,
all_prior_mops_finished_and_or_caused_cancel,
output: output.into_sim_value_with_type(ret_ty.output),
caused_cancel: caused_cancel.into_sim_value_with_type(ret_ty.caused_cancel),
}
@ -1054,7 +1077,7 @@ impl<C: PhantomConstCpuConfig> ReorderBuffer<C> {
}
/// Returns `true` when `self.renamed()` yields no entry at all, or when the first entry it
/// yields has its "all prior µOps" flag set and its own `mop_in_unit_state` reports
/// `is_finished_and_or_caused_cancel()`.
///
/// Only the first renamed entry is inspected (`.next()`); `Option::is_none_or` supplies the
/// `true` result for the empty case.
fn all_mops_are_finished_and_or_caused_cancel(&self) -> bool {
self.renamed().next().is_none_or(|entry| {
// NOTE(review): this listing is a compare view with the +/- markers stripped. The two
// lines below are the old and new spelling of the same operand; only one of them can be
// present in the real file, since two adjacent expressions with no operator would not
// parse. Presumably the longer name is the post-change one -- confirm.
entry.all_prior_mops_are_finished
entry.all_prior_mops_finished_and_or_caused_cancel
&& entry.mop_in_unit_state.is_finished_and_or_caused_cancel()
})
}
@ -1067,16 +1090,11 @@ const SimOnlyString: SimOnlyString = SimOnlyString::TYPE;
// Config-derived dynamic size: `rob_size` rounded up to the next power of two.
// NOTE(review): presumably a power of two so that a µOp id can be reduced to a slot index
// with a bit-mask (`id & mask`) -- confirm at the use site.
#[hdl(get(|c| c.rob_size.get().next_power_of_two()))]
type PerInsnTimelineLen<C: PhantomConstGet<CpuConfig>> = DynSize;
// Payload recorded when a rename is delayed behind a store to the L2 register file.
// NOTE(review): this listing is a compare view with the +/- markers stripped; the only uses
// of this type visible here sit next to `chosen_dest`-based replacements, so this definition
// is presumably pre-change text -- confirm against the checked-out file.
#[hdl(no_static)]
struct RenameDelayedForL2Store<C: PhantomConstGet<CpuConfig>> {
// Physical register picked as the destination once the store is done.
chosen_dest: PRegNum<C>,
// Id of the store µOp; the ROB entry with this id is looked up to see whether it finished.
l2_store_id: MOpId,
}
// A not-yet-renamed µOp together with the state carried between rename attempts.
// Entries are pushed onto `rename_delayed`, and the rename attempt hands the entry back as
// its `Err` value whenever the µOp cannot be renamed yet.
//
// NOTE(review): this listing is a compare view with the +/- markers stripped. The
// `delayed_for_l2_store` line below is presumably the pre-change form of `chosen_dest`
// (every rebuilt entry in the newer lines sets `chosen_dest`) -- confirm against the
// checked-out file.
#[hdl(no_static)]
struct RenameDelayedEntry<C: PhantomConstGet<CpuConfig>> {
// Forwarded into the `RobEntry` built for this µOp; entries start with `false` and are
// re-queued with `true` after an L2 register-file store is issued on their behalf.
is_register_fence: Bool,
// The µOp as it was before renaming.
mop: MOpInstance<MOp>,
delayed_for_l2_store: HdlOption<RenameDelayedForL2Store<C>>,
// When `HdlSome`, the destination register is already decided: its `unit_num` selects the
// unit and its `unit_out_reg` selects the output register, instead of choosing a unit anew.
chosen_dest: HdlOption<PRegNum<C>>,
}
#[hdl(no_static)]
@ -1145,11 +1163,13 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
let mut retval =
SimValue::from_array_elements(retval_ty, (0..len).map(|_| empty_string.clone()));
for RobEntry {
is_register_fence,
done_waiting_for_register_fences,
mop,
unit_index: _,
mop_in_unit_state,
is_speculative,
all_prior_mops_are_finished,
all_prior_mops_finished_and_or_caused_cancel,
output,
caused_cancel,
} in self.rob.renamed()
@ -1157,11 +1177,17 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
let masked_id = mop.id.as_int() as usize & mask;
**retval[masked_id] = fmt::from_fn(|f| {
f.write_str(mop_in_unit_state.debug_str())?;
if *is_register_fence {
f.write_str("(rf)")?;
}
if !*done_waiting_for_register_fences {
f.write_str("(wfrf)")?;
}
if *is_speculative {
f.write_str("(s)")?;
}
if *all_prior_mops_are_finished {
f.write_str("(apf)")?;
if *all_prior_mops_finished_and_or_caused_cancel {
f.write_str("(apfc)")?;
}
if output.is_some() {
f.write_str("(output)")?;
@ -1341,9 +1367,6 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
unrenamed: &SimValue<MOpInstance<MOp>>,
renamed: RobEntry<C>,
) -> &SimValue<MOpId> {
if let Some(unit_out_reg_index) = renamed.unit_out_reg_index() {
self.l1_reg_file[renamed.unit_index][unit_out_reg_index] = None;
}
self.rob.renamed_push_back_with_new_id(unrenamed, renamed)
}
fn update_rename_table(
@ -1362,8 +1385,9 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
) -> Result<(), SimValue<RenameDelayedEntry<C>>> {
#[hdl(sim)]
let RenameDelayedEntry::<_> {
is_register_fence,
mop: insn,
delayed_for_l2_store,
chosen_dest,
} = entry;
println!("try_rename: insn: {insn:?}");
if self.rob.unrenamed_len() >= self.config.get().rob_size.get() {
@ -1371,8 +1395,9 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
return Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
);
}
@ -1381,14 +1406,16 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
return Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
);
}
let unit_kind = UnitMOp::kind_sim(insn.mop.inner());
#[hdl(sim)]
if let MOp::TransformedMove(move_reg_mop) = insn.mop.inner() {
assert!(!*is_register_fence);
let mut src_regs = [MOpRegNum::const_zero_sim()];
MOpTrait::for_each_src_reg_sim_ref(move_reg_mop, &mut |src_reg, index| {
src_regs[index] = src_reg.clone();
@ -1433,32 +1460,12 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
}
}
let chosen_unit = #[hdl(sim)]
if let HdlSome(l2_store) = &delayed_for_l2_store {
let RobEntry {
mop: _,
unit_index: _,
mop_in_unit_state: MOpInUnitState::FinishedAndOrCausedCancel,
is_speculative: _,
all_prior_mops_are_finished: true,
output: _,
caused_cancel: None,
} = self.rob.renamed_by_id(&l2_store.l2_store_id)
else {
println!("try_rename: delaying for l2 store that isn't yet finished");
return Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
mop: insn,
delayed_for_l2_store,
},
);
};
let unit_index =
UnitNum::index_sim(&l2_store.chosen_dest.unit_num).expect("known to be some");
if let HdlSome(chosen_dest) = &chosen_dest {
let unit_index = UnitNum::index_sim(&chosen_dest.unit_num).expect("known to be some");
assert_eq!(self.config.get().units[unit_index].kind, unit_kind);
ChosenUnit {
unit_index,
out_reg_num: Some(UnitOutRegNum::value_sim(&l2_store.chosen_dest.unit_out_reg)),
out_reg_num: Some(UnitOutRegNum::value_sim(&chosen_dest.unit_out_reg)),
space_available: self.space_available_for_unit(unit_index),
}
} else {
@ -1495,8 +1502,9 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
return Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
);
}
@ -1547,8 +1555,9 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
return Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
);
}
@ -1580,28 +1589,25 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
.into_trace_as_string(),
},
self.l2_reg_file_unit_index,
*is_register_fence,
),
);
return Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence: true,
mop: insn,
delayed_for_l2_store: #[hdl(sim)]
(delayed_for_l2_store.ty()).HdlSome(
#[hdl(sim)]
RenameDelayedForL2Store::<_> {
chosen_dest: reg_to_free,
l2_store_id,
},
),
chosen_dest: #[hdl(sim)]
(chosen_dest.ty()).HdlSome(reg_to_free),
},
);
}
return Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
);
};
@ -1683,26 +1689,27 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
.into_trace_as_string(),
},
self.l2_reg_file_unit_index,
*is_register_fence,
),
);
println!("try_rename: read l2 reg");
Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence: false,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
)
} else if self.rob.all_mops_are_finished_and_or_caused_cancel() {
todo!("dropping the L2 register file's outputs still doesn't fix deadlocks");
// TODO: add proper WaW and WaR tracking for renamed registers
println!("try_rename: dropping all l2 reg file outputs");
self.update_rename_table(&insn, RenameTableUpdate::DropAllL2RegFileOutputs);
Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence: true,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
)
} else {
@ -1712,8 +1719,9 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence,
mop: insn,
delayed_for_l2_store,
chosen_dest,
},
)
};
@ -1755,6 +1763,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
mop,
},
unit_index,
*is_register_fence,
),
);
self.rob.finished_unrenamed_push_back(&insn);
@ -1796,13 +1805,25 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
}
let zero_reg = PRegNum[self.config].const_zero().into_sim_value();
let zero_value = zeroed(TraceAsString[PRegValue]);
for rob in self.rob.renamed() {
if rob.unit_index == unit_index
&& let Some(_) = rob.mop_in_unit_state.with_inputs_ready()
for RobEntry {
is_register_fence: _,
done_waiting_for_register_fences,
mop,
unit_index: rob_unit_index,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output: _,
caused_cancel: _,
} in self.rob.renamed()
{
if *done_waiting_for_register_fences
&& *rob_unit_index == unit_index
&& let Some(_) = mop_in_unit_state.with_inputs_ready()
{
let mut src_values: [_; COMMON_MOP_SRC_LEN] =
std::array::from_fn(|_| Some(zero_value.clone()));
MOpTrait::for_each_src_reg_sim_ref(rob.mop.mop.inner(), &mut |src_reg, index| {
MOpTrait::for_each_src_reg_sim_ref(mop.mop.inner(), &mut |src_reg, index| {
#[hdl(sim)]
let PRegNum::<_> {
unit_num,
@ -1822,7 +1843,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
ret_ty.HdlSome(
#[hdl(sim)]
UnitInputsReady::<_> {
mop: &rob.mop,
mop,
src_values,
config: self.config,
},
@ -1876,11 +1897,13 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
let rob = self.rob.renamed_by_id_mut(&id);
let out_reg_index = rob.unit_out_reg_index();
let RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output,
caused_cancel,
} = rob;
@ -1909,11 +1932,13 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
} = finish_cause_cancel;
assert!(!self.is_canceling());
let RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index: _,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output,
caused_cancel,
} = self.rob.renamed_by_id_mut(&id);
@ -1963,10 +1988,10 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
self.rename_delayed.push_back(
#[hdl(sim)]
RenameDelayedEntry::<_> {
is_register_fence: false,
mop: insn,
delayed_for_l2_store: #[hdl(sim)]
(HdlOption[RenameDelayedForL2Store[self.config]])
.HdlNone(),
chosen_dest: #[hdl(sim)]
(HdlOption[PRegNum[self.config]]).HdlNone(),
},
);
}
@ -2020,11 +2045,13 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
for retire_group in self.rob.retire_groups() {
for renamed_entry in retire_group.clone().flat_map(|v| &v.renamed_entries) {
if let RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index: _,
mop_in_unit_state: MOpInUnitState::FinishedAndOrCausedCancel,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output,
caused_cancel,
} = renamed_entry
@ -2153,11 +2180,13 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
.iter()
.for_each(|v| self.retire_rename_table.update(v, "retire_rename_table"));
for RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index: _,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output: _,
caused_cancel,
} in renamed_entries
@ -2188,7 +2217,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
return;
}
for renamed in self.rob.renamed_mut() {
renamed.all_prior_mops_are_finished = true;
renamed.all_prior_mops_finished_and_or_caused_cancel = true;
let MOpInUnitState::FinishedAndOrCausedCancel = renamed.mop_in_unit_state else {
break;
};
@ -2212,13 +2241,39 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
break;
}
}
for renamed in self.rob.renamed_mut() {
let RobEntry {
is_register_fence,
done_waiting_for_register_fences,
mop: _,
unit_index,
mop_in_unit_state: _,
is_speculative,
all_prior_mops_finished_and_or_caused_cancel,
output: _,
caused_cancel: _,
} = &renamed;
if *is_register_fence {
if !*all_prior_mops_finished_and_or_caused_cancel || *is_speculative {
break;
}
}
if !*done_waiting_for_register_fences {
if let Some(unit_out_reg_index) = renamed.unit_out_reg_index() {
self.l1_reg_file[*unit_index][unit_out_reg_index] = None;
}
}
renamed.done_waiting_for_register_fences = true;
}
let first_renamed = self.rob.renamed().next();
if let Some(RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index: _,
mop_in_unit_state: MOpInUnitState::FinishedAndOrCausedCancel,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output: _,
caused_cancel: Some(caused_cancel),
}) = first_renamed
@ -2326,11 +2381,13 @@ async fn rename_execute_retire_run(
if let HdlSome(enqueue) = sim.read_past(enqueue.data, cd.clk).await {
assert!(!state.is_canceling());
let RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index: _,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output,
caused_cancel,
} = state.rob.renamed_by_id_mut(&enqueue.mop.id);
@ -2345,14 +2402,17 @@ async fn rename_execute_retire_run(
if let HdlSome(inputs_ready) = sim.read_past(inputs_ready, cd.clk).await {
assert!(!state.is_canceling());
let RobEntry {
is_register_fence: _,
done_waiting_for_register_fences,
mop: _,
unit_index: _,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output,
caused_cancel,
} = state.rob.renamed_by_id_mut(&inputs_ready.mop.id);
assert!(*done_waiting_for_register_fences);
assert!(output.is_none());
assert!(caused_cancel.is_none());
*mop_in_unit_state = mop_in_unit_state
@ -2365,11 +2425,13 @@ async fn rename_execute_retire_run(
{
assert!(!state.is_canceling());
let RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index: _,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output: _,
caused_cancel: _,
} = state.rob.renamed_by_id_mut(&is_no_longer_speculative.id);
@ -2384,11 +2446,13 @@ async fn rename_execute_retire_run(
let UnitMOpCantCauseCancel::<_> { id, config: _ } = cant_cause_cancel;
assert!(!state.is_canceling());
let RobEntry {
is_register_fence: _,
done_waiting_for_register_fences: _,
mop: _,
unit_index: _,
mop_in_unit_state,
is_speculative: _,
all_prior_mops_are_finished: _,
all_prior_mops_finished_and_or_caused_cancel: _,
output: _,
caused_cancel,
} = state.rob.renamed_by_id_mut(&id);

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff