Compare commits

..

2 commits

Author SHA1 Message Date
ed331a3a96
WIP fixing deadlock when running rename_execute_retire_save_restore_gprs 2026-05-20 23:37:23 -07:00
3e08a282ec
add test_rename_execute_retire_save_restore_gprs
currently it fails due to the L2 reg file running out of output registers
2026-05-20 19:44:20 -07:00
3 changed files with 94519 additions and 56 deletions

View file

@ -256,16 +256,21 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for RenameExecuteRetireDebugState
}
#[hdl(no_static)]
enum RenameTableEntry<C: PhantomConstGet<CpuConfig>> {
L1(PRegNum<C>),
L2(L2RegNum),
struct RenameTableEntry<C: PhantomConstGet<CpuConfig>> {
l1: HdlOption<PRegNum<C>>,
l2: HdlOption<L2RegNum>,
}
impl<C: PhantomConstCpuConfig> RenameTableEntry<C> {
#[hdl]
fn const_zero(self) -> SimValue<Self> {
#[hdl(sim)]
self.L1(self.L1.const_zero())
Self {
l1: #[hdl(sim)]
(self.l1).HdlSome(self.l1.HdlSome.const_zero()),
l2: #[hdl(sim)]
HdlNone(),
}
}
}
@ -313,6 +318,7 @@ enum RenameTableUpdate<C: PhantomConstCpuConfig> {
dest: SimValue<L2RegNum>,
src: SimValue<PRegNum<C>>,
},
DropAllL2RegFileOutputs,
}
impl<C: PhantomConstCpuConfig> RenameTable<C> {
@ -352,39 +358,69 @@ impl<C: PhantomConstCpuConfig> RenameTable<C> {
}
RenameTableUpdate::UpdateForReadL2Reg { dest, src } => {
let new = #[hdl(sim)]
(RenameTableEntry[self.config]).L1(dest);
RenameTableEntry::<_> {
l1: #[hdl(sim)]
(HdlOption[dest.ty()]).HdlSome(dest),
l2: #[hdl(sim)]
HdlSome(src),
};
for (unrenamed_reg_num, entry) in self.entries.iter_mut().enumerate() {
#[hdl(sim)]
match entry.inner() {
RenameTableEntry::<_>::L1(_) => {}
RenameTableEntry::<_>::L2(l2) => {
if L2RegNum::value_sim(l2) == L2RegNum::value_sim(src) {
println!(
"{rename_table_name}: UpdateForReadL2Reg: {unrenamed_reg_num:#x} \
updating from {entry:?} to {new:?}",
);
*entry = new.to_trace_as_string();
if let HdlSome(l2) = &entry.inner().l2 {
if L2RegNum::value_sim(l2) == L2RegNum::value_sim(src) {
println!(
"{rename_table_name}: UpdateForReadL2Reg: {unrenamed_reg_num:#x} \
updating from {entry:?} to {new:?}",
);
#[hdl(sim)]
if let HdlSome(_) = &entry.inner().l1 {
unreachable!("l1 should be HdlNone: {entry:?}");
}
*entry = new.to_trace_as_string();
}
}
}
}
RenameTableUpdate::UpdateForWriteL2Reg { dest, src } => {
let new = #[hdl(sim)]
(RenameTableEntry[self.config]).L2(dest);
RenameTableEntry::<_> {
l1: #[hdl(sim)]
(HdlOption[src.ty()]).HdlNone(),
l2: #[hdl(sim)]
HdlSome(dest),
};
for (unrenamed_reg_num, entry) in self.entries.iter_mut().enumerate() {
#[hdl(sim)]
match entry.inner() {
RenameTableEntry::<_>::L1(l1) => {
if l1 == src {
println!(
"{rename_table_name}: UpdateForWriteL2Reg: {unrenamed_reg_num:#x} \
updating from {entry:?} to {new:?}",
);
*entry = new.to_trace_as_string();
if let HdlSome(l1) = &entry.inner().l1 {
if l1 == src {
println!(
"{rename_table_name}: UpdateForWriteL2Reg: {unrenamed_reg_num:#x} \
updating from {entry:?} to {new:?}",
);
#[hdl(sim)]
if let HdlSome(_) = &entry.inner().l2 {
unreachable!("l2 should be HdlNone: {entry:?}");
}
*entry = new.to_trace_as_string();
}
}
}
}
RenameTableUpdate::DropAllL2RegFileOutputs => {
for (unrenamed_reg_num, entry) in self.entries.iter_mut().enumerate() {
#[hdl(sim)]
if let HdlSome(_) = &entry.inner().l1 {
#[hdl(sim)]
if let HdlSome(_) = &entry.inner().l2 {
let mut new = entry.inner().clone();
new.l1 = #[hdl(sim)]
(new.l1.ty()).HdlNone();
println!(
"{rename_table_name}: DropAllL2RegFileOutputs: {unrenamed_reg_num:#x} \
updating from {entry:?} to {new:?}",
);
*entry = new.to_trace_as_string();
}
RenameTableEntry::<_>::L2(_) => {}
}
}
}
@ -395,13 +431,10 @@ impl<C: PhantomConstCpuConfig> RenameTable<C> {
let mut seen = BTreeSet::new();
for entry in self.entries.iter() {
#[hdl(sim)]
match entry.inner() {
RenameTableEntry::<_>::L1(v) => {
if UnitNum::index_sim(&v.unit_num) == Some(unit_index) {
seen.insert(UnitOutRegNum::value_sim(&v.unit_out_reg));
}
if let HdlSome(v) = &entry.inner().l1 {
if UnitNum::index_sim(&v.unit_num) == Some(unit_index) {
seen.insert(UnitOutRegNum::value_sim(&v.unit_out_reg));
}
RenameTableEntry::<_>::L2(_) => {}
}
}
seen.len()
@ -581,7 +614,7 @@ impl MOpInUnitState {
Self::FinishedAndOrCausedCancel => None,
}
}
fn is_completed(self) -> bool {
fn is_finished_and_or_caused_cancel(self) -> bool {
match self {
Self::NotYetEnqueued => false,
Self::InputsNotReadySpeculative { .. } => false,
@ -1019,6 +1052,12 @@ impl<C: PhantomConstCpuConfig> ReorderBuffer<C> {
.rename_table_updates
.push(update);
}
/// Reports whether the reorder buffer has no outstanding renamed work.
///
/// Returns `true` when `renamed()` yields nothing at all. Otherwise the
/// result is decided solely by the first entry `renamed()` yields: that
/// entry must have `all_prior_mops_are_finished` set and its
/// `mop_in_unit_state` must report `is_finished_and_or_caused_cancel()`.
///
/// NOTE(review): only the first yielded entry is inspected. Whether that is
/// enough to cover *all* mops depends on the iteration order of `renamed()`
/// and on how `all_prior_mops_are_finished` is maintained -- confirm.
fn all_mops_are_finished_and_or_caused_cancel(&self) -> bool {
    match self.renamed().next() {
        // Nothing renamed means there is nothing left to wait for.
        None => true,
        Some(first) => {
            first.all_prior_mops_are_finished
                && first.mop_in_unit_state.is_finished_and_or_caused_cancel()
        }
    }
}
}
type SimOnlyString = SimOnly<String>;
@ -1222,7 +1261,9 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
fn space_available_for_unit(&self, unit_index: usize) -> usize {
let mut retval = self.config.get().unit_max_in_flight(unit_index);
for renamed in self.rob.renamed() {
if renamed.unit_index == unit_index && !renamed.mop_in_unit_state.is_completed() {
if renamed.unit_index == unit_index
&& !renamed.mop_in_unit_state.is_finished_and_or_caused_cancel()
{
let Some(v) = NonZero::new(retval.get() - 1) else {
return 0;
};
@ -1259,13 +1300,10 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
.chain(self.retire_rename_table.entries.iter())
{
#[hdl(sim)]
match entry.inner() {
RenameTableEntry::<_>::L1(entry) => {
if Some(unit_index) == UnitNum::index_sim(&entry.unit_num) {
allocated_regs[UnitOutRegNum::value_sim(&entry.unit_out_reg)] = true;
}
if let HdlSome(l1) = &entry.inner().l1 {
if Some(unit_index) == UnitNum::index_sim(&l1.unit_num) {
allocated_regs[UnitOutRegNum::value_sim(&l1.unit_out_reg)] = true;
}
RenameTableEntry::<_>::L2(_) => {}
}
}
allocated_regs.iter().position(|v| !v)
@ -1292,11 +1330,8 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
.chain(self.retire_rename_table.entries.iter())
{
#[hdl(sim)]
match entry.inner() {
RenameTableEntry::<_>::L1(_) => {}
RenameTableEntry::<_>::L2(entry) => {
allocated_regs[L2RegNum::value_sim(entry)] = true;
}
if let HdlSome(l2) = &entry.inner().l2 {
allocated_regs[L2RegNum::value_sim(l2)] = true;
}
}
allocated_regs.iter().position(|v| !v)
@ -1499,13 +1534,10 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
"try_rename: checking that mop src reg ({renamed:?}) doesn't conflict with picked reg"
);
#[hdl(sim)]
match renamed.inner() {
RenameTableEntry::<_>::L1(v) => {
if reg_to_free == *v {
any_collisions = true;
}
if let HdlSome(l1) = &renamed.inner().l1 {
if reg_to_free == *l1 {
any_collisions = true;
}
RenameTableEntry::<_>::L2(_) => {}
}
});
if any_collisions {
@ -1600,10 +1632,16 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
let renamed = &self.rename_table.entries[MOpRegNum::reg_num_sim(&src_reg) as usize];
println!("renaming src[{index}] from {src_reg:?} to {renamed:?}");
#[hdl(sim)]
match renamed.inner() {
RenameTableEntry::<_>::L1(v) => v.clone(),
RenameTableEntry::<_>::L2(v) => {
needed_load.get_or_insert_with(|| v.clone());
match &renamed.inner().l1 {
HdlSome(l1) => l1.clone(),
HdlNone => {
let l2 = #[hdl(sim)]
if let HdlSome(l2) = &renamed.inner().l2 {
l2
} else {
unreachable!("rename table entry has neither l1 nor l2: {src_reg:?}");
};
needed_load.get_or_insert_with(|| l2.clone());
PRegNum[self.config].const_zero_sim()
}
}
@ -1647,7 +1685,19 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
self.l2_reg_file_unit_index,
),
);
println!("try_rename: l2 reg file has no space and/or has no free output regs");
println!("try_rename: read l2 reg");
Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
mop: insn,
delayed_for_l2_store,
},
)
} else if self.rob.all_mops_are_finished_and_or_caused_cancel() {
todo!("dropping the L2 register file's outputs still doesn't fix deadlocks");
// TODO: add proper WaW and WaR tracking for renamed registers
println!("try_rename: dropping all l2 reg file outputs");
self.update_rename_table(&insn, RenameTableUpdate::DropAllL2RegFileOutputs);
Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
@ -1656,7 +1706,9 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
},
)
} else {
println!("try_rename: l2 reg file has no space and/or has no free output regs");
println!(
"try_rename: waiting for all mops to finish before dropping all l2 reg file outputs"
);
Err(
#[hdl(sim)]
RenameDelayedEntry::<_> {
@ -1673,7 +1725,12 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
)
.into_trace_as_string();
let renamed_dest_reg = #[hdl(sim)]
(RenameTableEntry[self.config]).L1(renamed_dest_reg);
RenameTableEntry::<_> {
l1: #[hdl(sim)]
(HdlOption[renamed_dest_reg.ty()]).HdlSome(renamed_dest_reg),
l2: #[hdl(sim)]
HdlNone(),
};
for unrenamed_reg_num in unrenamed_dest_regs {
self.update_rename_table(
&insn,

File diff suppressed because it is too large Load diff

View file

@ -4486,3 +4486,120 @@ fn test_rename_execute_retire_head_n1() {
panic!();
}
}
/// Marker type naming the "save then restore every GPR" test program.
///
/// It carries no data; the associated constants below describe the stack
/// layout and the success value used by the generated instructions.
struct SaveRestoreGprsInsns;

impl SaveRestoreGprsInsns {
    /// Value stored to the I/O address once `main` has returned: the two
    /// ASCII bytes `O`, `k` read as a little-endian 16-bit integer.
    const OK: i16 = i16::from_le_bytes([b'O', b'k']);
    /// Byte offset added to the stack pointer (r1) to form the base pointer
    /// (r3) handed to `store_all` / `load_all`.
    const STATE_OFFSET: usize = 16;
    /// Size in bytes of the frame `main` allocates: 32 slots of 8 bytes
    /// (one per `std`/`ld` in `store_all`/`load_all`) plus 16 more bytes.
    /// NOTE(review): the extra 16 presumably corresponds to `STATE_OFFSET`
    /// -- confirm.
    const MAIN_STACK_FRAME_SIZE: usize = 32 * 8 + 16;
}
impl MakeInsns for SaveRestoreGprsInsns {
    /// Assembles the test program.
    ///
    /// Layout of what gets emitted, in order:
    /// * entry stub: initialise r1, call `main`, then store `Self::OK` to
    ///   `MockMemory::IO_ADDR` and branch to `done` forever;
    /// * `main` (at 0x1000): save LR into a fresh stack frame, load CTR with
    ///   2, and on each loop iteration call `store_all` followed by
    ///   `load_all` with r3 = r1 + `STATE_OFFSET`; finally restore LR, pop
    ///   the frame and return;
    /// * `store_all` (at 0x1800): `std` r0..r31 to `8 * i` off r3;
    /// * `load_all` (at 0x2000): `ld` r0..r31 from `8 * i` off r3.
    fn make_insns() -> Insns {
        // Immediates derived from the layout constants, computed once.
        let state_offset = Self::STATE_OFFSET as i16;
        let frame_size = Self::MAIN_STACK_FRAME_SIZE as i16;

        let mut asm = InsnsBuilder::new();
        let load_all_label = asm.new_label("load_all");
        let store_all_label = asm.new_label("store_all");
        let main_label = asm.new_label("main");

        // Entry stub.
        asm.power_isa_addi(1, 0, 0x4000); // initial stack pointer
        asm.power_isa_bl(main_label);
        asm.power_isa_addi(0, 0, Self::OK);
        asm.power_isa_std(0, 0, MockMemory::IO_ADDR as i16);
        let done_label = asm.new_defined_label("done");
        asm.power_isa_b(done_label); // branch-to-self once the result is written

        // main: prologue, two save/restore round trips, epilogue.
        asm.set_pc(0x1000);
        asm.define_label(main_label);
        asm.power_isa_mflr(0);
        asm.power_isa_stdu(0, 1, -frame_size);
        asm.power_isa_addi(0, 0, 2);
        asm.power_isa_mtctr(0);
        let main_loop_label = asm.new_defined_label("main_loop");
        asm.power_isa_addi(3, 1, state_offset);
        asm.power_isa_bl(store_all_label);
        asm.power_isa_addi(3, 1, state_offset);
        asm.power_isa_bl(load_all_label);
        asm.power_isa_bdnz(main_loop_label);
        asm.power_isa_ld(0, 1, 0);
        asm.power_isa_addi(1, 1, frame_size);
        asm.power_isa_mtlr(0);
        asm.power_isa_blr();

        // store_all: one doubleword store per register number 0..32.
        asm.set_pc(0x1800);
        asm.define_label(store_all_label);
        for reg in 0..32 {
            asm.power_isa_std(reg, 3, reg as i16 * 8);
        }
        asm.power_isa_blr();

        // load_all: r3 is the base pointer, so it is reloaded only after
        // every other register has been read through it.
        asm.set_pc(0x2000);
        asm.define_label(load_all_label);
        for reg in (0..3).chain(4..32).chain([3]) {
            asm.power_isa_ld(reg, 3, reg as i16 * 8);
        }
        asm.power_isa_blr();

        asm.build()
    }

    /// Builds the `MockMemory` used by the load/store unit for this program.
    ///
    /// The middle argument is a one-element vector holding `Self::OK`
    /// widened to `u64`; the other two arguments are empty.
    /// NOTE(review): presumably the middle vector lists the values expected
    /// to be written to `MockMemory::IO_ADDR` -- confirm against
    /// `MockMemory::new`.
    fn make_load_store_execution_state() -> MockMemory {
        let ok_value = Self::OK as u64;
        MockMemory::new(vec![], vec![ok_value], [])
    }
}
// Runs the `SaveRestoreGprsInsns` program through the rename/execute/retire
// test harness for 700 clock cycles, checks that the harness reports
// `all_outputs_written`, and compares the recorded VCD trace against the
// checked-in expected file.
#[hdl]
#[test]
fn test_rename_execute_retire_save_restore_gprs() {
let _n = SourceLocation::normalize_files_for_tests();
// Four units: two ALU/branch, one load/store, one transformed-move.
// NOTE(review): the meaning of the second `CpuConfig::new` argument (20) is
// not visible here -- confirm against `CpuConfig::new`.
let mut config = CpuConfig::new(
vec![
UnitConfig::new(UnitKind::AluBranch),
UnitConfig::new(UnitKind::AluBranch),
UnitConfig::new(UnitKind::LoadStore),
UnitConfig::new(UnitKind::TransformedMove),
],
NonZeroUsize::new(20).unwrap(),
);
config.fetch_width = NonZeroUsize::new(2).unwrap();
// NOTE(review): the meaning of the `true` flag is defined by
// `rename_execute_retire_test_harness` -- confirm there.
let m = rename_execute_retire_test_harness::<SaveRestoreGprsInsns>(
PhantomConst::new_sized(config),
true,
);
let mut sim = Simulation::new(m);
let writer = RcWriter::default();
sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
// Guard that prints the VCD collected so far when it is dropped while still
// holding the writer, so a panic before the final comparison (e.g. the
// `assert!` below) still dumps the trace.
struct DumpVcdOnDrop {
writer: Option<RcWriter>,
}
impl Drop for DumpVcdOnDrop {
fn drop(&mut self) {
// Only dump if the writer has not already been taken by the test body.
if let Some(mut writer) = self.writer.take() {
let vcd = String::from_utf8(writer.take()).unwrap();
println!("####### VCD:\n{vcd}\n#######");
}
}
}
let mut writer = DumpVcdOnDrop {
writer: Some(writer),
};
// Start with the clock low and reset asserted.
sim.write_clock(sim.io().cd.clk, false);
sim.write_reset(sim.io().cd.rst, true);
// Each iteration is one clock period: 500 ns low, rising edge, 500 ns high,
// falling edge. Reset is written low at the end of every iteration, so it is
// still asserted for the first rising edge only.
for cycle in 0..700 {
sim.advance_time(SimDuration::from_nanos(500));
println!("clock tick: {cycle}");
sim.write_clock(sim.io().cd.clk, true);
sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, false);
sim.write_reset(sim.io().cd.rst, false);
}
assert!(sim.read_bool(sim.io().all_outputs_written));
// FIXME: vcd is just whatever rename_execute_retire does now, which isn't known to be correct
// Taking the writer out of the guard here means the guard's `Drop` will not
// print the trace a second time.
let vcd = String::from_utf8(writer.writer.take().unwrap().take()).unwrap();
println!("####### VCD:\n{vcd}\n#######");
if vcd != include_str!("expected/rename_execute_retire_save_restore_gprs.vcd") {
panic!();
}
}