group micro ops by the instruction they come from when retiring

This commit is contained in:
Jacob Lifshay 2026-05-05 19:33:25 -07:00
parent 83b3f7bac9
commit 09c8c194e0
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
3 changed files with 286045 additions and 274549 deletions

View file

@ -47,6 +47,11 @@ pub struct MOpInstance<MOp> {
/// it needs to be canceled along with all other µOps that /// it needs to be canceled along with all other µOps that
/// come from the same ISA-level instruction. /// come from the same ISA-level instruction.
pub is_first_mop_in_insn: Bool, pub is_first_mop_in_insn: Bool,
/// `true` if this µOp is the last µOp in the ISA-level instruction.
/// In general, a single µOp can't be canceled by itself,
/// it needs to be canceled along with all other µOps that
/// come from the same ISA-level instruction.
pub is_last_mop_in_insn: Bool,
pub mop: MOp, pub mop: MOp,
} }
@ -64,11 +69,12 @@ impl<MOp: Type> SimValueDebug for MOpInstance<MOp> {
predicted_next_pc, predicted_next_pc,
size_in_bytes, size_in_bytes,
is_first_mop_in_insn, is_first_mop_in_insn,
is_last_mop_in_insn,
mop, mop,
} = value; } = value;
write!( write!(
f, f,
"fid={fetch_block_id:?} id={id:?} pc={pc:?} pn_pc={predicted_next_pc:?} sz={size_in_bytes:?} first={is_first_mop_in_insn}: {mop:?}" "fid={fetch_block_id:?} id={id:?} pc={pc:?} pn_pc={predicted_next_pc:?} sz={size_in_bytes:?} first={is_first_mop_in_insn} last={is_last_mop_in_insn}: {mop:?}"
) )
} }
} }
@ -832,6 +838,29 @@ impl<C: PhantomConstCpuConfig> ReorderBuffer<C> {
) -> impl DoubleEndedIterator<Item = &mut SimValue<MOpInstance<MOp>>> { ) -> impl DoubleEndedIterator<Item = &mut SimValue<MOpInstance<MOp>>> {
self.entries.iter_mut().map(|v| &mut v.unrenamed) self.entries.iter_mut().map(|v| &mut v.unrenamed)
} }
/// Splits the in-order ROB entries into contiguous index ranges, one per
/// ISA-level instruction: each range ends at (and includes) an entry whose
/// `is_last_mop_in_insn` flag is set. Entries after the last flagged one —
/// a trailing, incomplete group — are not covered by any yielded range.
fn retire_groups_unrenamed_ranges(
    &self,
) -> impl Clone + Iterator<Item = std::ops::Range<usize>> {
    self.entries
        .iter()
        .enumerate()
        .scan(0usize, |group_start, (index, entry)| {
            let emitted = if *entry.unrenamed.is_last_mop_in_insn {
                // Close the current group at this entry and start the
                // next group right after it.
                let start = *group_start;
                *group_start = index + 1;
                Some(start..index + 1)
            } else {
                None
            };
            // `scan` terminates on a bare `None`, so wrap the result:
            // `Some(None)` skips this entry without ending the iterator.
            Some(emitted)
        })
        .flatten()
}
/// Groups the ROB entries by the ISA-level instruction they come from,
/// yielding one sub-iterator of `&RobEntries<C>` per complete retire group,
/// using the index ranges produced by `retire_groups_unrenamed_ranges`.
/// A trailing group whose last µOp hasn't arrived yet is not yielded.
fn retire_groups(
    &self,
) -> impl Clone + Iterator<Item: DoubleEndedIterator<Item = &RobEntries<C>> + Clone> {
    self.retire_groups_unrenamed_ranges()
        // NOTE(review): `entries.range(..)` presumably borrows a
        // VecDeque-style sub-view for each group — confirm `entries`' type.
        .map(|range| self.entries.range(range))
}
fn renamed_len(&self) -> usize { fn renamed_len(&self) -> usize {
let Self { let Self {
next_renamed_mop_id: _, next_renamed_mop_id: _,
@ -1026,13 +1055,18 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
} }
write!( write!(
f, f,
": {:#x}{}: {:?}", ": {}{:#x}{}: {:?}",
rob.mop.pc.as_int(),
if *rob.mop.is_first_mop_in_insn { if *rob.mop.is_first_mop_in_insn {
"" ""
} else { } else {
".." ".."
}, },
rob.mop.pc.as_int(),
if *rob.mop.is_last_mop_in_insn {
""
} else {
".."
},
rob.mop.mop, rob.mop.mop,
) )
}) })
@ -1311,6 +1345,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
predicted_next_pc, predicted_next_pc,
size_in_bytes, size_in_bytes,
is_first_mop_in_insn, is_first_mop_in_insn,
is_last_mop_in_insn,
mop, mop,
} = &insn; } = &insn;
let mut needed_load = None; let mut needed_load = None;
@ -1367,6 +1402,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
predicted_next_pc, predicted_next_pc,
size_in_bytes, size_in_bytes,
is_first_mop_in_insn, is_first_mop_in_insn,
is_last_mop_in_insn,
mop: ReadL2RegMOp::read_l2_reg::<RenamedMOp<C>>( mop: ReadL2RegMOp::read_l2_reg::<RenamedMOp<C>>(
dest, dest,
repeat(RenamedSrcRegUInt[self.config].zero(), ConstUsize), repeat(RenamedSrcRegUInt[self.config].zero(), ConstUsize),
@ -1407,6 +1443,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
predicted_next_pc, predicted_next_pc,
size_in_bytes, size_in_bytes,
is_first_mop_in_insn, is_first_mop_in_insn,
is_last_mop_in_insn,
mop, mop,
}, },
), ),
@ -1658,59 +1695,84 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
#[hdl] #[hdl]
fn peek_retiring_insns(&self) -> Vec<SimValue<NextPcPredictorOp<C>>> { fn peek_retiring_insns(&self) -> Vec<SimValue<NextPcPredictorOp<C>>> {
let mut retval = Vec::new(); let mut retval = Vec::new();
let mut prev_caused_cancel = false; for retire_group in self.rob.retire_groups() {
for RobEntries { for renamed_entry in retire_group.clone().flat_map(|v| &v.renamed_entries) {
unrenamed: _,
rename_table_updates: _,
renamed_entries,
} in &self.rob.entries
{
if retval.len() >= self.config.get().fetch_width.get() || prev_caused_cancel {
return retval;
}
let mut unrenamed_op = #[hdl(sim)]
NextPcPredictorOp::<_> {
call_stack_op: #[hdl(sim)]
CallStackOp.None(),
cond_br_taken: #[hdl(sim)]
HdlNone(),
config: self.config,
};
for renamed in renamed_entries {
if prev_caused_cancel {
return retval;
}
if let RobEntry { if let RobEntry {
mop: _, mop: _,
mop_in_unit_state: MOpInUnitState::FinishedAndOrCausedCancel, mop_in_unit_state: MOpInUnitState::FinishedAndOrCausedCancel,
is_speculative: _, is_speculative: _,
finished: Some(renamed_op), finished,
caused_cancel, caused_cancel,
} = renamed } = renamed_entry
&& caused_cancel
.as_ref()
.is_none_or(|caused_cancel| *caused_cancel.cancel_after_retire)
{ {
prev_caused_cancel = caused_cancel.is_some(); if caused_cancel.is_some() {
#[hdl(sim)] // only the part before the cancel needs to be ready
let NextPcPredictorOp::<_> { break;
call_stack_op,
cond_br_taken,
config: _,
} = renamed_op;
#[hdl(sim)]
if let CallStackOp::None = &unrenamed_op.call_stack_op {
unrenamed_op.call_stack_op = call_stack_op.clone();
}
#[hdl(sim)]
if let HdlNone = &unrenamed_op.cond_br_taken {
unrenamed_op.cond_br_taken = cond_br_taken.clone();
} }
assert!(finished.is_some());
} else { } else {
// group isn't ready
return retval;
}
}
for RobEntries {
unrenamed: _,
rename_table_updates: _,
renamed_entries,
} in retire_group
{
let caused_cancel = renamed_entries.iter().any(|v| v.caused_cancel.is_some());
let caused_cancel_after_retire =
renamed_entries
.iter()
.rev()
.enumerate()
.all(|(rev_index, v)| {
if rev_index == 0 {
v.caused_cancel
.as_ref()
.is_some_and(|v| *v.cancel_after_retire)
} else {
v.caused_cancel.is_none()
}
});
if !caused_cancel || caused_cancel_after_retire {
let mut unrenamed_op = #[hdl(sim)]
NextPcPredictorOp::<_> {
call_stack_op: #[hdl(sim)]
CallStackOp.None(),
cond_br_taken: #[hdl(sim)]
HdlNone(),
config: self.config,
};
for renamed in renamed_entries {
let Some(finished) = &renamed.finished else {
unreachable!();
};
#[hdl(sim)]
let NextPcPredictorOp::<_> {
call_stack_op,
cond_br_taken,
config: _,
} = finished;
#[hdl(sim)]
if let CallStackOp::None = &unrenamed_op.call_stack_op {
unrenamed_op.call_stack_op = call_stack_op.clone();
}
#[hdl(sim)]
if let HdlNone = &unrenamed_op.cond_br_taken {
unrenamed_op.cond_br_taken = cond_br_taken.clone();
}
}
retval.push(unrenamed_op);
if retval.len() >= self.config.get().fetch_width.get() {
return retval;
}
}
if caused_cancel {
return retval; return retval;
} }
} }
retval.push(unrenamed_op);
} }
retval retval
} }

File diff suppressed because it is too large Load diff

View file

@ -736,8 +736,8 @@ impl<'a, C: PhantomConstCpuConfig> MockNextPcState<'a, C> {
} }
let insn = self.insns.insns.get(&self.next_pc).ok_or(())?; let insn = self.insns.insns.get(&self.next_pc).ok_or(())?;
let fallthrough_pc = self.next_pc.wrapping_add(insn.size_in_bytes.into()); let fallthrough_pc = self.next_pc.wrapping_add(insn.size_in_bytes.into());
let is_last_mop_index = self.next_mop_index + 1 >= insn.mops.len(); let is_last_mop_in_insn = self.next_mop_index + 1 >= insn.mops.len();
let mut predicted_next_pc = if is_last_mop_index { let mut predicted_next_pc = if is_last_mop_in_insn {
fallthrough_pc fallthrough_pc
} else { } else {
self.next_pc self.next_pc
@ -768,11 +768,12 @@ impl<'a, C: PhantomConstCpuConfig> MockNextPcState<'a, C> {
predicted_next_pc, predicted_next_pc,
size_in_bytes: insn.size_in_bytes.cast_to_static::<UInt<_>>(), size_in_bytes: insn.size_in_bytes.cast_to_static::<UInt<_>>(),
is_first_mop_in_insn: self.next_mop_index == 0, is_first_mop_in_insn: self.next_mop_index == 0,
is_last_mop_in_insn,
mop, mop,
}; };
println!("pushed to fetch queue: {mop:?}"); println!("pushed to fetch queue: {mop:?}");
self.fetch_queue.push_back(mop); self.fetch_queue.push_back(mop);
if is_last_mop_index { if is_last_mop_in_insn {
self.next_mop_index = 0; self.next_mop_index = 0;
self.next_pc = predicted_next_pc; self.next_pc = predicted_next_pc;
} else { } else {
@ -1637,6 +1638,7 @@ trait MockExecutionStateTrait: Default {
predicted_next_pc, predicted_next_pc,
size_in_bytes, size_in_bytes,
is_first_mop_in_insn: _, is_first_mop_in_insn: _,
is_last_mop_in_insn: _,
mop, mop,
} = mop; } = mop;
let fallthrough_pc = pc let fallthrough_pc = pc
@ -2453,6 +2455,7 @@ impl<C: PhantomConstCpuConfig> MockLoadStoreUnitState<C> {
predicted_next_pc, predicted_next_pc,
size_in_bytes, size_in_bytes,
is_first_mop_in_insn, is_first_mop_in_insn,
is_last_mop_in_insn,
mop, mop,
} = mop; } = mop;
let mop = #[hdl(sim)] let mop = #[hdl(sim)]
@ -2468,6 +2471,7 @@ impl<C: PhantomConstCpuConfig> MockLoadStoreUnitState<C> {
predicted_next_pc, predicted_next_pc,
size_in_bytes, size_in_bytes,
is_first_mop_in_insn, is_first_mop_in_insn,
is_last_mop_in_insn,
mop, mop,
}; };
self.ops.push_back(MockLoadStoreOp { self.ops.push_back(MockLoadStoreOp {