group micro ops by the instruction they come from when retiring

Jacob Lifshay 2026-05-05 19:33:25 -07:00
parent 83b3f7bac9
commit 09c8c194e0
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
3 changed files with 286045 additions and 274549 deletions
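
As a rough illustration of the change before the diff itself: the reorder buffer now walks its entries in order and cuts a retire group at every µOp whose is_last_mop_in_insn flag is set, so all µOps cracked from one ISA-level instruction retire (or are cancelled) together. The following is a minimal standalone sketch of that grouping, not the project's real code; MicroOp and retire_group_ranges are hypothetical stand-ins mirroring the new retire_groups_unrenamed_ranges helper.

// Hypothetical stand-in types; the real code groups `RobEntries` by the
// `is_last_mop_in_insn` flag of the unrenamed µOp.
use std::ops::Range;

struct MicroOp {
    is_last_mop_in_insn: bool,
}

// Yields one index range per ISA-level instruction, cutting a group after
// each µOp marked as the last µOp of its instruction (a trailing, incomplete
// instruction yields no range).
fn retire_group_ranges(mops: &[MicroOp]) -> impl Iterator<Item = Range<usize>> + '_ {
    let mut next_group_start = 0;
    mops.iter().enumerate().filter_map(move |(index, mop)| {
        if mop.is_last_mop_in_insn {
            let group_start = next_group_start;
            next_group_start = index + 1;
            Some(group_start..next_group_start)
        } else {
            None
        }
    })
}

fn main() {
    // First instruction cracked into three µOps, second into one.
    let mops = [
        MicroOp { is_last_mop_in_insn: false },
        MicroOp { is_last_mop_in_insn: false },
        MicroOp { is_last_mop_in_insn: true },
        MicroOp { is_last_mop_in_insn: true },
    ];
    let groups: Vec<Range<usize>> = retire_group_ranges(&mops).collect();
    assert_eq!(groups, vec![0..3, 3..4]);
}
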

View file

@@ -47,6 +47,11 @@ pub struct MOpInstance<MOp> {
/// it needs to be canceled along with all other µOps that
/// come from the same ISA-level instruction.
pub is_first_mop_in_insn: Bool,
/// `true` if this µOp is the last µOp in the ISA-level instruction.
/// In general, a single µOp can't be canceled by itself,
/// it needs to be canceled along with all other µOps that
/// come from the same ISA-level instruction.
pub is_last_mop_in_insn: Bool,
pub mop: MOp,
}
@@ -64,11 +69,12 @@ impl<MOp: Type> SimValueDebug for MOpInstance<MOp> {
predicted_next_pc,
size_in_bytes,
is_first_mop_in_insn,
is_last_mop_in_insn,
mop,
} = value;
write!(
f,
"fid={fetch_block_id:?} id={id:?} pc={pc:?} pn_pc={predicted_next_pc:?} sz={size_in_bytes:?} first={is_first_mop_in_insn}: {mop:?}"
"fid={fetch_block_id:?} id={id:?} pc={pc:?} pn_pc={predicted_next_pc:?} sz={size_in_bytes:?} first={is_first_mop_in_insn} last={is_last_mop_in_insn}: {mop:?}"
)
}
}
@@ -832,6 +838,29 @@ impl<C: PhantomConstCpuConfig> ReorderBuffer<C> {
) -> impl DoubleEndedIterator<Item = &mut SimValue<MOpInstance<MOp>>> {
self.entries.iter_mut().map(|v| &mut v.unrenamed)
}
fn retire_groups_unrenamed_ranges(
&self,
) -> impl Clone + Iterator<Item = std::ops::Range<usize>> {
let mut next_group_start = 0;
self.entries
.iter()
.enumerate()
.filter_map(move |(index, entry)| {
if *entry.unrenamed.is_last_mop_in_insn {
let group_start = next_group_start;
next_group_start = index + 1;
Some(group_start..next_group_start)
} else {
None
}
})
}
fn retire_groups(
&self,
) -> impl Clone + Iterator<Item: DoubleEndedIterator<Item = &RobEntries<C>> + Clone> {
self.retire_groups_unrenamed_ranges()
.map(|range| self.entries.range(range))
}
fn renamed_len(&self) -> usize {
let Self {
next_renamed_mop_id: _,
@@ -1026,13 +1055,18 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
}
write!(
f,
": {:#x}{}: {:?}",
rob.mop.pc.as_int(),
": {}{:#x}{}: {:?}",
if *rob.mop.is_first_mop_in_insn {
""
} else {
".."
},
rob.mop.pc.as_int(),
if *rob.mop.is_last_mop_in_insn {
""
} else {
".."
},
rob.mop.mop,
)
})
@@ -1311,6 +1345,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
predicted_next_pc,
size_in_bytes,
is_first_mop_in_insn,
is_last_mop_in_insn,
mop,
} = &insn;
let mut needed_load = None;
@@ -1367,6 +1402,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
predicted_next_pc,
size_in_bytes,
is_first_mop_in_insn,
is_last_mop_in_insn,
mop: ReadL2RegMOp::read_l2_reg::<RenamedMOp<C>>(
dest,
repeat(RenamedSrcRegUInt[self.config].zero(), ConstUsize),
@@ -1407,6 +1443,7 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
predicted_next_pc,
size_in_bytes,
is_first_mop_in_insn,
is_last_mop_in_insn,
mop,
},
),
@@ -1658,59 +1695,84 @@ impl<C: PhantomConstCpuConfig> RenameExecuteRetireState<C> {
#[hdl]
fn peek_retiring_insns(&self) -> Vec<SimValue<NextPcPredictorOp<C>>> {
let mut retval = Vec::new();
let mut prev_caused_cancel = false;
for RobEntries {
unrenamed: _,
rename_table_updates: _,
renamed_entries,
} in &self.rob.entries
{
if retval.len() >= self.config.get().fetch_width.get() || prev_caused_cancel {
return retval;
}
let mut unrenamed_op = #[hdl(sim)]
NextPcPredictorOp::<_> {
call_stack_op: #[hdl(sim)]
CallStackOp.None(),
cond_br_taken: #[hdl(sim)]
HdlNone(),
config: self.config,
};
for renamed in renamed_entries {
if prev_caused_cancel {
return retval;
}
for retire_group in self.rob.retire_groups() {
for renamed_entry in retire_group.clone().flat_map(|v| &v.renamed_entries) {
if let RobEntry {
mop: _,
mop_in_unit_state: MOpInUnitState::FinishedAndOrCausedCancel,
is_speculative: _,
finished: Some(renamed_op),
finished,
caused_cancel,
} = renamed
&& caused_cancel
.as_ref()
.is_none_or(|caused_cancel| *caused_cancel.cancel_after_retire)
} = renamed_entry
{
prev_caused_cancel = caused_cancel.is_some();
#[hdl(sim)]
let NextPcPredictorOp::<_> {
call_stack_op,
cond_br_taken,
config: _,
} = renamed_op;
#[hdl(sim)]
if let CallStackOp::None = &unrenamed_op.call_stack_op {
unrenamed_op.call_stack_op = call_stack_op.clone();
}
#[hdl(sim)]
if let HdlNone = &unrenamed_op.cond_br_taken {
unrenamed_op.cond_br_taken = cond_br_taken.clone();
if caused_cancel.is_some() {
// only the part before the cancel needs to be ready
break;
}
assert!(finished.is_some());
} else {
// group isn't ready
return retval;
}
}
for RobEntries {
unrenamed: _,
rename_table_updates: _,
renamed_entries,
} in retire_group
{
let caused_cancel = renamed_entries.iter().any(|v| v.caused_cancel.is_some());
let caused_cancel_after_retire =
renamed_entries
.iter()
.rev()
.enumerate()
.all(|(rev_index, v)| {
if rev_index == 0 {
v.caused_cancel
.as_ref()
.is_some_and(|v| *v.cancel_after_retire)
} else {
v.caused_cancel.is_none()
}
});
if !caused_cancel || caused_cancel_after_retire {
let mut unrenamed_op = #[hdl(sim)]
NextPcPredictorOp::<_> {
call_stack_op: #[hdl(sim)]
CallStackOp.None(),
cond_br_taken: #[hdl(sim)]
HdlNone(),
config: self.config,
};
for renamed in renamed_entries {
let Some(finished) = &renamed.finished else {
unreachable!();
};
#[hdl(sim)]
let NextPcPredictorOp::<_> {
call_stack_op,
cond_br_taken,
config: _,
} = finished;
#[hdl(sim)]
if let CallStackOp::None = &unrenamed_op.call_stack_op {
unrenamed_op.call_stack_op = call_stack_op.clone();
}
#[hdl(sim)]
if let HdlNone = &unrenamed_op.cond_br_taken {
unrenamed_op.cond_br_taken = cond_br_taken.clone();
}
}
retval.push(unrenamed_op);
if retval.len() >= self.config.get().fetch_width.get() {
return retval;
}
}
if caused_cancel {
return retval;
}
}
retval.push(unrenamed_op);
}
retval
}
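
For orientation, here is a simplified stand-in for the group-level gating that the rewritten peek_retiring_insns performs above; Entry and retirable_groups are hypothetical names, not the real RobEntry plumbing. A group only counts once every µOp up to its first cancel is ready, at most fetch_width instructions are considered per call, and a cancelling instruction ends the walk unless its only cancel is a cancel_after_retire on its final µOp, in which case that instruction still retires.

// Hypothetical stand-ins; each inner Vec is one instruction's retire group.
struct Entry {
    finished: bool,
    // Some(cancel_after_retire) when this µOp caused a cancel.
    caused_cancel: Option<bool>,
}

// Counts how many whole instruction groups could retire this cycle.
fn retirable_groups(groups: &[Vec<Entry>], fetch_width: usize) -> usize {
    let mut retired = 0;
    for group in groups {
        for entry in group {
            if !entry.finished && entry.caused_cancel.is_none() {
                return retired; // group isn't ready yet
            }
            if entry.caused_cancel.is_some() {
                break; // only the part before the cancel needs to be ready
            }
        }
        let caused_cancel = group.iter().any(|e| e.caused_cancel.is_some());
        // A cancelling group may still retire if its only cancel is a
        // `cancel_after_retire` on the final µOp.
        let cancel_after_retire_only = group.iter().rev().enumerate().all(|(rev_index, e)| {
            if rev_index == 0 {
                e.caused_cancel == Some(true)
            } else {
                e.caused_cancel.is_none()
            }
        });
        if !caused_cancel || cancel_after_retire_only {
            retired += 1;
            if retired >= fetch_width {
                return retired;
            }
        }
        if caused_cancel {
            return retired; // nothing after a cancelling instruction retires
        }
    }
    retired
}

In the actual code the per-group results are not just counted: each retiring group is folded into a single NextPcPredictorOp (call-stack op and conditional-branch outcome) that is pushed into the returned vector.
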

File diff suppressed because it is too large

View file

@@ -736,8 +736,8 @@ impl<'a, C: PhantomConstCpuConfig> MockNextPcState<'a, C> {
}
let insn = self.insns.insns.get(&self.next_pc).ok_or(())?;
let fallthrough_pc = self.next_pc.wrapping_add(insn.size_in_bytes.into());
let is_last_mop_index = self.next_mop_index + 1 >= insn.mops.len();
let mut predicted_next_pc = if is_last_mop_index {
let is_last_mop_in_insn = self.next_mop_index + 1 >= insn.mops.len();
let mut predicted_next_pc = if is_last_mop_in_insn {
fallthrough_pc
} else {
self.next_pc
@@ -768,11 +768,12 @@ impl<'a, C: PhantomConstCpuConfig> MockNextPcState<'a, C> {
predicted_next_pc,
size_in_bytes: insn.size_in_bytes.cast_to_static::<UInt<_>>(),
is_first_mop_in_insn: self.next_mop_index == 0,
is_last_mop_in_insn,
mop,
};
println!("pushed to fetch queue: {mop:?}");
self.fetch_queue.push_back(mop);
if is_last_mop_index {
if is_last_mop_in_insn {
self.next_mop_index = 0;
self.next_pc = predicted_next_pc;
} else {
@@ -1637,6 +1638,7 @@ trait MockExecutionStateTrait: Default {
predicted_next_pc,
size_in_bytes,
is_first_mop_in_insn: _,
is_last_mop_in_insn: _,
mop,
} = mop;
let fallthrough_pc = pc
@@ -2453,6 +2455,7 @@ impl<C: PhantomConstCpuConfig> MockLoadStoreUnitState<C> {
predicted_next_pc,
size_in_bytes,
is_first_mop_in_insn,
is_last_mop_in_insn,
mop,
} = mop;
let mop = #[hdl(sim)]
@@ -2468,6 +2471,7 @@ impl<C: PhantomConstCpuConfig> MockLoadStoreUnitState<C> {
predicted_next_pc,
size_in_bytes,
is_first_mop_in_insn,
is_last_mop_in_insn,
mop,
};
self.ops.push_back(MockLoadStoreOp {