WIP fixing bugs
Some checks failed: test (pull_request) failing after 14s

Jacob Lifshay 2025-12-15 02:48:40 -08:00
parent 84e4fde512
commit d42f010cda
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
3 changed files with 17859 additions and 14030 deletions


@@ -246,7 +246,7 @@ pub struct RetireToNextPcInterfaceInner<C: PhantomConstGet<CpuConfig>> {
 /// branch instruction is mis-speculated.
 pub struct RetireToNextPcInterface<C: PhantomConstGet<CpuConfig>> {
     pub inner: ReadyValid<RetireToNextPcInterfaceInner<C>>,
-    pub next_insn_ids: ArrayVec<UInt<12>, CpuConfigRobSize<C>>,
+    pub next_insn_ids: HdlOption<ArrayVec<UInt<12>, CpuConfigRobSize<C>>>,
 }
 
 #[hdl(no_static)]
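
In plain-Rust terms, the `HdlOption` wrapper above distinguishes "no valid id list right now" from "an empty id list", which matters while the retire stage is canceling mis-speculated instructions. A minimal sketch of the consumer-side pattern, using `Option`/`Vec` as stand-ins for fayalite's `HdlOption`/`ArrayVec` (all names below are hypothetical):

```rust
/// `None` plays the role of `HdlNone` while retire is canceling
/// mis-speculated instructions; `Some(ids)` plays the role of `HdlSome`.
struct RetireToNextPc {
    next_insn_ids: Option<Vec<u16>>, // stand-in for HdlOption<ArrayVec<UInt<12>, ...>>
}

fn check_next_ids(iface: &RetireToNextPc, expected: &[u16]) {
    // consumers skip the comparison entirely while the ids are invalid,
    // instead of comparing against a stale or zeroed list
    if let Some(ids) = &iface.next_insn_ids {
        assert_eq!(ids.as_slice(), expected);
    }
}
```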
@@ -301,7 +301,7 @@ struct Cancel<C: PhantomConstGet<CpuConfig>> {
 }
 
 /// the output of [`Stage::run`].
-/// when cancelling operations, the returned [`StageRunOutput.cancel`] should be the state after
+/// when canceling operations, the returned [`StageRunOutput.cancel`] should be the state after
 /// running all operations returned in [`StageRunOutput.output`].
 #[hdl(no_static)]
 struct StageRunOutput<C: PhantomConstGet<CpuConfig> + PhantomConstCpuConfig, S: Type + Stage<C>> {
@@ -1033,15 +1033,22 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
         _inputs: &SimValue<Self::Inputs>,
     ) -> SimValue<StageRunOutput<C, Self>> {
         let this_ty = state.ty();
-        let config = state.config.ty();
-        let start_call_stack = state.call_stack.clone();
-        let fetch_block_id = state.next_fetch_block_id.as_int();
-        *state.next_fetch_block_id = state.next_fetch_block_id.as_int().wrapping_add(1).into();
-        let start_pc = state.next_pc.as_int();
+        #[hdl(sim)]
+        let Self {
+            call_stack,
+            branch_target_buffer,
+            next_pc,
+            next_fetch_block_id,
+            config,
+        } = state;
+        let config = config.ty();
+        let start_call_stack = call_stack.clone();
+        let fetch_block_id = next_fetch_block_id.as_int();
+        **next_fetch_block_id = fetch_block_id.wrapping_add(1).into();
+        let start_pc = next_pc.as_int();
 
         let fetch_pc = start_pc & (!0u64 << config.get().log2_fetch_width_in_bytes);
-        let btb_entry_index = state
-            .branch_target_buffer
+        let btb_entry_index = branch_target_buffer
             .branch_pc_to_target_map
             .iter()
             .position(|entry| {
@@ -1052,12 +1059,6 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
                 }
             });
         let (next_start_pc, btb_entry) = if let Some(btb_entry_index) = btb_entry_index {
-            #[hdl(sim)]
-            let Self {
-                call_stack,
-                branch_target_buffer,
-                ..
-            } = state;
             let entry = #[hdl(sim)]
             match &branch_target_buffer.branch_pc_to_target_map[btb_entry_index] {
                 HdlSome(entry) => entry,
@@ -1101,6 +1102,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for NextPcStageState<C> {
                 HdlNone(),
             )
         };
+        **next_pc = next_start_pc.into();
         let output = #[hdl(sim)]
         NextPcStageOutput::<_> {
             start_pc,
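
The three hunks above replace repeated `state.field` accesses with one up-front `#[hdl(sim)]` destructuring of `state`, which is what lets the new `**next_pc = next_start_pc.into();` write happen at the end without re-borrowing `state`. A plain-Rust analogue of the same refactor, with simplified stand-in types and a placeholder next-pc computation (fayalite's sim destructuring adds one more level of indirection, hence the `**` in the diff):

```rust
struct State {
    next_pc: u64,
    next_fetch_block_id: u64,
}

// destructuring `&mut State` once yields one `&mut` per field, so the
// function can read some fields and write others without re-borrowing
// `state` for every access
fn run(state: &mut State) -> (u64, u64) {
    let State { next_pc, next_fetch_block_id } = state;
    let fetch_block_id = *next_fetch_block_id;
    *next_fetch_block_id = fetch_block_id.wrapping_add(1);
    let start_pc = *next_pc;
    let next_start_pc = start_pc.wrapping_add(4); // placeholder next-pc computation
    *next_pc = next_start_pc;
    (start_pc, fetch_block_id)
}
```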
@@ -3547,13 +3549,12 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
             from_external_pipe_output_ready,
         } = last_outputs;
         assert_eq!(**ArrayVec::len_sim(to_external_pipe_input_input), 0);
-        #[hdl(sim)]
-        if let HdlNone = to_external_pipe_input_cancel {
-            unreachable!();
-        }
         assert_eq!(**from_external_pipe_output_ready, 0);
-        if !**to_external_pipe_input_cancel_ready {
-            return CancelResult::InProgress;
+        #[hdl(sim)]
+        if let HdlSome(_) = to_external_pipe_input_cancel {
+            if !**to_external_pipe_input_cancel_ready {
+                return CancelResult::InProgress;
+            }
         }
         for _ in 0..std::mem::replace(input_queue_to_cancel, 0) {
             let Some(_) = Queue::undo_push(input_queue) else {
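
The hunk above changes the stall condition: the old code treated a missing cancel as unreachable and consulted the ready signal unconditionally, while the fixed code only waits on the external pipe when a cancel is actually being presented. A condensed plain-Rust rendering of the fixed control flow, with hypothetical stand-in types:

```rust
enum CancelResult {
    InProgress,
    Done,
}

// stall only when there is a cancel to hand off and the external pipe has
// not accepted it yet; no pending cancel means there is nothing to wait for
fn step_cancel(pending_cancel: Option<u32>, cancel_ready: bool) -> CancelResult {
    if let Some(_cancel) = pending_cancel {
        if !cancel_ready {
            return CancelResult::InProgress;
        }
    }
    CancelResult::Done
}
```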
@@ -3682,12 +3683,12 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
                 unreachable!();
             };
         }
-        // handle cancelling only after handling all outputs so the outputs aren't canceled
+        // handle canceling only after handling all outputs so the outputs aren't canceled
         #[hdl(sim)]
         if let HdlSome(cancel) = cancel {
             // ignore the rest of the input_queue and from_external_pipe_output_data,
             // it doesn't matter that they're getting ignored since we're
-            // cancelling all inputs anyway.
+            // canceling all inputs anyway.
             let cancel_count = Queue::len(input_queue);
             let sibling_cancel = S::SiblingStage::make_sim_value_cancel(
                 S::SiblingStage::sim_value_stage_with_queues_opt(sibling).map(|sibling| {
@@ -3697,7 +3698,7 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
             // this stage's input queue and the sibling stage's input queue,
             // and that outputs are removed in lock step from
             // this stage's output queue and the sibling stage's output queue.
-            let sibling_cancel_count = if sibling_already_ran {
+            let mut sibling_cancel_count = if dbg!(sibling_already_ran) {
                 // both this stage and its sibling already pushed the same items to
                 // their input queues, so they are in lock-step and can use the
                 // same cancel count.
@@ -3707,6 +3708,12 @@ impl<C: PhantomConstCpuConfig, S: Stage<C>> StageWithQueues<C, S> {
                 // input queue, but the sibling hasn't so subtract off those additional items
                 cancel_count - input_stages_outputs_popped_count
             };
+            if **ArrayVec::len_sim(&outputs) == 0 {
+                // this item was removed, so we need to remove it in the sibling too
+                sibling_cancel_count += 1;
+            }
+            dbg!(sibling_cancel_count);
+            dbg!(cancel_count);
             let CancelInProgressForStageWithQueues {
                 cancel_state: _,
                 input_queue_to_cancel,
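
The new `if **ArrayVec::len_sim(&outputs) == 0` branch above patches the sibling's cancel count for the in-flight item this stage already dropped. Pulled out as a free function over plain integers (a hypothetical helper, not the repo's API), the arithmetic in these two hunks is:

```rust
// how many items the sibling stage must cancel so both input queues
// shrink in lock step (hypothetical stand-alone helper)
fn sibling_cancel_count(
    cancel_count: usize,              // items canceled from this stage's input queue
    sibling_already_ran: bool,        // sibling already pushed the same inputs this cycle
    outputs_popped_count: usize,      // inputs this stage consumed that the sibling hasn't seen
    this_cycle_output_dropped: bool,  // this cycle's output was already removed here
) -> usize {
    let mut count = if sibling_already_ran {
        // queues are in lock step, so the same count applies
        cancel_count
    } else {
        // this stage ran ahead; don't cancel items the sibling never received
        cancel_count - outputs_popped_count
    };
    if this_cycle_output_dropped {
        // this stage already dropped the in-flight item; the sibling still holds it
        count += 1;
    }
    count
}
```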
@@ -3897,6 +3904,7 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
             execute_retire,
             config: _,
         } = this;
+        println!("Cancel: {cancel:#?}");
         let next_pc =
             StageWithQueues::cancel(next_pc, cancel, &inputs.next_pc, &last_outputs.next_pc);
         let br_pred =
@@ -4253,6 +4261,41 @@ impl<C: PhantomConstCpuConfig> NextPcState<C> {
                 }
             }
         }
 
+        #[hdl(sim)]
+        if let HdlNone = cancel {
+            #[derive(Debug, PartialEq)]
+            struct FetchOp {
+                start_pc: SimValue<UInt<64>>,
+                fetch_block_id: SimValue<UInt<{ FETCH_BLOCK_ID_WIDTH }>>,
+            }
+            let br_pred_ops = Vec::from_iter(
+                Queue::peek_iter(&all_stages.br_pred.output_queue)
+                    .map(|v| FetchOp {
+                        start_pc: v.start_pc.clone(),
+                        fetch_block_id: v.fetch_block_id.clone(),
+                    })
+                    .chain(
+                        Queue::peek_iter(&all_stages.br_pred.input_queue).map(|v| FetchOp {
+                            start_pc: v.start_pc.clone(),
+                            fetch_block_id: v.fetch_block_id.clone(),
+                        }),
+                    ),
+            );
+            let fetch_decode_ops = Vec::from_iter(
+                Queue::peek_iter(&all_stages.fetch_decode.output_queue)
+                    .map(|v: &SimValue<FetchDecodeStageOutput<C>>| FetchOp {
+                        start_pc: v.next_pc_stage_output.start_pc.clone(),
+                        fetch_block_id: v.next_pc_stage_output.fetch_block_id.clone(),
+                    })
+                    .chain(
+                        Queue::peek_iter(&all_stages.fetch_decode.input_queue).map(|v| FetchOp {
+                            start_pc: v.start_pc.clone(),
+                            fetch_block_id: v.fetch_block_id.clone(),
+                        }),
+                    ),
+            );
+            assert_eq!(br_pred_ops, fetch_decode_ops, "queues out of sync");
+        }
     }
 }
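
The debug check added above asserts that, whenever no cancel is pending, `br_pred` and `fetch_decode` hold exactly the same fetch ops, oldest first; each stage's `output_queue` entries are older than its `input_queue` entries, which is why the iterators are chained in that order. The shape of the invariant, with simplified stand-in types and hypothetical names:

```rust
#[derive(Debug, PartialEq)]
struct FetchOp {
    start_pc: u64,
    fetch_block_id: u64,
}

// oldest-first snapshot of one stage: finished outputs first, then pending inputs
fn snapshot<'a>(output_queue: &'a [FetchOp], input_queue: &'a [FetchOp]) -> Vec<&'a FetchOp> {
    output_queue.iter().chain(input_queue.iter()).collect()
}

fn assert_in_sync(br_pred: (&[FetchOp], &[FetchOp]), fetch_decode: (&[FetchOp], &[FetchOp])) {
    assert_eq!(
        snapshot(br_pred.0, br_pred.1),
        snapshot(fetch_decode.0, fetch_decode.1),
        "queues out of sync"
    );
}
```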
@@ -4352,12 +4395,18 @@ pub fn next_pc(config: PhantomConst<CpuConfig>) {
             state = sim.read_past(state_expr, cd.clk).await;
             AllStages::dump_queues(&state.all_stages);
             let next_retire_insn_ids = sim.read_past(from_retire.next_insn_ids, cd.clk).await;
-            let next_retire_insn_ids = ArrayVec::elements_sim_ref(&next_retire_insn_ids);
-            let expected_next_retire_insn_ids = Vec::from_iter(
-                Queue::peek_iter(&state.all_stages.execute_retire.input_queue)
-                    .map(|v| v.insn.id.clone()),
-            );
-            assert_eq!(next_retire_insn_ids, expected_next_retire_insn_ids);
+            #[hdl(sim)]
+            if let HdlSome(next_retire_insn_ids) = &next_retire_insn_ids {
+                #[hdl(sim)]
+                if let HdlNone = &state.cancel {
+                    let next_retire_insn_ids = ArrayVec::elements_sim_ref(&next_retire_insn_ids);
+                    let expected_next_retire_insn_ids = Vec::from_iter(
+                        Queue::peek_iter(&state.all_stages.execute_retire.input_queue)
+                            .map(|v| v.insn.id.clone()),
+                    );
+                    assert_eq!(next_retire_insn_ids, expected_next_retire_insn_ids);
+                }
+            }
             let AllStagesInputs {
                 next_pc,
                 br_pred,

File diff suppressed because it is too large.


@@ -460,6 +460,7 @@ struct MockExecuteState {
     queue: VecDeque<SimValue<ExecuteRetirePipeQueueEntry>>,
     used_ids: BTreeSet<SimValue<UInt<12>>>,
     retire_seq: RetireSeq,
+    canceling: bool,
     config: PhantomConst<CpuConfig>,
 }
 
@@ -469,6 +470,7 @@ impl MockExecuteState {
             queue: VecDeque::new(),
             used_ids: BTreeSet::new(),
             retire_seq: RetireSeq::new(),
+            canceling: false,
             config,
         }
     }
@@ -483,6 +485,7 @@ impl MockExecuteState {
     fn do_retire(
         &mut self,
         entry: SimValue<ExecuteRetirePipeQueueEntry>,
+        passive: bool,
     ) -> Result<SimValue<RetireToNextPcInterfacePerInsn<PhantomConst<CpuConfig>>>, String> {
         #[hdl(sim)]
         let ExecuteRetirePipeQueueEntry {
@@ -549,6 +552,18 @@ impl MockExecuteState {
                 "insn doesn't match expected:\ninsn: {insn:?}\nexpected insn: {expected_insn:?}"
             ));
         }
+        if let Some(next_insn) = self.queue.front() {
+            if next_pc != next_insn.insn.pc.as_int() {
+                self.canceling = true;
+                if !passive {
+                    println!(
+                        "MockExecuteState: starting canceling {} instruction(s): next_pc={next_pc:#x}, mis-predicted next_pc={next_insn_pc}",
+                        self.queue.len(),
+                        next_insn_pc = next_insn.insn.pc
+                    );
+                }
+            }
+        }
         Ok(
             #[hdl(sim)]
             RetireToNextPcInterfacePerInsn::<_> {
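
The block added in the hunk above is how the mock notices a misprediction: after computing a retiring instruction's true `next_pc`, it peeks at the next queued instruction, and a pc mismatch means everything still queued was fetched down the wrong path, so the mock enters its `canceling` state (logging only when not doing a passive dry run). Roughly, with simplified stand-in types and hypothetical names:

```rust
use std::collections::VecDeque;

struct Insn {
    pc: u64,
}

struct Entry {
    insn: Insn,
}

// true when the instruction queued behind the retiring one was fetched
// from somewhere other than the retiring instruction's computed next_pc
fn mispredicted(queue: &VecDeque<Entry>, next_pc: u64) -> bool {
    queue.front().is_some_and(|next| next.insn.pc != next_pc)
}
```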
@@ -569,16 +584,20 @@ impl MockExecuteState {
     #[hdl]
     fn try_retire(
         &mut self,
+        passive: bool,
     ) -> Option<(
         SimValue<RetireToNextPcInterfacePerInsn<PhantomConst<CpuConfig>>>,
         Result<(), String>,
     )> {
+        if self.canceling {
+            return None;
+        }
         if self.queue.front()?.cycles_left.as_int() != 0 {
             return None;
         }
         let entry = self.queue.pop_front()?;
         let id = entry.insn.id.clone();
-        Some(match self.do_retire(entry) {
+        Some(match self.do_retire(entry, passive) {
             Ok(v) => (v, Ok(())),
             Err(e) => (
                 #[hdl(sim)]
@@ -611,6 +630,16 @@ impl MockExecuteState {
             },
         );
     }
+    #[hdl]
+    fn finish_cancel(&mut self) {
+        println!(
+            "MockExecuteState: finishing canceling {} instruction(s)",
+            self.queue.len(),
+        );
+        self.queue.clear();
+        self.used_ids.clear();
+        self.canceling = false;
+    }
 }
 
 #[hdl_module(extern)]
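
Taken together with `try_retire`'s new early return, `finish_cancel` gives the mock a small two-phase cancel protocol: retirement freezes as soon as `canceling` is set, and a simulated cycle later the queue and id set are flushed. A condensed plain-Rust sketch with hypothetical field types:

```rust
use std::collections::{BTreeSet, VecDeque};

#[derive(Default)]
struct MockExecute {
    queue: VecDeque<u64>,
    used_ids: BTreeSet<u64>,
    canceling: bool,
}

impl MockExecute {
    // phase 1: while canceling, nothing retires and no new ids are exposed
    fn try_retire(&mut self) -> Option<u64> {
        if self.canceling {
            return None;
        }
        self.queue.pop_front()
    }

    // phase 2: drop every mis-speculated instruction and rearm
    fn finish_cancel(&mut self) {
        self.queue.clear();
        self.used_ids.clear();
        self.canceling = false;
    }
}
```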
@@ -645,7 +674,7 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
         .await;
         sim.write(
             retire_output.next_insn_ids,
-            retire_output.next_insn_ids.ty().new_sim(0_hdl_u12),
+            retire_output.next_insn_ids.ty().HdlNone(),
         )
         .await;
         sim.write(
@@ -699,7 +728,7 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
         let mut sim_queue = queue_debug
             .ty()
             .new_sim(ExecuteRetirePipeQueueEntry.default_sim());
-        let mut next_insn_ids = retire_output.next_insn_ids.ty().new_sim(0_hdl_u12);
+        let mut next_insn_ids = retire_output.next_insn_ids.ty().HdlSome.new_sim(0_hdl_u12);
         for entry in &state.queue {
             ArrayVec::try_push_sim(&mut sim_queue, entry)
                 .ok()
@@ -707,10 +736,20 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
             let _ = ArrayVec::try_push_sim(&mut next_insn_ids, &entry.insn.id);
         }
         sim.write(queue_debug, sim_queue).await;
-        sim.write(retire_output.next_insn_ids, next_insn_ids).await;
+        sim.write(
+            retire_output.next_insn_ids,
+            if state.canceling {
+                #[hdl(sim)]
+                (retire_output.next_insn_ids.ty()).HdlNone()
+            } else {
+                #[hdl(sim)]
+                (retire_output.next_insn_ids.ty()).HdlSome(next_insn_ids)
+            },
+        )
+        .await;
         let mut retiring = retire_vec_ty.new_sim(&empty_retire_insn);
         let mut peek_state = state.clone();
-        while let Some((peek_retire, result)) = peek_state.try_retire() {
+        while let Some((peek_retire, result)) = peek_state.try_retire(true) {
             if result.is_err() && **ArrayVec::len_sim(&retiring) > 0 {
                 break;
             }
@@ -737,7 +776,11 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
         .await;
         sim.write(
             from_post_decode.ready,
-            state.space_available().min(config.get().fetch_width.get()),
+            if state.canceling {
+                0
+            } else {
+                state.space_available().min(config.get().fetch_width.get())
+            },
         )
         .await;
         sim.wait_for_clock_edge(cd.clk).await;
@@ -752,9 +795,12 @@ fn mock_execute_retire_pipe(config: PhantomConst<CpuConfig>) {
                 ))
             }))
         );
+        if state.canceling {
+            state.finish_cancel();
+        }
         if sim.read_past_bool(retire_output.inner.ready, cd.clk).await {
             for _ in 0..**ArrayVec::len_sim(&retiring) {
-                match state.try_retire() {
+                match state.try_retire(false) {
                     Some((_, Ok(_))) => {}
                     Some((_, Err(e))) => panic!("retire error: {e}"),
                     None => unreachable!(),
@@ -850,7 +896,7 @@ fn test_next_pc() {
     };
     sim.write_clock(sim.io().cd.clk, false);
     sim.write_reset(sim.io().cd.rst, true);
-    for _cycle in 0..300 {
+    for _cycle in 0..500 {
         sim.advance_time(SimDuration::from_nanos(500));
         println!("clock tick");
         sim.write_clock(sim.io().cd.clk, true);