next_pc works afaict
Some checks failed
/ test (pull_request) Has been cancelled

This commit is contained in:
Jacob Lifshay 2025-12-16 23:06:32 -08:00
parent 59da0aec06
commit d5a7d9dd9e
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
3 changed files with 293695 additions and 38183 deletions

View file

@ -251,7 +251,8 @@ pub struct RetireToNextPcInterfaceInner<C: PhantomConstGet<CpuConfig>> {
/// branch instruction is mis-speculated.
pub struct RetireToNextPcInterface<C: PhantomConstGet<CpuConfig>> {
pub inner: ReadyValid<RetireToNextPcInterfaceInner<C>>,
pub next_insn_ids: HdlOption<ArrayVec<UInt<12>, CpuConfigRobSize<C>>>,
/// only for debugging
pub next_insns: HdlOption<ArrayVec<WipDecodedInsn, CpuConfigRobSize<C>>>,
}
#[hdl(no_static)]
@ -2036,7 +2037,9 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
let mut branch_offset = 0u8;
let mut after_call_offset = 0u8;
let mut btb_entry_fields = None;
for insn in insns {
let mut branch_history = start_branch_history.clone();
let mut insn_index_of_branch_history_update = 0;
for (index, insn) in insns.iter().enumerate() {
#[hdl(sim)]
let WipDecodedInsn {
fetch_block_id: insn_fetch_block_id,
@ -2050,19 +2053,24 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
insn_fetch_block_id, fetch_block_id,
"fetch decode pipeline's output isn't in-sync with fetching_queue",
);
let guess_branch_addr_kind = |fallback_taken| {
#[hdl(sim)]
let mut cond_branch_taken = None;
let mut guess_cond_branch_addr_kind = |fallback_taken| {
let taken = #[hdl(sim)]
if let HdlSome(entry) = predicted_btb_entry {
let addr_kind = &entry.1.addr_kind;
#[hdl(sim)]
match addr_kind {
BTBEntryAddrKind::Unconditional | BTBEntryAddrKind::Indirect => {}
BTBEntryAddrKind::CondTaken | BTBEntryAddrKind::CondNotTaken => {
return addr_kind.clone();
BTBEntryAddrKind::Unconditional | BTBEntryAddrKind::Indirect => {
fallback_taken
}
BTBEntryAddrKind::CondTaken => true,
BTBEntryAddrKind::CondNotTaken => false,
}
}
if fallback_taken {
} else {
fallback_taken
};
cond_branch_taken = Some(taken);
if taken {
#[hdl(sim)]
BTBEntryAddrKind::CondTaken()
} else {
@ -2091,7 +2099,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Branch();
// guess backwards branches are taken and forwards branches are not
addr_kind = guess_branch_addr_kind(target_pc.as_int() <= pc.as_int());
addr_kind = guess_cond_branch_addr_kind(target_pc.as_int() <= pc.as_int());
can_train_cond_branch_predictor = true;
Some(target_pc.as_int())
}
@ -2115,7 +2123,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Call();
// guess conditional calls are taken
addr_kind = guess_branch_addr_kind(true);
addr_kind = guess_cond_branch_addr_kind(true);
can_train_cond_branch_predictor = true;
Some(target_pc.as_int())
}
@ -2139,7 +2147,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Ret();
// guess conditional returns are taken
addr_kind = guess_branch_addr_kind(true);
addr_kind = guess_cond_branch_addr_kind(true);
can_train_cond_branch_predictor = true;
None
}
@ -2158,6 +2166,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
// for now we just truncate the fetch block right before the second ctrl transfer insn.
break;
}
if let Some(taken) = cond_branch_taken {
insn_index_of_branch_history_update = index;
step_branch_history(&mut branch_history, taken);
}
branch_offset = fallthrough_offset;
let target_pc = target_pc.unwrap_or_else(|| predicted_next_start_pc.as_int());
add_output_insn(
@ -2228,11 +2240,45 @@ impl<C: PhantomConstCpuConfig> Stage<C> for PostDecodeStageState<C> {
cancel_self: false,
}
} else {
let mut predicted_next_pc = start_pc.as_int();
for (i, output) in ArrayVec::elements_sim_ref(&outputs).iter().enumerate() {
if output.insn.pc.as_int() != predicted_next_pc {
assert!(
i > 0,
"first instruction's pc ({}) should match the fetch's start_pc {start_pc}",
output.insn.pc,
);
ArrayVec::truncate_sim(&mut outputs, i);
if i <= insn_index_of_branch_history_update {
branch_history = start_branch_history.clone();
}
break;
}
predicted_next_pc = output.insn.predicted_next_pc.as_int();
}
let cancel = if predicted_next_pc != predicted_next_start_pc.as_int() {
#[hdl(sim)]
cancel_ty.HdlSome(
#[hdl(sim)]
Cancel::<_> {
call_stack: start_call_stack,
start_pc: predicted_next_pc,
new_btb_entry: #[hdl(sim)]
HdlNone(),
btb_entry_index: #[hdl(sim)]
HdlNone(),
branch_history,
config,
},
)
} else {
#[hdl(sim)]
cancel_ty.HdlNone()
};
#[hdl(sim)]
StageRunOutput::<_, _> {
outputs,
cancel: #[hdl(sim)]
cancel_ty.HdlNone(),
cancel,
cancel_self: false,
}
}
@ -2277,6 +2323,7 @@ struct ExecuteRetireStageOutput<C: PhantomConstGet<CpuConfig>> {
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
id: UInt<12>,
pc: UInt<64>,
next_pc: UInt<64>,
config: C,
}
@ -2288,6 +2335,7 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for ExecuteRetireStageOutput<C> {
fetch_block_id,
id,
pc: _,
next_pc: _,
config,
} = self;
#[hdl(sim)]
@ -2297,6 +2345,7 @@ impl<C: PhantomConstCpuConfig> SimValueDefault for ExecuteRetireStageOutput<C> {
fetch_block_id: fetch_block_id.zero(),
id: id.zero(),
pc: 0u64,
next_pc: 0u64,
config,
}
}
@ -2389,9 +2438,10 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
fetch_block_id,
id,
pc,
next_pc,
config: _,
} = item;
format!("fid={fetch_block_id} id={id} pc={pc}")
format!("fid={fetch_block_id} id={id} pc={pc} npc={next_pc}")
}
#[hdl]
@ -2565,6 +2615,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
fetch_block_id: &insn.fetch_block_id,
id,
pc: insn.pc,
next_pc,
config,
},
),
@ -2592,6 +2643,7 @@ impl<C: PhantomConstCpuConfig> Stage<C> for ExecuteRetireStageState<C> {
fetch_block_id: &insn.fetch_block_id,
id,
pc: insn.pc,
next_pc,
config,
},
]),
@ -4120,6 +4172,7 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
fetch_block_id: _,
id: _,
pc: _,
next_pc: _,
config: _,
} = &execute_retire_output;
#[hdl(sim)]
@ -4312,6 +4365,47 @@ impl<C: PhantomConstCpuConfig> AllStages<C> {
StageWithQueues::dump_queues(post_decode);
StageWithQueues::dump_queues(execute_retire);
}
/// Debug check that the PCs of the work items queued between stages chain together.
///
/// Three things are checked, all on the queues' current contents (via `Queue::peek_iter`,
/// so nothing is popped):
///
/// 1. Fetch operations: for every adjacent pair in the concatenation of
///    `post_decode.input_queue`, `fetch_decode.output_queue`, `fetch_decode.input_queue`
///    and `next_pc.output_queue`, the first entry's `next_start_pc` must equal the
///    second entry's `start_pc`.
/// 2. Instructions: for every adjacent pair in the concatenation of
///    `execute_retire.input_queue` and `post_decode.output_queue`, the first
///    instruction's `predicted_next_pc` must equal the second instruction's `pc`.
/// 3. If that instruction list is non-empty and `execute_retire.output_queue` is
///    non-empty, the last output's `next_pc` must equal the first instruction's `pc`.
///
/// # Panics
///
/// Panics (through `assert_eq!`) when any of the comparisons above fails; the message
/// contains a debug dump of the mismatching entries.
#[hdl]
fn assert_pcs_match_predicted_pcs(this: &SimValue<Self>) {
#[hdl(sim)]
let Self {
next_pc,
br_pred: _,
fetch_decode,
post_decode,
execute_retire,
config: _,
} = this;
// check fetch ops:
// NOTE(review): the chain starts at post_decode's input and ends at next_pc's output,
// i.e. presumably oldest fetch first -- confirm against the queues' ordering.
let fetch_ops: Vec<_> = Queue::peek_iter(&post_decode.input_queue)
.map(|v| &v.0.next_pc_stage_output)
.chain(Queue::peek_iter(&fetch_decode.output_queue).map(|v| &v.next_pc_stage_output))
.chain(Queue::peek_iter(&fetch_decode.input_queue))
.chain(Queue::peek_iter(&next_pc.output_queue))
.collect();
// each fetch must start where the previous fetch predicted the next one would start
for i in fetch_ops.windows(2) {
assert_eq!(i[0].next_start_pc, i[1].start_pc, "{i:#?}")
}
// check insns:
// we ignore fetch_decode.output_queue and post_decode.input_queue here
// because the predicted_next_pc values aren't valid yet.
let insns: Vec<_> = Queue::peek_iter(&execute_retire.input_queue)
.map(|v| &v.insn)
.chain(Queue::peek_iter(&post_decode.output_queue).map(|v| &v.insn))
.collect();
// each instruction must sit at the pc its predecessor predicted
for i in insns.windows(2) {
assert_eq!(i[0].predicted_next_pc, i[1].pc, "{i:#?}");
}
// the last entry of execute_retire's output queue must lead into the first
// still-queued instruction; skipped when either side is empty
if let Some(next_input) = insns.first() {
if let Some(last_output) = Queue::peek_iter(&execute_retire.output_queue).last() {
assert_eq!(
last_output.next_pc, next_input.pc,
"last_output={last_output:#?}\nnext_input={next_input:#?}"
);
}
}
}
}
#[hdl(no_static)]
@ -4435,6 +4529,13 @@ impl<C: PhantomConstCpuConfig> NextPcState<C> {
assert_eq!(br_pred_ops, fetch_decode_ops, "queues out of sync");
}
}
/// Runs `AllStages::assert_pcs_match_predicted_pcs` on `this.all_stages`, but only
/// when `this.cancel` is `HdlNone`.
///
/// NOTE(review): the check is skipped while a cancel is recorded, presumably because
/// the queued PCs are not expected to chain together until the cancel has been
/// applied -- confirm.
///
/// # Panics
///
/// Panics if the delegated check fails.
#[hdl]
fn assert_pcs_match_predicted_pcs(this: &SimValue<Self>) {
#[hdl(sim)]
if let HdlNone = &this.cancel {
AllStages::assert_pcs_match_predicted_pcs(&this.all_stages);
}
}
}
fn uint_in_range_inclusive_max<Start: Size, End: Size>(
@ -4567,6 +4668,7 @@ pub fn next_pc(config: PhantomConst<CpuConfig>) {
false
};
AllStages::dump_queues(&state.all_stages);
NextPcState::assert_pcs_match_predicted_pcs(&state);
let next_fetch_block_ids = sim.read_past(to_fetch.next_fetch_block_ids, cd.clk).await;
#[hdl(sim)]
if let HdlSome(next_fetch_block_ids) = &next_fetch_block_ids {
@ -4579,16 +4681,16 @@ pub fn next_pc(config: PhantomConst<CpuConfig>) {
assert_eq!(next_fetch_block_ids, expected_next_fetch_block_ids);
}
}
let next_retire_insn_ids = sim.read_past(from_retire.next_insn_ids, cd.clk).await;
let next_retire_insns = sim.read_past(from_retire.next_insns, cd.clk).await;
#[hdl(sim)]
if let HdlSome(next_retire_insn_ids) = &next_retire_insn_ids {
if let HdlSome(next_retire_insns) = &next_retire_insns {
if !was_canceling && !is_canceling {
let next_retire_insn_ids = ArrayVec::elements_sim_ref(&next_retire_insn_ids);
let expected_next_retire_insn_ids = Vec::from_iter(
let next_retire_insns = ArrayVec::elements_sim_ref(&next_retire_insns);
let expected_next_retire_insns = Vec::from_iter(
Queue::peek_iter(&state.all_stages.execute_retire.input_queue)
.map(|v| v.insn.id.clone()),
.map(|v| v.insn.clone()),
);
assert_eq!(next_retire_insn_ids, expected_next_retire_insn_ids);
assert_eq!(next_retire_insns, expected_next_retire_insns);
}
}
was_canceling = is_canceling;

File diff suppressed because it is too large Load diff

View file

@ -326,7 +326,9 @@ impl MockInsn {
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MockMachineState {
reset_at_exit: bool,
call_stack: Vec<u64>,
orig_input: &'static [u8],
input: &'static [u8],
output: Vec<u8>,
regs: MockRegs,
@ -340,16 +342,36 @@ const RESET_PC: u64 = 0;
const EXIT_PC: u64 = u64::from_be_bytes(*b"ExitExit");
impl MockMachineState {
fn new(insns: MockInsns, input: impl AsRef<[u8]>) -> Self {
fn new(insns: MockInsns, input: impl AsRef<[u8]>, reset_at_exit: bool) -> Self {
let input = Interned::into_inner(input.as_ref().intern());
Self {
reset_at_exit,
call_stack: Vec::with_capacity(16),
input: Interned::into_inner(input.as_ref().intern()),
orig_input: input,
input,
output: Vec::with_capacity(16),
regs: MockRegs::new(),
pc: RESET_PC,
insns,
}
}
/// Puts the mutable machine state back to its starting values: `pc` becomes
/// `RESET_PC`, the registers are replaced by `MockRegs::new()`, the output and the
/// call stack are emptied, and `input` is rewound to `orig_input`.
///
/// `reset_at_exit` and `insns` are left untouched.
fn reset(&mut self) {
    // Every field is named in the pattern (none hidden behind `..`) so that adding
    // a field to the struct forces this function to be revisited.
    let Self {
        reset_at_exit: _,
        insns: _,
        pc,
        regs,
        output,
        call_stack,
        input,
        orig_input,
    } = self;
    *pc = RESET_PC;
    *regs = MockRegs::new();
    output.clear();
    call_stack.clear();
    // rewind the input to the full slice
    *input = *orig_input;
}
fn run_one(&mut self, trace: bool) -> RetireSeqEntry {
let orig_pc = self.pc;
let insn = self
@ -420,7 +442,18 @@ impl MockMachineState {
next_pc = target;
}
MockInsn::Ret => next_pc = self.call_stack.pop().unwrap_or(DEMO_ILLEGAL_INSN_TRAP),
MockInsn::ExitSysCall => next_pc = EXIT_PC,
MockInsn::ExitSysCall => {
if self.reset_at_exit {
self.reset();
return RetireSeqEntry {
pc: orig_pc,
cond_br_taken: None,
insn,
};
} else {
next_pc = EXIT_PC;
}
}
MockInsn::Illegal => next_pc = DEMO_ILLEGAL_INSN_TRAP,
}
self.pc = next_pc;
@ -721,7 +754,7 @@ fn test_program_expr_parser() {
#[track_caller]
fn test(expected_output: &str, expected_exit_code: u64, input: &str) {
println!("starting new test case: input={input:?}\n\n");
let mut state = MockMachineState::new(mock_program_expr_parser(), input);
let mut state = MockMachineState::new(mock_program_expr_parser(), input, false);
let exit_code = state.run_until_exit(10000, true).unwrap();
println!("output: {:?}", str::from_utf8(&state.output));
println!("exit code: {exit_code}");
@ -1250,8 +1283,8 @@ fn mock_execute_retire_pipe(
)
.await;
sim.write(
retire_output.next_insn_ids,
retire_output.next_insn_ids.ty().HdlNone(),
retire_output.next_insns,
retire_output.next_insns.ty().HdlNone(),
)
.await;
sim.write(
@ -1294,7 +1327,7 @@ fn mock_execute_retire_pipe(
let config = from_post_decode.config.ty();
let mut state = MockExecuteState::new(
config,
RetireSeq::new(MockMachineState::new(mock_insns, mock_input)),
RetireSeq::new(MockMachineState::new(mock_insns, mock_input, true)),
);
let empty_retire_insn = #[hdl(sim)]
RetireToNextPcInterfacePerInsn::<_> {
@ -1312,22 +1345,32 @@ fn mock_execute_retire_pipe(
let mut sim_queue = queue_debug
.ty()
.new_sim(ExecuteRetirePipeQueueEntry.default_sim());
let mut next_insn_ids = retire_output.next_insn_ids.ty().HdlSome.new_sim(0_hdl_u12);
let mut next_insns = retire_output.next_insns.ty().HdlSome.new_sim(
#[hdl(sim)]
WipDecodedInsn {
fetch_block_id: 0u8,
id: 0_hdl_u12,
pc: 0u64,
predicted_next_pc: 0u64,
size_in_bytes: 0_hdl_u4,
kind: WipDecodedInsnKind.NonBranch(),
},
);
for entry in &state.queue {
ArrayVec::try_push_sim(&mut sim_queue, entry)
.ok()
.expect("queue is known to be small enough");
let _ = ArrayVec::try_push_sim(&mut next_insn_ids, &entry.insn.id);
let _ = ArrayVec::try_push_sim(&mut next_insns, &entry.insn);
}
sim.write(queue_debug, sim_queue).await;
sim.write(
retire_output.next_insn_ids,
retire_output.next_insns,
if state.canceling {
#[hdl(sim)]
(retire_output.next_insn_ids.ty()).HdlNone()
(retire_output.next_insns.ty()).HdlNone()
} else {
#[hdl(sim)]
(retire_output.next_insn_ids.ty()).HdlSome(next_insn_ids)
(retire_output.next_insns.ty()).HdlSome(next_insns)
},
)
.await;
@ -1498,7 +1541,7 @@ fn test_next_pc() {
};
sim.write_clock(sim.io().cd.clk, false);
sim.write_reset(sim.io().cd.rst, true);
for cycle in 0..800 {
for cycle in 0..2000 {
sim.advance_time(SimDuration::from_nanos(500));
println!("clock tick: {cycle}");
sim.write_clock(sim.io().cd.clk, true);