WIP adding next_pc: add call stack

This commit is contained in:
Jacob Lifshay 2025-11-28 01:41:18 -08:00
parent 7a77c02cda
commit 033d5d4f34
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ

View file

@ -17,7 +17,6 @@ use fayalite::{
int::{UIntInRange, UIntInRangeInclusive, UIntInRangeType}, int::{UIntInRange, UIntInRangeInclusive, UIntInRangeType},
prelude::*, prelude::*,
sim::value::SimOnlyValueTrait, sim::value::SimOnlyValueTrait,
ty::StaticType,
util::ready_valid::ReadyValid, util::ready_valid::ReadyValid,
}; };
@ -77,11 +76,9 @@ pub enum WipDecodedInsnKind {
Branch(UInt<64>), Branch(UInt<64>),
BranchCond(UInt<64>), BranchCond(UInt<64>),
IndirectBranch, IndirectBranch,
IndirectBranchCond,
Call(UInt<64>), Call(UInt<64>),
CallCond(UInt<64>), CallCond(UInt<64>),
IndirectCall, IndirectCall,
IndirectCallCond,
Ret, Ret,
RetCond, RetCond,
/// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on. /// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on.
@ -98,11 +95,9 @@ impl WipDecodedInsnKind {
| Self::Branch(_) | Self::Branch(_)
| Self::BranchCond(_) | Self::BranchCond(_)
| Self::IndirectBranch | Self::IndirectBranch
| Self::IndirectBranchCond
| Self::Call(_) | Self::Call(_)
| Self::CallCond(_) | Self::CallCond(_)
| Self::IndirectCall | Self::IndirectCall
| Self::IndirectCallCond
| Self::Ret | Self::Ret
| Self::RetCond | Self::RetCond
| Self::Unknown => None, | Self::Unknown => None,
@ -267,10 +262,33 @@ impl<T: SimValueDefault, Len: Size> ResetSteps for ArrayType<T, Len> {
struct CallStack { struct CallStack {
return_addresses: Array<UInt<64>, { CallStack::SIZE }>, return_addresses: Array<UInt<64>, { CallStack::SIZE }>,
len: UIntInRangeInclusive<0, { CallStack::SIZE }>, len: UIntInRangeInclusive<0, { CallStack::SIZE }>,
top: UIntInRange<0, { CallStack::SIZE }>,
} }
impl CallStack { impl CallStack {
const SIZE: usize = 16; const SIZE: usize = 16;
/// Pushes `value` as the newest return address.
///
/// The storage is used as a ring: `value` is written at index `top`, then `top`
/// advances modulo `SIZE`. `len` saturates at `SIZE`, so pushing onto a full
/// stack overwrites a previously stored entry instead of failing.
fn push(this: &mut SimValue<CallStack>, value: impl ToSimValue<Type = UInt<64>>) {
    // saturating length: never report more than SIZE live entries
    *this.len = (*this.len + 1).min(Self::SIZE);
    let slot = *this.top;
    this.return_addresses[slot] = value.into_sim_value();
    *this.top = (slot + 1) % Self::SIZE;
}
/// Pops the most recently pushed return address.
///
/// Returns `None` when `len` is zero. Otherwise decrements `len`, steps `top`
/// back by one slot (wrapping modulo `SIZE`) and returns the value stored there.
fn pop(this: &mut SimValue<CallStack>) -> Option<u64> {
    if *this.len == 0 {
        return None;
    }
    *this.len -= 1;
    // adding SIZE before subtracting keeps the index arithmetic from underflowing
    let slot = (*this.top + Self::SIZE - 1) % Self::SIZE;
    *this.top = slot;
    Some(this.return_addresses[slot].as_int())
}
} }
impl SimValueDefault for CallStack { impl SimValueDefault for CallStack {
@ -281,6 +299,7 @@ impl SimValueDefault for CallStack {
// something other than zero so you can see the values getting reset // something other than zero so you can see the values getting reset
return_addresses: [!0u64; Self::SIZE], return_addresses: [!0u64; Self::SIZE],
len: 0usize.to_sim_value_with_type(self.len), len: 0usize.to_sim_value_with_type(self.len),
top: 0usize.to_sim_value_with_type(self.top),
} }
} }
} }
@ -292,10 +311,12 @@ impl ResetSteps for CallStack {
let CallStack { let CallStack {
return_addresses, return_addresses,
len, len,
top,
} = this; } = this;
// return_addresses is implemented as a shift register, so it can be all reset at once // return_addresses is implemented as a shift register, so it can be all reset at once
return_addresses.fill(0u64.to_sim_value()); return_addresses.fill(0u64.to_sim_value());
**len = 0; **len = 0;
**top = 0;
ResetStatus::Done ResetStatus::Done
} }
} }
@ -315,15 +336,13 @@ impl BTBEntryInsnKind {
WipDecodedInsnKind::NonBranch => None, WipDecodedInsnKind::NonBranch => None,
WipDecodedInsnKind::Branch(_) WipDecodedInsnKind::Branch(_)
| WipDecodedInsnKind::BranchCond(_) | WipDecodedInsnKind::BranchCond(_)
| WipDecodedInsnKind::IndirectBranch | WipDecodedInsnKind::IndirectBranch => Some(
| WipDecodedInsnKind::IndirectBranchCond => Some(
#[hdl(sim)] #[hdl(sim)]
BTBEntryInsnKind::Branch(), BTBEntryInsnKind::Branch(),
), ),
WipDecodedInsnKind::Call(_) WipDecodedInsnKind::Call(_)
| WipDecodedInsnKind::CallCond(_) | WipDecodedInsnKind::CallCond(_)
| WipDecodedInsnKind::IndirectCall | WipDecodedInsnKind::IndirectCall => Some(
| WipDecodedInsnKind::IndirectCallCond => Some(
#[hdl(sim)] #[hdl(sim)]
BTBEntryInsnKind::Call(), BTBEntryInsnKind::Call(),
), ),
@ -385,11 +404,6 @@ impl BTBEntryAddrKind {
#[hdl(sim)] #[hdl(sim)]
Self::Indirect(), Self::Indirect(),
), ),
WipDecodedInsnKind::IndirectBranchCond | WipDecodedInsnKind::IndirectCallCond => Some(
// our conditional branch prediction doesn't work with indirect branches
#[hdl(sim)]
Self::Indirect(),
),
WipDecodedInsnKind::Interrupt(_) => None, WipDecodedInsnKind::Interrupt(_) => None,
WipDecodedInsnKind::Unknown => None, WipDecodedInsnKind::Unknown => None,
} }
@ -404,6 +418,8 @@ struct BTBEntry {
/// when branch is not taken, the next pc to fetch from is `start_pc + fallthrough_offset`. /// when branch is not taken, the next pc to fetch from is `start_pc + fallthrough_offset`.
/// needed because there may be more than one branch in a fetch block /// needed because there may be more than one branch in a fetch block
fallthrough_offset: UInt<8>, fallthrough_offset: UInt<8>,
/// when a call is made, the return address is `start_pc + after_call_offset`
after_call_offset: UInt<8>,
insn_kind: BTBEntryInsnKind, insn_kind: BTBEntryInsnKind,
addr_kind: BTBEntryAddrKind, addr_kind: BTBEntryAddrKind,
} }
@ -419,13 +435,48 @@ impl BTBEntry {
} }
} }
/// State of a 31-bit linear-feedback shift register, advanced by `LFSR31::next`.
#[hdl]
struct LFSR31 {
// MSB is always zero, 32 bits makes it easier to manipulate
state: UInt<32>,
}
// Default simulation value for the LFSR: `state` starts at 1.
impl SimValueDefault for LFSR31 {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
// start from a non-zero state; `LFSR31::next` replaces a zero state with 1 anyway
#[hdl(sim)]
Self { state: 1u32 }
}
}
impl LFSR31 {
    /// Advances the generator by one step, stores the new state, and returns it.
    ///
    /// A stored state of zero is replaced with `1` rather than stepped, because
    /// stepping zero would produce zero again.
    fn next(this: &mut SimValue<Self>) -> u32 {
        // only the low 31 bits carry LFSR state
        const STATE_MASK: u32 = (1 << 31) - 1;
        let current = this.state.as_int();
        let advanced = if current != 0 {
            // a maximal-length 31-bit LFSR: feedback is bit 30 XOR bit 27
            let feedback = ((current >> 30) ^ (current >> 27)) & 1;
            let shifted = (current << 1) & STATE_MASK;
            feedback | shifted
        } else {
            1u32
        };
        *this.state = advanced.into();
        advanced
    }
}
#[hdl] #[hdl]
struct BranchTargetBuffer { struct BranchTargetBuffer {
branch_pc_to_target_map: Array<HdlOption<BTBEntry>, { BranchTargetBuffer::SIZE }>, branch_pc_to_target_map: Array<HdlOption<BTBEntry>, { BranchTargetBuffer::SIZE }>,
next_index_to_replace_lfsr: LFSR31,
} }
impl BranchTargetBuffer { impl BranchTargetBuffer {
const SIZE: usize = 16; const LOG2_SIZE: usize = 4;
const SIZE: usize = 1 << Self::LOG2_SIZE;
/// Steps the replacement LFSR and reduces its new state modulo `SIZE` to pick
/// the index of the entry to replace.
fn next_index_to_replace(this: &mut SimValue<Self>) -> usize {
    let lfsr_state = LFSR31::next(&mut this.next_index_to_replace_lfsr);
    lfsr_state as usize % Self::SIZE
}
} }
impl SimValueDefault for BranchTargetBuffer { impl SimValueDefault for BranchTargetBuffer {
@ -440,10 +491,12 @@ impl SimValueDefault for BranchTargetBuffer {
start_pc: !0u64, start_pc: !0u64,
target_pc: !0u64, target_pc: !0u64,
fallthrough_offset: !0u8, fallthrough_offset: !0u8,
after_call_offset: !0u8,
insn_kind: BTBEntryInsnKind.Call(), insn_kind: BTBEntryInsnKind.Call(),
addr_kind: BTBEntryAddrKind.CondNotTaken(), addr_kind: BTBEntryAddrKind.CondNotTaken(),
}, },
); Self::SIZE], ); Self::SIZE],
next_index_to_replace_lfsr: LFSR31.sim_value_default(),
} }
} }
} }
@ -454,7 +507,9 @@ impl ResetSteps for BranchTargetBuffer {
#[hdl(sim)] #[hdl(sim)]
let BranchTargetBuffer { let BranchTargetBuffer {
branch_pc_to_target_map, branch_pc_to_target_map,
next_index_to_replace_lfsr,
} = this; } = this;
*next_index_to_replace_lfsr = LFSR31.sim_value_default();
ResetSteps::reset_step(branch_pc_to_target_map, step) ResetSteps::reset_step(branch_pc_to_target_map, step)
} }
} }
@ -850,11 +905,11 @@ impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
fetch_block_id: expected_fetch_block_id, fetch_block_id: expected_fetch_block_id,
btb_entry, btb_entry,
btb_entry_index, btb_entry_index,
next_pc, next_pc: orig_next_pc,
} = fetch_queue_entry; } = fetch_queue_entry;
let insns = ArrayVec::elements_sim_ref(&insns); let insns = ArrayVec::elements_sim_ref(&insns);
if let Some(target_pc) = WipDecodedInsnKind::interrupt_target_pc_sim(&insns[0].kind) { if let Some(target_pc) = WipDecodedInsnKind::interrupt_target_pc_sim(&insns[0].kind) {
if **target_pc != *next_pc { if *target_pc != orig_next_pc {
*this.cancel_in_progress_fetches = true; *this.cancel_in_progress_fetches = true;
this.pc = target_pc.clone(); this.pc = target_pc.clone();
} }
@ -862,6 +917,7 @@ impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
} }
let start_pc = insns[0].pc.as_int(); let start_pc = insns[0].pc.as_int();
let mut fallthrough_offset = 0u8; let mut fallthrough_offset = 0u8;
let mut after_call_offset = 0u8;
let mut btb_entry_fields = None; let mut btb_entry_fields = None;
let mut eval_cond_branch = || -> SimValue<BTBEntryAddrKind> { let mut eval_cond_branch = || -> SimValue<BTBEntryAddrKind> {
todo!(); todo!();
@ -889,13 +945,13 @@ impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
BTBEntryInsnKind::Branch(); BTBEntryInsnKind::Branch();
addr_kind = #[hdl(sim)] addr_kind = #[hdl(sim)]
BTBEntryAddrKind::Unconditional(); BTBEntryAddrKind::Unconditional();
Some(target_pc.clone()) Some(target_pc.as_int())
} }
WipDecodedInsnKind::BranchCond(target_pc) => { WipDecodedInsnKind::BranchCond(target_pc) => {
insn_kind = #[hdl(sim)] insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Branch(); BTBEntryInsnKind::Branch();
addr_kind = eval_cond_branch(); addr_kind = eval_cond_branch();
Some(target_pc.clone()) Some(target_pc.as_int())
} }
WipDecodedInsnKind::IndirectBranch => { WipDecodedInsnKind::IndirectBranch => {
insn_kind = #[hdl(sim)] insn_kind = #[hdl(sim)]
@ -904,26 +960,18 @@ impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
BTBEntryAddrKind::Indirect(); BTBEntryAddrKind::Indirect();
None None
} }
WipDecodedInsnKind::IndirectBranchCond => {
// our conditional branch prediction doesn't work with indirect branches
insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Branch();
addr_kind = #[hdl(sim)]
BTBEntryAddrKind::Indirect();
None
}
WipDecodedInsnKind::Call(target_pc) => { WipDecodedInsnKind::Call(target_pc) => {
insn_kind = #[hdl(sim)] insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Call(); BTBEntryInsnKind::Call();
addr_kind = #[hdl(sim)] addr_kind = #[hdl(sim)]
BTBEntryAddrKind::Unconditional(); BTBEntryAddrKind::Unconditional();
Some(target_pc.clone()) Some(target_pc.as_int())
} }
WipDecodedInsnKind::CallCond(target_pc) => { WipDecodedInsnKind::CallCond(target_pc) => {
insn_kind = #[hdl(sim)] insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Call(); BTBEntryInsnKind::Call();
addr_kind = eval_cond_branch(); addr_kind = eval_cond_branch();
Some(target_pc.clone()) Some(target_pc.as_int())
} }
WipDecodedInsnKind::IndirectCall => { WipDecodedInsnKind::IndirectCall => {
insn_kind = #[hdl(sim)] insn_kind = #[hdl(sim)]
@ -932,14 +980,6 @@ impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
BTBEntryAddrKind::Indirect(); BTBEntryAddrKind::Indirect();
None None
} }
WipDecodedInsnKind::IndirectCallCond => {
// our conditional branch prediction doesn't work with indirect calls
insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Call();
addr_kind = #[hdl(sim)]
BTBEntryAddrKind::Indirect();
None
}
WipDecodedInsnKind::Ret => { WipDecodedInsnKind::Ret => {
insn_kind = #[hdl(sim)] insn_kind = #[hdl(sim)]
BTBEntryInsnKind::Ret(); BTBEntryInsnKind::Ret();
@ -969,28 +1009,70 @@ impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
} }
btb_entry_fields = Some((insn_kind, addr_kind, target_pc)); btb_entry_fields = Some((insn_kind, addr_kind, target_pc));
fallthrough_offset += size_in_bytes.cast_to_static::<UInt<8>>().as_int(); fallthrough_offset += size_in_bytes.cast_to_static::<UInt<8>>().as_int();
#[hdl(sim)]
match insn_kind {
BTBEntryInsnKind::Call => after_call_offset = fallthrough_offset,
BTBEntryInsnKind::Branch | BTBEntryInsnKind::Ret | BTBEntryInsnKind::Unknown => {}
}
} }
if let Some((insn_kind, addr_kind, target_pc)) = btb_entry_fields { let new_next_pc = if let Some((insn_kind, addr_kind, mut target_pc)) = btb_entry_fields {
let expected_btb_entry = #[hdl(sim)] // add/update BTBEntry if it doesn't match
let btb_entry_index = #[hdl(sim)]
if let HdlSome(btb_entry) = btb_entry {
// verify it hasn't been changed meanwhile
#[hdl(sim)]
if let HdlSome(entry) =
&this.branch_target_buffer.branch_pc_to_target_map[*btb_entry_index]
{
// we have a btb entry, check if it has been modified
if entry.start_pc == btb_entry.start_pc {
// we found the correct BTBEntry
if target_pc.is_none() {
// save the existing target_pc if we know it
target_pc = Some(entry.target_pc.as_int());
}
Some(*btb_entry_index)
} else {
None
}
} else {
None
}
} else {
None
};
let btb_entry_index = btb_entry_index.unwrap_or_else(|| {
// we need to add a new entry, pick an entry to replace
BranchTargetBuffer::next_index_to_replace(&mut this.branch_target_buffer)
});
let new_next_pc = #[hdl(sim)]
match insn_kind {
BTBEntryInsnKind::Branch => {}
BTBEntryInsnKind::Call => {
CallStack::push(&mut this.speculative_call_stack, todo!());
todo!()
}
BTBEntryInsnKind::Ret => {
target_pc = CallStack::pop(&mut this.speculative_call_stack).or(target_pc);
}
};
let new_entry = #[hdl(sim)]
BTBEntry { BTBEntry {
start_pc, start_pc,
target_pc: target_pc.unwrap_or_else(|| 0u64.to_sim_value()), target_pc: target_pc.unwrap_or(0u64),
fallthrough_offset, fallthrough_offset,
after_call_offset,
insn_kind, insn_kind,
addr_kind, addr_kind,
}; };
// add/update BTBEntry if it doesn't match let entry_mut = &mut this.branch_target_buffer.branch_pc_to_target_map[btb_entry_index];
#[hdl(sim)] *entry_mut = #[hdl(sim)]
if let HdlSome(btb_entry) = btb_entry { HdlSome(new_entry);
todo!() new_next_pc
} else {
// add BTBEntry
todo!()
}
} else { } else {
#[hdl(sim)] #[hdl(sim)]
if let HdlSome(btb_entry) = btb_entry { if let HdlSome(btb_entry) = btb_entry {
// remove BTBEntry // the fetched instructions do not need a BTBEntry, remove the BTBEntry if it still exists
let entry_mut = let entry_mut =
&mut this.branch_target_buffer.branch_pc_to_target_map[*btb_entry_index]; &mut this.branch_target_buffer.branch_pc_to_target_map[*btb_entry_index];
// verify it hasn't been changed meanwhile // verify it hasn't been changed meanwhile
@ -1002,6 +1084,11 @@ impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
} }
} }
} }
start_pc + u64::from(fallthrough_offset)
};
if new_next_pc != orig_next_pc.as_int() {
*this.cancel_in_progress_fetches = true;
*this.pc = new_next_pc.into();
} }
} }
} }