WIP adding next_pc: added mock_fetch_decode_pipe

This commit is contained in:
Jacob Lifshay 2025-10-27 22:41:33 -07:00
parent 688732ec4c
commit a927451f8c
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
6 changed files with 12796 additions and 4 deletions

View file

@ -34,6 +34,8 @@ pub struct CpuConfig {
pub units: Vec<UnitConfig>,
pub out_reg_num_width: usize,
pub fetch_width: NonZeroUsize,
pub max_branches_per_fetch: NonZeroUsize,
pub log2_fetch_width_in_bytes: u8,
/// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize,
@ -47,6 +49,13 @@ impl CpuConfig {
};
v
};
/// Default for [`CpuConfig::max_branches_per_fetch`]: at most one branch per fetch group.
// `let ... else { unreachable!() }` is a const-friendly way to unwrap `NonZeroUsize::new`.
pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(1) else {
unreachable!();
};
v
};
/// Default for [`CpuConfig::log2_fetch_width_in_bytes`]: 2^3 = 8-byte fetch blocks.
pub const DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES: u8 = 3;
pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(8) else {
unreachable!();
@ -58,6 +67,8 @@ impl CpuConfig {
units,
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
fetch_width: Self::DEFAULT_FETCH_WIDTH,
max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH,
log2_fetch_width_in_bytes: Self::DEFAULT_LOG2_FETCH_WIDTH_IN_BYTES,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size,
}
@ -117,4 +128,21 @@ impl CpuConfig {
UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width]
[self.non_const_unit_nums().len()]
}
/// Fetch-block width in bytes, i.e. `1 << self.log2_fetch_width_in_bytes`.
///
/// # Panics
///
/// Panics if `log2_fetch_width_in_bytes >= usize::BITS` (shift would overflow).
pub fn fetch_width_in_bytes(&self) -> usize {
1usize
.checked_shl(self.log2_fetch_width_in_bytes.into())
.expect("log2_fetch_width_in_bytes is too big")
}
}
// Size aliases that expose fields of a `PhantomConst<CpuConfig>` as HDL `DynSize`s,
// so type-level widths can be derived from the runtime config.
/// [`CpuConfig::fetch_width`] as a type-level size.
#[hdl(get(|c| c.fetch_width.get()))]
pub type CpuConfigFetchWidth<C: PhantomConstGet<CpuConfig>> = DynSize;
/// [`CpuConfig::max_branches_per_fetch`] as a type-level size.
#[hdl(get(|c| c.max_branches_per_fetch.get()))]
pub type CpuConfigMaxBranchesPerFetch<C: PhantomConstGet<CpuConfig>> = DynSize;
/// [`CpuConfig::log2_fetch_width_in_bytes`] as a type-level size.
#[hdl(get(|c| c.log2_fetch_width_in_bytes.into()))]
pub type CpuConfigLog2FetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
/// [`CpuConfig::fetch_width_in_bytes`] as a type-level size.
#[hdl(get(|c| c.fetch_width_in_bytes()))]
pub type CpuConfigFetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;

View file

@ -2,6 +2,7 @@
// See Notices.txt for copyright information
pub mod config;
pub mod instruction;
pub mod next_pc;
pub mod reg_alloc;
pub mod register;
pub mod unit;

804
crates/cpu/src/next_pc.rs Normal file
View file

@ -0,0 +1,804 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10)
//!
//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage,
//! the `fetch` stage's outputs eventually end up in the `decode` stage,
//! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`)
//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds
//! information back to the `fetch` stage to cancel fetches that need to be predicted differently.
use crate::{
config::{CpuConfig, CpuConfigFetchWidth},
util::array_vec::ArrayVec,
};
use fayalite::{
int::{UIntInRange, UIntInRangeInclusive, UIntInRangeType},
prelude::*,
sim::{ForkJoinScope, value::SimOnlyValueTrait},
util::ready_valid::ReadyValid,
};
/// Predicted outcome of a conditional branch.
#[hdl]
pub enum PredictedCond {
Taken,
Fallthrough,
}
/// Marker type for branches (before the last one in a fetch group) that are
/// necessarily predicted as falling through.
#[hdl]
pub struct PredictedFallthrough {}
/// Kind of a predicted control-flow instruction; `CondKind` carries the
/// condition prediction (`HdlNone` means the instruction is unconditional).
#[hdl]
pub enum BranchPredictionKind<CondKind> {
Branch(HdlOption<CondKind>),
IndirectBranch(HdlOption<CondKind>),
Call(HdlOption<CondKind>),
IndirectCall(HdlOption<CondKind>),
Ret(HdlOption<CondKind>),
}
/// Capacity for branches *before* the last one in a fetch group:
/// `max_branches_per_fetch - 1` (the last branch is stored separately).
#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))]
pub type NextPcPredictionMaxBranchesBeforeLast<C: PhantomConstGet<CpuConfig>> = DynSize;
/// One `next_pc` prediction for a fetch group: where to fetch from and which
/// branches inside the group are predicted, ending with the (only possibly
/// taken) last branch and its target.
#[hdl(no_static)]
pub struct NextPcPrediction<C: PhantomConstGet<CpuConfig>> {
pub fetch_pc: UInt<64>,
pub async_interrupt: Bool,
/// branches before the last are always predicted fallthrough
pub branches_before_last: ArrayVec<
BranchPredictionKind<PredictedFallthrough>,
NextPcPredictionMaxBranchesBeforeLast<C>,
>,
pub last_branch: HdlOption<BranchPredictionKind<PredictedCond>>,
pub last_branch_target_pc: UInt<64>,
}
/// Bit width of a fetch-block id (derived from [`FetchBlockIdInt`]).
pub const FETCH_BLOCK_ID_WIDTH: usize = FetchBlockIdInt::BITS as usize;
// Host-side integer type used to manipulate fetch-block ids in simulation.
type FetchBlockIdInt = u8;
/// Payload sent from `next_pc` to `fetch` for one fetch request.
#[hdl]
pub struct NextPcToFetchInterfaceInner {
pub next_fetch_pc: UInt<64>,
/// sequence id used to match fetches with later pipeline stages
pub fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
/// number of newest in-progress fetches to cancel (e.g. after re-prediction)
pub in_progress_fetches_to_cancel: UInt<8>,
}
/// Ready/valid channel from `next_pc` to `fetch`, plus the config constant.
#[hdl(no_static)]
pub struct NextPcToFetchInterface<C: PhantomConstGet<CpuConfig>> {
pub inner: ReadyValid<NextPcToFetchInterfaceInner>,
pub config: C,
}
#[hdl]
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
pub enum WipDecodedInsnKind {
NonBranch,
Branch(UInt<64>),
BranchCond(UInt<64>),
IndirectBranch,
IndirectBranchCond,
Call(UInt<64>),
CallCond(UInt<64>),
IndirectCall,
IndirectCallCond,
Ret,
RetCond,
/// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on.
Interrupt(UInt<64>),
}
#[hdl]
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
pub struct WipDecodedInsn {
pub fetch_block_id: UInt<8>,
pub id: UInt<12>,
pub pc: UInt<64>,
pub size_in_bytes: UInt<4>,
pub kind: WipDecodedInsnKind,
}
#[hdl(no_static)]
/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire,
/// as well as updating state when a branch instruction is mis-speculated.
pub struct NextPcToRetireInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
pub config: C,
}
/// Payload sent from `decode` to `post_decode`: the decoded instructions of
/// one fetch block.
#[hdl(no_static)]
pub struct DecodeToPostDecodeInterfaceInner<C: PhantomConstGet<CpuConfig>> {
pub fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
pub insns: ArrayVec<WipDecodedInsn, CpuConfigFetchWidth<C>>,
// TODO: add needed fields
pub config: C,
}
/// Ready/valid channel from `decode` to `post_decode`.
#[hdl(no_static)]
pub struct DecodeToPostDecodeInterface<C: PhantomConstGet<CpuConfig>> {
pub inner: ReadyValid<DecodeToPostDecodeInterfaceInner<C>>,
}
/// Output of the `post_decode` stage (fields not yet defined — WIP).
#[hdl(no_static)]
pub struct PostDecodeOutputInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
pub config: C,
}
/// 2-bit saturating-counter state for one branch-predictor entry.
/// `towards_taken`/`towards_not_taken` saturate at the strong states.
#[hdl]
enum BranchPredictionState {
StronglyNotTaken,
WeaklyNotTaken,
WeaklyTaken,
StronglyTaken,
}
impl BranchPredictionState {
/// Predicted direction for this counter state (taken iff in a *Taken state).
#[must_use]
#[hdl]
fn is_taken(this: &SimValue<Self>) -> bool {
#[hdl(sim)]
match this {
Self::StronglyNotTaken => false,
Self::WeaklyNotTaken => false,
Self::WeaklyTaken => true,
Self::StronglyTaken => true,
}
}
/// Counter moved one step towards `StronglyTaken` (saturating).
#[must_use]
#[hdl]
fn towards_taken(this: &SimValue<Self>) -> SimValue<Self> {
(#[hdl(sim)]
match this {
Self::StronglyNotTaken => BranchPredictionState.WeaklyNotTaken(),
Self::WeaklyNotTaken => BranchPredictionState.WeaklyTaken(),
Self::WeaklyTaken => BranchPredictionState.StronglyTaken(),
Self::StronglyTaken => BranchPredictionState.StronglyTaken(),
})
.to_sim_value()
}
/// Counter moved one step towards `StronglyNotTaken` (saturating).
#[must_use]
#[hdl]
fn towards_not_taken(this: &SimValue<Self>) -> SimValue<Self> {
(#[hdl(sim)]
match this {
Self::StronglyNotTaken => BranchPredictionState.StronglyNotTaken(),
Self::WeaklyNotTaken => BranchPredictionState.StronglyNotTaken(),
Self::WeaklyTaken => BranchPredictionState.WeaklyNotTaken(),
Self::StronglyTaken => BranchPredictionState.WeaklyTaken(),
})
.to_sim_value()
}
}
impl SimValueDefault for BranchPredictionState {
// reset to a weak state so the first observed outcome can flip the prediction
fn sim_value_default(self) -> SimValue<Self> {
self.WeaklyNotTaken().to_sim_value()
}
}
/// Progress of a multi-cycle reset sequence.
#[derive(Copy, Clone, Debug)]
#[must_use]
enum ResetStatus {
    Done,
    Working,
}
impl ResetStatus {
    /// Combines two statuses: the overall reset is [`ResetStatus::Done`] only
    /// once *both* combined parts report `Done`, otherwise it is `Working`.
    fn and(self, other: Self) -> Self {
        if matches!(self, ResetStatus::Done) && matches!(other, ResetStatus::Done) {
            ResetStatus::Done
        } else {
            ResetStatus::Working
        }
    }
}
/// Types that have a well-defined simulation-time reset/default value.
trait SimValueDefault: Type {
fn sim_value_default(self) -> SimValue<Self>;
}
impl<T: SimOnlyValueTrait> SimValueDefault for SimOnly<T> {
fn sim_value_default(self) -> SimValue<Self> {
SimOnlyValue::<T>::default().to_sim_value_with_type(self)
}
}
impl<T: Type> SimValueDefault for HdlOption<T> {
// options default to HdlNone
fn sim_value_default(self) -> SimValue<Self> {
self.HdlNone().to_sim_value_with_type(self)
}
}
impl SimValueDefault for Bool {
fn sim_value_default(self) -> SimValue<Self> {
false.to_sim_value()
}
}
impl<Width: Size> SimValueDefault for UIntType<Width> {
fn sim_value_default(self) -> SimValue<Self> {
self.zero().to_sim_value()
}
}
/// Types whose reset is spread over multiple clock cycles; `reset_step` is
/// called with `step = 0, 1, 2, ...` once per cycle until it returns
/// [`ResetStatus::Done`].
trait ResetSteps: Type {
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus;
}
impl<T: SimValueDefault, Len: Size> ResetSteps for ArrayType<T, Len> {
// reset one array element per step; done once `step` reaches the last index
// (a zero-length array is immediately done)
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
let element = SimValue::ty(this).element();
let len = SimValue::ty(this).len();
if step < len {
this[step] = element.sim_value_default();
}
if step.saturating_add(1) >= len {
ResetStatus::Done
} else {
ResetStatus::Working
}
}
}
/// Return-address predictor stack used for `Call`/`Ret` prediction.
#[hdl]
struct CallStack {
return_addresses: Array<UInt<64>, { CallStack::SIZE }>,
/// number of valid entries (0..=SIZE)
len: UIntInRangeInclusive<0, { CallStack::SIZE }>,
}
impl CallStack {
const SIZE: usize = 16;
}
impl SimValueDefault for CallStack {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
CallStack {
// something other than zero so you can see the values getting reset
return_addresses: [!0u64; Self::SIZE],
len: 0usize.to_sim_value_with_type(self.len),
}
}
}
impl ResetSteps for CallStack {
// single-step reset: clear every slot and the length at once
#[hdl]
fn reset_step(this: &mut SimValue<Self>, _step: usize) -> ResetStatus {
#[hdl(sim)]
let CallStack {
return_addresses,
len,
} = this;
// return_addresses is implemented as a shift register, so it can be all reset at once
return_addresses.fill(0u64.to_sim_value());
**len = 0;
ResetStatus::Done
}
}
/// What kind of control-flow instruction a BTB entry describes.
#[hdl]
enum BTBEntryInsnKind {
Branch,
Call,
Ret,
}
/// How the entry's target address / direction should be interpreted.
#[hdl]
enum BTBEntryAddrKind {
Unconditional,
Indirect,
CondTaken,
CondNotTaken,
}
impl BTBEntryAddrKind {
/// Whether the branch is predicted taken (everything except `CondNotTaken`).
#[hdl]
fn taken(this: &SimValue<Self>) -> bool {
#[hdl(sim)]
match this {
Self::Unconditional | Self::Indirect | Self::CondTaken => true,
Self::CondNotTaken => false,
}
}
}
/// One branch-target-buffer entry, keyed by the fetch block's `start_pc`.
#[hdl]
struct BTBEntry {
/// address of first instruction to run in this fetch block
start_pc: UInt<64>,
target_pc: UInt<64>,
/// when branch is not taken, the next pc to fetch from is `start_pc + fallthrough_offset`.
/// needed because there may be more than one branch in a fetch block
fallthrough_offset: UInt<8>,
insn_kind: BTBEntryInsnKind,
addr_kind: BTBEntryAddrKind,
}
impl BTBEntry {
/// Next fetch PC when the branch is taken.
fn taken_pc(this: &SimValue<Self>) -> u64 {
this.target_pc.as_int()
}
/// Next fetch PC when the branch is not taken (wrapping 64-bit add).
fn not_taken_fetch_pc(this: &SimValue<Self>) -> u64 {
this.start_pc
.as_int()
.wrapping_add(this.fallthrough_offset.as_int().into())
}
}
/// Small fully-associative branch target buffer mapping fetch-block start PCs
/// to predicted targets.
#[hdl]
struct BranchTargetBuffer {
branch_pc_to_target_map: Array<HdlOption<BTBEntry>, { BranchTargetBuffer::SIZE }>,
}
impl BranchTargetBuffer {
const SIZE: usize = 16;
}
impl SimValueDefault for BranchTargetBuffer {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
BranchTargetBuffer {
// something other than zero so you can see the values getting reset
branch_pc_to_target_map: [HdlSome(
#[hdl(sim)]
BTBEntry {
start_pc: !0u64,
target_pc: !0u64,
fallthrough_offset: !0u8,
insn_kind: BTBEntryInsnKind.Call(),
addr_kind: BTBEntryAddrKind.CondNotTaken(),
},
); Self::SIZE],
}
}
}
impl ResetSteps for BranchTargetBuffer {
// delegates to the array impl: one entry cleared to HdlNone per step
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let BranchTargetBuffer {
branch_pc_to_target_map,
} = this;
ResetSteps::reset_step(branch_pc_to_target_map, step)
}
}
/// Ring buffer of recent branch outcomes (taken = true), with two heads so
/// speculatively-recorded outcomes can be confirmed or discarded later.
#[hdl]
struct BranchHistory {
history: Array<Bool, { BranchHistory::SIZE }>,
/// exclusive
tail: UIntInRange<0, { BranchHistory::SIZE }>,
/// inclusive, always at or after tail, always at or before speculative_head
non_speculative_head: UIntInRange<0, { BranchHistory::SIZE }>,
/// inclusive, always at or after both tail and non_speculative_head
speculative_head: UIntInRange<0, { BranchHistory::SIZE }>,
}
impl ResetSteps for BranchHistory {
// pointers are reset immediately; the history array is reset one bit per step
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let Self {
history,
tail,
non_speculative_head,
speculative_head,
} = this;
**tail = 0;
**non_speculative_head = 0;
**speculative_head = 0;
ResetSteps::reset_step(history, step)
}
}
impl SimValueDefault for BranchHistory {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
BranchHistory {
// something other than zero so you can see the values getting reset
history: [true; Self::SIZE],
tail: 0usize.to_sim_value_with_type(self.tail),
non_speculative_head: 0usize.to_sim_value_with_type(self.non_speculative_head),
speculative_head: 0usize.to_sim_value_with_type(self.speculative_head),
}
}
}
/// Error from [`BranchHistory::try_push_speculative`].
enum BranchHistoryTryPushSpeculativeError {
NoSpace,
}
/// Error from [`BranchHistory::try_push_non_speculative`].
enum BranchHistoryTryPushNonSpeculativeError {
NoSpace,
/// the confirmed outcome differs from what was speculatively recorded
Misprediction { speculated: bool },
}
impl BranchHistory {
const LOG2_SIZE: usize = 8;
const SIZE: usize = 1 << Self::LOG2_SIZE;
// ring-buffer index arithmetic, modulo SIZE
fn next_pos(pos: usize) -> usize {
(pos + 1) % Self::SIZE
}
fn prev_pos(pos: usize) -> usize {
(pos + Self::SIZE - 1) % Self::SIZE
}
/// Reads up to `N` outcomes walking backwards (newest first) from `head`
/// until `tail` (exclusive) is reached; remaining slots stay `false`.
fn history_from_head<const N: usize>(this: &SimValue<Self>, head: usize) -> [bool; N] {
let mut retval = [false; N];
let mut pos = head;
for entry in &mut retval {
if pos == *this.tail {
break;
}
*entry = *this.history[pos];
pos = Self::prev_pos(pos);
}
retval
}
/// Discards all unconfirmed speculative outcomes (e.g. on misprediction).
fn delete_speculative_history(this: &mut SimValue<Self>) {
let non_speculative_head = *this.non_speculative_head;
*this.speculative_head = non_speculative_head;
}
fn recent_history_including_speculative<const N: usize>(this: &SimValue<Self>) -> [bool; N] {
let head = *this.speculative_head;
Self::history_from_head(this, head)
}
// full when advancing the head would collide with the tail
// (one slot is left unused to distinguish full from empty)
fn speculative_full(this: &SimValue<Self>) -> bool {
let speculative_head = *this.speculative_head;
Self::next_pos(speculative_head) == *this.tail
}
/// Records a newly predicted (speculative) branch outcome.
fn try_push_speculative(
this: &mut SimValue<Self>,
value: bool,
) -> Result<(), BranchHistoryTryPushSpeculativeError> {
if Self::speculative_full(this) {
Err(BranchHistoryTryPushSpeculativeError::NoSpace)
} else {
let speculative_head = Self::next_pos(*this.speculative_head);
*this.speculative_head = speculative_head;
*this.history[speculative_head] = value;
Ok(())
}
}
/// Confirms the oldest unconfirmed speculative outcome with the actual
/// outcome `value`, reporting a misprediction if they disagree.
fn try_push_non_speculative(
this: &mut SimValue<Self>,
value: bool,
) -> Result<(), BranchHistoryTryPushNonSpeculativeError> {
let speculative_head = *this.speculative_head;
let non_speculative_head = *this.non_speculative_head;
// NOTE(review): equal heads here means there is no speculative entry left
// to confirm; the `NoSpace` error name reads oddly for that — confirm intent.
if speculative_head == non_speculative_head {
Err(BranchHistoryTryPushNonSpeculativeError::NoSpace)
} else {
let pos = Self::next_pos(non_speculative_head);
let speculated = *this.history[pos];
if speculated != value {
Err(BranchHistoryTryPushNonSpeculativeError::Misprediction { speculated })
} else {
*this.non_speculative_head = pos;
Ok(())
}
}
}
}
/// Simple ring-buffer queue; one slot is left unused to distinguish full from
/// empty (`is_full` when advancing `head` would reach `tail`).
#[hdl]
struct Queue<T, Capacity: Size> {
data: ArrayType<T, Capacity>,
/// inclusive
head: UIntInRangeType<ConstUsize<0>, Capacity>,
/// exclusive
tail: UIntInRangeType<ConstUsize<0>, Capacity>,
}
impl<T: Type, Capacity: Size> Queue<T, Capacity> {
fn capacity(self) -> usize {
self.data.len()
}
// ring-buffer index arithmetic, modulo capacity (capacity must be non-zero)
fn next_pos(self, pos: usize) -> usize {
assert_ne!(self.capacity(), 0);
(pos + 1) % self.capacity()
}
fn prev_pos(self, pos: usize) -> usize {
assert_ne!(self.capacity(), 0);
(pos + self.capacity() - 1) % self.capacity()
}
fn is_empty(this: &SimValue<Self>) -> bool {
this.head == this.tail
}
fn is_full(this: &SimValue<Self>) -> bool {
let head = *this.head;
let tail = *this.tail;
SimValue::ty(this).next_pos(head) == tail
}
/// Pushes at the `head` end; fails (returning `Err(())`) when full.
// NOTE(review): pushing advances `head` rather than `tail` — opposite of the
// usual queue convention; no pop is defined in this file, so confirm the
// intended consumer-side convention before relying on element order.
fn try_push(this: &mut SimValue<Self>, value: impl ToSimValueWithType<T>) -> Result<(), ()> {
if Self::is_full(this) {
Err(())
} else {
let head = *this.head;
let head = SimValue::ty(this).next_pos(head);
*this.head = head;
let data = &mut this.data[head];
*data = value.to_sim_value_with_type(SimValue::ty(data));
Ok(())
}
}
}
impl<T: SimValueDefault, Capacity: Size> SimValueDefault for Queue<T, Capacity> {
// empty queue with every slot holding the element default
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
let Self { data, head, tail } = self;
#[hdl(sim)]
Queue::<T, Capacity> {
data: repeat(
data.element().sim_value_default(),
Capacity::from_usize(data.len()),
),
head: 0usize.to_sim_value_with_type(head),
tail: 0usize.to_sim_value_with_type(tail),
}
}
}
impl<T: SimValueDefault, Capacity: Size> ResetSteps for Queue<T, Capacity> {
// pointers reset immediately; data resets one element per step
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let Queue::<T, Capacity> { data, head, tail } = this;
**head = 0;
**tail = 0;
ResetSteps::reset_step(data, step)
}
}
/// Bookkeeping entry for a fetch that has been issued but not yet retired from
/// the fetch pipeline.
#[hdl]
struct FetchQueueEntry {
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
}
impl SimValueDefault for FetchQueueEntry {
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
FetchQueueEntry {
fetch_block_id: 0 as FetchBlockIdInt,
}
}
}
// branch predictor table: 2^8 = 256 two-bit counters, indexed by a hash of pc
// and branch history (see `NextPcState::branch_predictor_index`)
const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8;
const BRANCH_PREDICTOR_SIZE: usize = 1 << BRANCH_PREDICTOR_LOG2_SIZE;
/// All architectural/predictor state owned by the `next_pc` stage; exposed as a
/// module output for debugging/tracing.
#[hdl]
pub struct NextPcState<C: PhantomConstGet<CpuConfig>> {
speculative_call_stack: CallStack,
non_speculative_call_stack: CallStack,
branch_target_buffer: BranchTargetBuffer,
branch_history: BranchHistory,
branch_predictor: Array<BranchPredictionState, { BRANCH_PREDICTOR_SIZE }>,
/// ids of fetches issued but not yet completed; sized so every distinct
/// fetch_block_id value can be in flight
fetching_queue: Queue<FetchQueueEntry, ConstUsize<{ 1 << FETCH_BLOCK_ID_WIDTH }>>,
pc: UInt<64>,
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
config: C,
}
impl<C: Type + PhantomConstGet<CpuConfig>> NextPcState<C> {
    /// Returns the fetch-block-aligned PC to fetch from next: `pc` with the low
    /// `log2_fetch_width_in_bytes` bits cleared.
    fn next_fetch_pc(this: &SimValue<Self>) -> u64 {
        let pc = u64::try_from(this.pc.to_bigint()).expect("in range");
        pc & (!0u64 << SimValue::ty(&this.config).get().log2_fetch_width_in_bytes)
    }
    /// Hashes `pc` together with the recent branch history (including
    /// speculative outcomes) into an index into `branch_predictor`.
    fn branch_predictor_index(this: &SimValue<Self>, pc: u64) -> usize {
        // pack the newest BRANCH_PREDICTOR_LOG2_SIZE history bits into an integer
        let mut history = 0u64;
        let history_bits: [bool; BRANCH_PREDICTOR_LOG2_SIZE] =
            BranchHistory::recent_history_including_speculative(&this.branch_history);
        for history_bit in history_bits {
            history <<= 1;
            if history_bit {
                history |= 1;
            }
        }
        // cheap non-cryptographic mixing of history and pc
        let mut t = history;
        t ^= t.rotate_left(5) & !pc.rotate_right(3);
        t ^= pc;
        t ^= !t.rotate_left(2) & t.rotate_left(4);
        // XOR-fold all 64 bits of `t` down into the low BRANCH_PREDICTOR_LOG2_SIZE
        // bits. Bug fix: this previously iterated
        // `(0..BRANCH_PREDICTOR_LOG2_SIZE).step_by(BRANCH_PREDICTOR_LOG2_SIZE)`,
        // which runs exactly once with `i == 0`, so `retval` was just `t` and the
        // high 56 bits never influenced the index (the final `%` discarded them).
        let mut retval = 0;
        for i in (0..u64::BITS as usize).step_by(BRANCH_PREDICTOR_LOG2_SIZE) {
            retval ^= t >> i;
        }
        retval as usize % BRANCH_PREDICTOR_SIZE
    }
}
impl SimValueDefault for NextPcState<PhantomConst<CpuConfig>> {
// pre-reset value: sub-states use their own defaults, scalars use all-ones
// so the per-cycle reset progress is visible in traces
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
let Self {
speculative_call_stack,
non_speculative_call_stack,
branch_target_buffer,
branch_history,
branch_predictor: _,
fetching_queue,
pc: _,
fetch_block_id: _,
config,
} = self;
#[hdl(sim)]
Self {
speculative_call_stack: speculative_call_stack.sim_value_default(),
non_speculative_call_stack: non_speculative_call_stack.sim_value_default(),
branch_target_buffer: branch_target_buffer.sim_value_default(),
branch_history: branch_history.sim_value_default(),
// use something other than the default so you can see the reset progress
branch_predictor: std::array::from_fn(|_| {
BranchPredictionState::towards_not_taken(&BranchPredictionState.sim_value_default())
}),
fetching_queue: fetching_queue.sim_value_default(),
// use something other than the default so you can see the reset progress
pc: !0u64,
// use something other than the default so you can see the reset progress
fetch_block_id: !0u8,
config,
}
}
}
impl<C: Type + PhantomConstGet<CpuConfig>> ResetSteps for NextPcState<C> {
// scalars reset immediately; array-backed sub-states step in parallel and the
// overall status is Done only once every sub-state is Done
#[hdl]
fn reset_step(this: &mut SimValue<Self>, step: usize) -> ResetStatus {
#[hdl(sim)]
let NextPcState::<C> {
speculative_call_stack,
non_speculative_call_stack,
branch_target_buffer,
branch_history,
branch_predictor,
fetching_queue,
pc,
fetch_block_id,
config: _,
} = this;
**pc = 0u64.into(); // match Microwatt's reset PC
**fetch_block_id = 0u8.into();
let speculative_call_stack = ResetSteps::reset_step(speculative_call_stack, step);
let non_speculative_call_stack = ResetSteps::reset_step(non_speculative_call_stack, step);
let branch_target_buffer = ResetSteps::reset_step(branch_target_buffer, step);
let branch_history = ResetSteps::reset_step(branch_history, step);
let branch_predictor = ResetSteps::reset_step(branch_predictor, step);
let fetching_queue = ResetSteps::reset_step(fetching_queue, step);
speculative_call_stack
.and(non_speculative_call_stack)
.and(branch_target_buffer)
.and(branch_history)
.and(branch_predictor)
.and(fetching_queue)
}
}
/// The `next_pc` stage as an extern (simulation-only) module: issues predicted
/// fetch PCs to `fetch` and (eventually) consumes `post_decode` feedback.
/// Still WIP — see TODOs below.
#[hdl_module(extern)]
pub fn next_pc(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let to_fetch: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
m.output(NextPcToFetchInterface[config]);
#[hdl]
let from_decode: DecodeToPostDecodeInterface<PhantomConst<CpuConfig>> =
m.input(DecodeToPostDecodeInterface[config]);
// internal state is exposed as an output purely for debugging/tracing
#[hdl]
let state_for_debug: NextPcState<PhantomConst<CpuConfig>> = m.output(NextPcState[config]);
m.register_clock_for_past(cd.clk);
/// Simulation body: runs the multi-cycle reset sequence, then spawns the
/// steady-state processes.
#[hdl]
async fn run(
scope: ForkJoinScope<'_>,
mut sim: ExternModuleSimulationState,
cd: Expr<ClockDomain>,
to_fetch: Expr<NextPcToFetchInterface<PhantomConst<CpuConfig>>>,
from_decode: Expr<DecodeToPostDecodeInterface<PhantomConst<CpuConfig>>>,
state_expr: Expr<NextPcState<PhantomConst<CpuConfig>>>,
) {
let config = state_expr.config.ty();
let mut state = sim.read(state_expr).await;
// reset phase: apply one ResetSteps step per clock edge until all
// sub-states report Done
for step in 0usize.. {
sim.write(state_expr, state).await;
sim.wait_for_clock_edge(cd.clk).await;
state = sim.read_past(state_expr, cd.clk).await;
let reset_status = ResetSteps::reset_step(&mut state, step);
match reset_status {
ResetStatus::Done => break,
ResetStatus::Working => {}
}
}
// output process: combinationally drive to_fetch.data from the current
// state whenever it changes (no data while the fetching queue is full)
scope.spawn_detached(|_, mut sim: ExternModuleSimulationState| async move {
loop {
let state = sim.read(state_expr).await;
if Queue::is_full(&state.fetching_queue) {
sim.write(to_fetch.inner.data, HdlNone()).await;
} else {
sim.write(
to_fetch.inner.data,
HdlSome(
#[hdl(sim)]
NextPcToFetchInterfaceInner {
next_fetch_pc: NextPcState::next_fetch_pc(&state),
fetch_block_id: state.fetch_block_id,
in_progress_fetches_to_cancel: 0u8, // TODO: implement
},
),
)
.await;
}
sim.wait_for_changes([state_expr], None).await;
}
});
// main per-clock process: when fetch accepted a request, record it in the
// fetching queue and compute the next pc (BTB hit => predicted target or
// fallthrough, miss => sequential next fetch block)
scope.spawn_detached(|_, mut sim: ExternModuleSimulationState| async move {
loop {
sim.write(state_expr, state).await;
sim.wait_for_clock_edge(cd.clk).await;
state = sim.read_past(state_expr, cd.clk).await;
let next_fetch_pc = NextPcState::next_fetch_pc(&state);
if Queue::is_full(&state.fetching_queue) {
continue;
}
if sim.read_past_bool(to_fetch.inner.ready, cd.clk).await {
let fetch_block_id =
FetchBlockIdInt::try_from(state.fetch_block_id.to_bigint())
.expect("in range");
// TODO: handle instructions not aligned with fetch blocks
// default: sequential fetch (next fetch block)
let mut new_pc =
next_fetch_pc.wrapping_add(config.get().fetch_width_in_bytes() as u64);
// linear scan of the (small, fully-associative) BTB
for entry in &state.branch_target_buffer.branch_pc_to_target_map {
#[hdl(sim)]
match entry {
HdlNone => continue,
HdlSome(entry) => {
if entry.start_pc == state.pc {
new_pc = if BTBEntryAddrKind::taken(&entry.addr_kind) {
BTBEntry::taken_pc(entry)
} else {
BTBEntry::not_taken_fetch_pc(entry)
};
break;
}
}
}
}
Queue::try_push(
&mut state.fetching_queue,
#[hdl(sim)]
FetchQueueEntry { fetch_block_id },
)
.expect("checked is_full above");
// TODO: insert pipeline stage between BTB and branch predictor
*state.pc = new_pc.into();
*state.fetch_block_id = fetch_block_id.wrapping_add(1).into();
}
}
});
// TODO: finish
}
m.extern_module_simulation_fn(
(cd, to_fetch, from_decode, state_for_debug),
|(cd, to_fetch, from_decode, state_for_debug), mut sim| async move {
sim.write(state_for_debug, state_for_debug.ty().sim_value_default())
.await;
// `resettable` re-runs the body whenever reset asserts; the first
// closure drives the outputs' reset values
sim.resettable(
cd,
|mut sim: ExternModuleSimulationState| async move {
sim.write(to_fetch.inner.data, HdlNone()).await;
sim.write(from_decode.inner.ready, false).await;
},
|mut sim: ExternModuleSimulationState, ()| async move {
sim.fork_join_scope(|scope, sim| {
run(scope, sim, cd, to_fetch, from_decode, state_for_debug)
})
.await
},
)
.await;
},
);
}

View file

@ -22,6 +22,18 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
len: 0u8.cast_to(self.len),
}
}
/// Creates an empty (len = 0) simulation value, filling every backing slot
/// with `uninit_element` so unused slots hold a defined value.
#[hdl]
pub fn new_sim(self, uninit_element: impl ToSimValueWithType<T>) -> SimValue<Self> {
let uninit_element = uninit_element.into_sim_value_with_type(self.element());
#[hdl(sim)]
ArrayVec::<_, _> {
elements: SimValue::from_array_elements(
self.elements,
(0..self.elements.len()).map(|_| uninit_element.clone()),
),
len: 0u8.cast_to(self.len),
}
}
pub fn element(self) -> T {
self.elements.element()
}
@ -52,6 +64,9 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
pub fn len(this: impl ToExpr<Type = Self>) -> Expr<Length<N>> {
this.to_expr().len
}
/// Borrows the current length of a simulation-time `ArrayVec` value.
pub fn len_sim(this: &SimValue<Self>) -> &SimValue<Length<N>> {
&this.len
}
pub fn is_empty(this: impl ToExpr<Type = Self>) -> Expr<Bool> {
let len = Self::len(this);
len.cmp_eq(0u8)
@ -75,6 +90,62 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
}
}
}
/// Calls `f(index, element)` for each of the first `len` elements, consuming
/// the value. Elements at or beyond `len` are skipped.
#[hdl]
pub async fn for_each_sim(
this: impl ToSimValue<Type = Self>,
mut f: impl AsyncFnMut(usize, SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this.into_sim_value();
for (index, element) in elements.into_iter().enumerate() {
if index.cmp_lt(*len) {
f(index, element).await;
}
}
}
/// Like [`Self::for_each_sim`], but borrows the elements immutably.
#[hdl]
pub async fn for_each_sim_ref<'a>(
this: &'a SimValue<Self>,
mut f: impl AsyncFnMut(usize, &'a SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
for (index, element) in elements.iter().enumerate() {
if index.cmp_lt(**len) {
f(index, element).await;
}
}
}
/// Like [`Self::for_each_sim`], but borrows the elements mutably.
#[hdl]
pub async fn for_each_sim_mut<'a>(
this: &'a mut SimValue<Self>,
mut f: impl AsyncFnMut(usize, &'a mut SimValue<T>),
) {
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
for (index, element) in elements.iter_mut().enumerate() {
if index.cmp_lt(**len) {
f(index, element).await;
}
}
}
/// Appends `value` at simulation time, returning it back as `Err` when the
/// vector is already at capacity.
#[hdl]
pub fn try_push_sim(
this: &mut SimValue<Self>,
value: impl ToSimValueWithType<T>,
) -> Result<(), SimValue<T>> {
let value = value.into_sim_value_with_type(this.ty().element());
let capacity = this.ty().capacity();
#[hdl(sim)]
let ArrayVec::<_, _> { elements, len } = this;
if **len < capacity {
elements[**len] = value;
**len += 1;
Ok(())
} else {
Err(value)
}
}
pub fn mapped_ty<U: Type>(self, new_element_ty: U) -> ArrayVec<U, N> {
ArrayVec {
elements: ArrayType[new_element_ty][N::from_usize(self.elements.len())],
@ -100,10 +171,8 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
pub fn as_array_of_options(this: impl ToExpr<Type = Self>) -> Expr<ArrayType<HdlOption<T>, N>> {
let this = this.to_expr();
#[hdl]
let array_vec_as_array_of_options = wire(
ArrayType[HdlOption[this.ty().element()]]
[N::from_usize(this.ty().capacity())],
);
let array_vec_as_array_of_options =
wire(ArrayType[HdlOption[this.ty().element()]][N::from_usize(this.ty().capacity())]);
for element in array_vec_as_array_of_options {
connect(element, element.ty().HdlNone());
}

File diff suppressed because it is too large Load diff

338
crates/cpu/tests/next_pc.rs Normal file
View file

@ -0,0 +1,338 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use cpu::{
config::{CpuConfig, UnitConfig},
next_pc::{
DecodeToPostDecodeInterface, DecodeToPostDecodeInterfaceInner, FETCH_BLOCK_ID_WIDTH,
NextPcToFetchInterface, NextPcToFetchInterfaceInner, WipDecodedInsn, WipDecodedInsnKind,
next_pc,
},
unit::UnitKind,
util::array_vec::ArrayVec,
};
use fayalite::{prelude::*, sim::vcd::VcdWriterDecls, util::RcWriter};
use std::{
cell::Cell,
collections::{BTreeMap, VecDeque},
num::NonZeroUsize,
};
/// A fake instruction used to drive `next_pc` in tests.
#[derive(Copy, Clone, Debug)]
enum MockInsn {
    Nop4,
    Jump { target: u64 },
    CondBranch { target: u64 },
    Call { target: u64 },
    Ret,
}
impl MockInsn {
    /// Encoded size in bytes — every mock instruction is 4 bytes long.
    fn byte_len(self) -> u64 {
        match self {
            MockInsn::Nop4
            | MockInsn::Jump { .. }
            | MockInsn::CondBranch { .. }
            | MockInsn::Call { .. }
            | MockInsn::Ret => 4,
        }
    }
}
/// A tiny fixed mock program, keyed by instruction PC.
#[derive(Debug)]
struct MockInsns {
    insns: BTreeMap<u64, MockInsn>,
}
impl MockInsns {
    /// Builds the fixed test program (a nop prologue, a backwards conditional
    /// branch, a call, some jumps, and a return).
    fn new() -> Self {
        let insns = BTreeMap::from_iter([
            (0x0, MockInsn::Nop4),
            (0x4, MockInsn::Nop4),
            (0x8, MockInsn::CondBranch { target: 0x4 }),
            (0xC, MockInsn::Call { target: 0x18 }),
            (0x10, MockInsn::Jump { target: 0x10 }),
            (0x14, MockInsn::Jump { target: 0x10 }),
            (0x18, MockInsn::Jump { target: 0x1C }),
            (0x1C, MockInsn::Ret),
        ]);
        Self { insns }
    }
    /// Iterates the instructions that lie entirely inside `pc_range`
    /// (an instruction whose end would cross `pc_range.end` is excluded).
    fn fetch_block(&self, pc_range: std::ops::Range<u64>) -> impl Iterator<Item = (u64, MockInsn)> {
        let end = pc_range.end;
        self.insns
            .range(pc_range)
            .map(|(&pc, &insn)| (pc, insn))
            .filter(move |&(pc, insn)| pc + insn.byte_len() <= end)
    }
}
// maximum number of fetches the mock pipe holds in flight at once
const FETCH_PIPE_QUEUE_SIZE: usize = 5;
// trap "vector" reported when a fetch block decodes to zero instructions
const DEMO_ILLEGAL_INSN_TRAP: u64 = 0xFF000000u64;
/// One in-flight fetch inside the mock fetch pipe, with a countdown of cycles
/// until its decoded instructions become available.
#[hdl]
struct FetchPipeQueueEntry {
fetch_pc: UInt<64>,
cycles_left: UInt<8>,
fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>,
}
impl FetchPipeQueueEntry {
/// All-zero simulation value (used to fill unused debug-queue slots).
#[hdl]
fn default_sim(self) -> SimValue<Self> {
#[hdl(sim)]
FetchPipeQueueEntry {
fetch_pc: 0u64,
cycles_left: 0u8,
fetch_block_id: 0u8,
}
}
/// Deterministic pseudo-random fetch latency (0..8 cycles) derived from and
/// advancing `delay_sequence_index`.
fn get_next_delay(delay_sequence_index: &Cell<u64>) -> u8 {
let index = delay_sequence_index.get();
delay_sequence_index.set(delay_sequence_index.get().wrapping_add(1));
// make a pseudo-random number deterministically based on index
let random = index
.wrapping_add(1)
.wrapping_mul(0x18C49126EABE7A0D) // random prime
.rotate_left(32)
.wrapping_mul(0x92B38C197608A6B) // random prime
.rotate_right(60);
(random % 8) as u8
}
}
/// Mock fetch+decode pipeline: accepts fetch requests from `next_pc`, holds
/// each for a pseudo-random number of cycles, then emits decoded instructions
/// from the [`MockInsns`] program to the post-decode interface.
#[hdl_module(extern)]
fn mock_fetch_pipe(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let from_fetch: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
m.input(NextPcToFetchInterface[config]);
#[hdl]
let to_post_decode: DecodeToPostDecodeInterface<PhantomConst<CpuConfig>> =
m.output(DecodeToPostDecodeInterface[config]);
// in-flight fetches, mirrored out as a module output for waveform debugging
#[hdl]
let queue_debug: ArrayVec<FetchPipeQueueEntry, ConstUsize<{ FETCH_PIPE_QUEUE_SIZE }>> =
m.output();
m.register_clock_for_past(cd.clk);
m.extern_module_simulation_fn(
(cd, from_fetch, to_post_decode, queue_debug),
|(cd, from_fetch, to_post_decode, queue_debug), mut sim| async move {
// intentionally have a different sequence each time we're reset
let delay_sequence_index = Cell::new(0);
sim.resettable(
cd,
// reset values for all driven outputs
async |mut sim| {
sim.write(from_fetch.inner.ready, false).await;
sim.write(
to_post_decode.inner.data,
to_post_decode.ty().inner.data.HdlNone(),
)
.await;
sim.write(
queue_debug,
queue_debug.ty().new_sim(FetchPipeQueueEntry.default_sim()),
)
.await;
},
|sim, ()| {
run_fn(
cd,
from_fetch,
to_post_decode,
queue_debug,
&delay_sequence_index,
sim,
)
},
)
.await;
},
);
/// Steady-state simulation loop; one iteration per clock cycle.
#[hdl]
async fn run_fn(
cd: Expr<ClockDomain>,
from_fetch: Expr<NextPcToFetchInterface<PhantomConst<CpuConfig>>>,
to_post_decode: Expr<DecodeToPostDecodeInterface<PhantomConst<CpuConfig>>>,
queue_debug: Expr<ArrayVec<FetchPipeQueueEntry, ConstUsize<{ FETCH_PIPE_QUEUE_SIZE }>>>,
delay_sequence_index: &Cell<u64>,
mut sim: ExternModuleSimulationState,
) {
let config = from_fetch.config.ty();
let mock_insns = MockInsns::new();
// host-side model of the in-flight fetch queue
let mut queue: VecDeque<SimValue<FetchPipeQueueEntry>> = VecDeque::new();
let mut next_id = 0u32;
loop {
// mirror the host-side queue into the debug output
let mut sim_queue = queue_debug.ty().new_sim(FetchPipeQueueEntry.default_sim());
for entry in &queue {
ArrayVec::try_push_sim(&mut sim_queue, entry)
.ok()
.expect("queue is known to be small enough");
}
sim.write(queue_debug, sim_queue).await;
// when the oldest fetch has finished its delay, decode its fetch block
if let Some(front) = queue.front().filter(|v| v.cycles_left.as_int() == 0) {
#[hdl(sim)]
let FetchPipeQueueEntry {
fetch_pc,
cycles_left: _,
fetch_block_id,
} = front;
let fetch_pc = fetch_pc.as_int();
// end of the fetch block containing fetch_pc
let fetch_end =
(fetch_pc + 1).next_multiple_of(config.get().fetch_width_in_bytes() as u64);
let insns = to_post_decode.ty().inner.data.HdlSome.insns;
let zeroed_insn = UInt[insns.element().canonical().bit_width()]
.zero()
.cast_bits_to(insns.element());
let mut insns = insns.new_sim(zeroed_insn);
// TODO: handle instructions that go past the end of a fetch block
for (pc, insn) in mock_insns.fetch_block(fetch_pc..fetch_end) {
let kind = match insn {
MockInsn::Nop4 => WipDecodedInsnKind.NonBranch(),
MockInsn::Jump { target } => WipDecodedInsnKind.Branch(target),
MockInsn::CondBranch { target } => WipDecodedInsnKind.BranchCond(target),
MockInsn::Call { target } => WipDecodedInsnKind.Call(target),
MockInsn::Ret => WipDecodedInsnKind.Ret(),
};
let insn = #[hdl(sim)]
WipDecodedInsn {
fetch_block_id,
id: next_id.cast_to_static::<UInt<_>>(),
pc,
size_in_bytes: insn.byte_len().cast_to_static::<UInt<_>>(),
kind,
};
match ArrayVec::try_push_sim(&mut insns, insn) {
Ok(()) => next_id = next_id.wrapping_add(1),
Err(_) => break,
}
}
// a fetch block with no decodable instructions produces a synthetic
// illegal-instruction trap entry instead of an empty block
if **ArrayVec::len_sim(&insns) == 0 {
let Ok(()) = ArrayVec::try_push_sim(
&mut insns,
#[hdl(sim)]
WipDecodedInsn {
fetch_block_id,
id: next_id.cast_to_static::<UInt<_>>(),
pc: fetch_pc,
size_in_bytes: 0u8.cast_to_static::<UInt<_>>(),
kind: WipDecodedInsnKind.Interrupt(DEMO_ILLEGAL_INSN_TRAP),
},
) else {
unreachable!();
};
next_id = next_id.wrapping_add(1);
}
sim.write(
to_post_decode.inner.data,
HdlSome(
#[hdl(sim)]
DecodeToPostDecodeInterfaceInner::<_> {
fetch_block_id,
insns,
config,
},
),
)
.await;
} else {
sim.write(
to_post_decode.inner.data,
to_post_decode.ty().inner.data.HdlNone(),
)
.await;
}
// accept new fetch requests only while there's queue space
sim.write(from_fetch.inner.ready, queue.len() < FETCH_PIPE_QUEUE_SIZE)
.await;
sim.wait_for_clock_edge(cd.clk).await;
// retire the front entry once post_decode consumed its data
if sim.read_past_bool(to_post_decode.inner.ready, cd.clk).await {
#[hdl(sim)]
if let HdlSome(_) = sim.read_past(to_post_decode.inner.data, cd.clk).await {
queue.pop_front();
}
}
// advance every in-flight fetch's delay countdown
for entry in &mut queue {
if entry.cycles_left.as_int() > 0 {
entry.cycles_left = (entry.cycles_left.as_int() - 1u8).to_sim_value();
}
}
if !sim.read_past_bool(from_fetch.inner.ready, cd.clk).await {
continue;
}
// accept a new fetch request, honoring any requested cancellations
#[hdl(sim)]
if let HdlSome(inner) = sim.read_past(from_fetch.inner.data, cd.clk).await {
#[hdl(sim)]
let NextPcToFetchInterfaceInner {
next_fetch_pc,
fetch_block_id,
in_progress_fetches_to_cancel,
} = &inner;
// cancel in-progress fetches from newest to oldest
for _ in 0..in_progress_fetches_to_cancel.as_int() {
let _ = queue.pop_back();
}
queue.push_back(
#[hdl(sim)]
FetchPipeQueueEntry {
fetch_pc: next_fetch_pc,
cycles_left: FetchPipeQueueEntry::get_next_delay(delay_sequence_index),
fetch_block_id,
},
);
}
}
}
}
/// Device-under-test: `next_pc` wired back-to-back with the mock fetch pipe,
/// sharing one clock domain.
#[hdl_module]
fn dut(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let next_pc = instance(next_pc(config));
connect(next_pc.cd, cd);
#[hdl]
let mock_fetch_pipe = instance(mock_fetch_pipe(config));
connect(mock_fetch_pipe.cd, cd);
// next_pc -> fetch pipe -> back into next_pc's post-decode input
connect(mock_fetch_pipe.from_fetch, next_pc.to_fetch);
connect(next_pc.from_decode, mock_fetch_pipe.to_post_decode);
}
/// Golden-trace test: simulate 300 clock cycles of the DUT and compare the
/// generated VCD against a checked-in expected trace.
#[hdl]
#[test]
fn test_next_pc() {
let _n = SourceLocation::normalize_files_for_tests();
let mut config = CpuConfig::new(
vec![
UnitConfig::new(UnitKind::AluBranch),
UnitConfig::new(UnitKind::AluBranch),
],
NonZeroUsize::new(20).unwrap(),
);
// two instructions per fetch group for this test
config.fetch_width = NonZeroUsize::new(2).unwrap();
let m = dut(PhantomConst::new_sized(config));
let mut sim = Simulation::new(m);
let mut writer = RcWriter::default();
sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
sim.write_clock(sim.io().cd.clk, false);
// hold reset through the first rising edge, then release
sim.write_reset(sim.io().cd.rst, true);
for _cycle in 0..300 {
sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, true);
sim.advance_time(SimDuration::from_nanos(500));
sim.write_clock(sim.io().cd.clk, false);
sim.write_reset(sim.io().cd.rst, false);
}
// FIXME: vcd is just whatever next_pc does now, which isn't known to be correct
let vcd = String::from_utf8(writer.take()).unwrap();
println!("####### VCD:\n{vcd}\n#######");
if vcd != include_str!("expected/next_pc.vcd") {
panic!();
}
}