WIP adding next_pc

This commit is contained in:
Jacob Lifshay 2025-10-27 22:41:33 -07:00
parent 42462127db
commit b5beb08216
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
5 changed files with 632 additions and 0 deletions

View file

@ -34,6 +34,8 @@ pub struct CpuConfig {
pub units: Vec<UnitConfig>, pub units: Vec<UnitConfig>,
pub out_reg_num_width: usize, pub out_reg_num_width: usize,
pub fetch_width: NonZeroUsize, pub fetch_width: NonZeroUsize,
pub max_branches_per_fetch: NonZeroUsize,
pub fetch_width_in_bytes: NonZeroUsize,
/// default value for [`UnitConfig::max_in_flight`] /// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize, pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize, pub rob_size: NonZeroUsize,
@ -47,6 +49,18 @@ impl CpuConfig {
}; };
v v
}; };
/// default value for [`CpuConfig::max_branches_per_fetch`]
pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = match NonZeroUsize::new(1) {
    Some(limit) => limit,
    // `1` is non-zero, so `NonZeroUsize::new` can never return `None` here
    None => unreachable!(),
};
/// default value for [`CpuConfig::fetch_width_in_bytes`]
pub const DEFAULT_FETCH_WIDTH_IN_BYTES: NonZeroUsize = match NonZeroUsize::new(4) {
    Some(width) => width,
    // `4` is non-zero, so `NonZeroUsize::new` can never return `None` here
    None => unreachable!(),
};
pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = { pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(8) else { let Some(v) = NonZeroUsize::new(8) else {
unreachable!(); unreachable!();
@ -58,6 +72,8 @@ impl CpuConfig {
units, units,
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH, out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
fetch_width: Self::DEFAULT_FETCH_WIDTH, fetch_width: Self::DEFAULT_FETCH_WIDTH,
max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH,
fetch_width_in_bytes: Self::DEFAULT_FETCH_WIDTH_IN_BYTES,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT, default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size, rob_size,
} }
@ -118,3 +134,12 @@ impl CpuConfig {
[self.non_const_unit_nums().len()] [self.non_const_unit_nums().len()]
} }
} }
#[hdl(get(|c| c.fetch_width.get()))]
/// Size parameter (a `DynSize`) whose value is read from `fetch_width` of the
/// `CpuConfig` provided by `C`.
pub type CpuConfigFetchWidth<C: PhantomConstGet<CpuConfig>> = DynSize;
#[hdl(get(|c| c.max_branches_per_fetch.get()))]
/// Size parameter (a `DynSize`) whose value is read from `max_branches_per_fetch` of the
/// `CpuConfig` provided by `C`.
pub type CpuConfigMaxBranchesPerFetch<C: PhantomConstGet<CpuConfig>> = DynSize;
#[hdl(get(|c| c.fetch_width_in_bytes.get()))]
/// Size parameter (a `DynSize`) whose value is read from `fetch_width_in_bytes` of the
/// `CpuConfig` provided by `C`.
pub type CpuConfigFetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;

View file

@ -2,6 +2,7 @@
// See Notices.txt for copyright information // See Notices.txt for copyright information
pub mod config; pub mod config;
pub mod instruction; pub mod instruction;
pub mod next_pc;
pub mod reg_alloc; pub mod reg_alloc;
pub mod register; pub mod register;
pub mod unit; pub mod unit;

561
crates/cpu/src/next_pc.rs Normal file
View file

@ -0,0 +1,561 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10)
//!
//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage.
//! The `fetch` stage's outputs eventually end up in the `decode` stage.
//! After the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`)
//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds
//! information back to the `fetch` stage to cancel fetches that need to be predicted differently.
use crate::{config::CpuConfig, util::array_vec::ArrayVec};
use fayalite::{
int::{UIntInRange, UIntInRangeInclusive},
prelude::*,
sim::value::SimOnlyValueTrait,
util::ready_valid::ReadyValid,
};
#[hdl]
/// Predicted outcome of a branch condition.
///
/// Used as the `CondKind` of [`BranchPredictionKind`] in [`NextPcPrediction::last_branch`].
pub enum PredictedCond {
/// the branch is predicted to be taken
Taken,
/// the branch is predicted to not be taken
Fallthrough,
}
#[hdl]
/// Field-less marker used as the `CondKind` of [`BranchPredictionKind`] in
/// [`NextPcPrediction::branches_before_last`].
///
/// Unlike [`PredictedCond`] it has no way to express a taken prediction.
pub struct PredictedFallthrough {}
#[hdl]
/// Kind of a predicted branch instruction, generic over `CondKind`, the type describing the
/// predicted condition outcome.
///
/// Every variant carries an `HdlOption<CondKind>`.
/// NOTE(review): presumably `HdlNone` means the instruction is unconditional and `HdlSome`
/// carries the prediction for a conditional one — confirm.
pub enum BranchPredictionKind<CondKind> {
Branch(HdlOption<CondKind>),
IndirectBranch(HdlOption<CondKind>),
Call(HdlOption<CondKind>),
IndirectCall(HdlOption<CondKind>),
Ret(HdlOption<CondKind>),
}
#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))]
/// Size parameter (a `DynSize`) equal to `max_branches_per_fetch - 1`, read from the
/// `CpuConfig` provided by `C`.
///
/// Used as the length parameter of the `ArrayVec` in [`NextPcPrediction::branches_before_last`].
pub type NextPcPredictionMaxBranchesBeforeLast<C: PhantomConstGet<CpuConfig>> = DynSize;
#[hdl(no_static)]
/// A prediction for a single fetch: the fetch PC, the branches predicted to be in it,
/// and the target of the last of those branches.
pub struct NextPcPrediction<C: PhantomConstGet<CpuConfig>> {
/// PC of the fetch this prediction describes
pub fetch_pc: UInt<64>,
/// NOTE(review): presumably set when the prediction is for an asynchronous interrupt rather
/// than for fetched instructions — confirm
pub async_interrupt: Bool,
/// predicted branches that come before `last_branch`; their `CondKind` is
/// [`PredictedFallthrough`], so none of them can be recorded as taken
pub branches_before_last: ArrayVec<
BranchPredictionKind<PredictedFallthrough>,
NextPcPredictionMaxBranchesBeforeLast<C>,
>,
/// the final predicted branch, if any; the only one that can be predicted [`PredictedCond::Taken`]
pub last_branch: HdlOption<BranchPredictionKind<PredictedCond>>,
/// NOTE(review): presumably the predicted target PC of `last_branch` — confirm what it holds
/// when `last_branch` is `HdlNone`
pub last_branch_target_pc: UInt<64>,
}
#[hdl]
/// Payload carried by [`NextPcToFetchInterface::inner`].
pub struct NextPcToFetchInterfaceInner {
/// PC that should be fetched next
pub next_fetch_pc: UInt<64>,
/// NOTE(review): presumably identifies this fetch block and corresponds to
/// [`WipDecodedInsn::fetch_block_id`] — confirm
pub fetch_block_id: UInt<8>,
/// NOTE(review): presumably the number of in-progress fetches that must be cancelled — confirm
pub in_progress_fetches_to_cancel: UInt<8>,
}
#[hdl(no_static)]
/// Interface from [`next_pc()`] to the `fetch` stage.
pub struct NextPcToFetchInterface<C: PhantomConstGet<CpuConfig>> {
/// ready/valid channel carrying the next fetch request
pub inner: ReadyValid<NextPcToFetchInterfaceInner>,
/// the CPU configuration this interface was created for
pub config: C,
}
#[hdl]
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
///
/// NOTE(review): the `UInt<64>` payloads are presumably target PCs (the `Indirect*` and `Ret*`
/// variants carry none) — confirm.
pub enum WipDecodedInsnKind {
/// any instruction that isn't one of the branch-like kinds below
NonBranch,
Branch(UInt<64>),
BranchCond(UInt<64>),
IndirectBranch,
IndirectBranchCond,
Call(UInt<64>),
CallCond(UInt<64>),
IndirectCall,
IndirectCallCond,
Ret,
RetCond,
/// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on.
Interrupt(UInt<64>),
}
#[hdl]
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
pub struct WipDecodedInsn {
/// NOTE(review): presumably the [`NextPcToFetchInterfaceInner::fetch_block_id`] of the fetch
/// this instruction came from — confirm
pub fetch_block_id: UInt<8>,
/// NOTE(review): presumably an identifier for this instruction — confirm its scope
pub id: UInt<12>,
/// PC of this instruction
pub pc: UInt<64>,
/// what kind of instruction this is, as far as [`next_pc()`] cares
pub kind: WipDecodedInsnKind,
}
#[hdl(no_static)]
/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire,
/// as well as updating state when a branch instruction is mis-speculated.
///
/// Currently a placeholder: it only carries the configuration.
pub struct NextPcToRetireInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
/// the CPU configuration this interface was created for
pub config: C,
}
#[hdl(no_static)]
/// Interface from the `decode` stage to the `post_decode` stage.
///
/// Currently a placeholder: it only carries the configuration.
pub struct DecodeToPostDecodeInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
/// the CPU configuration this interface was created for
pub config: C,
}
#[hdl(no_static)]
/// Output interface of the `post_decode` stage.
///
/// Currently a placeholder: it only carries the configuration.
pub struct PostDecodeOutputInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
/// the CPU configuration this interface was created for
pub config: C,
}
/// State of one branch predictor entry: a four-state counter that saturates at
/// [`Self::StronglyNotTaken`] and [`Self::StronglyTaken`].
///
/// The [`Default`] state is [`Self::WeaklyNotTaken`].
#[derive(
Copy, Clone, PartialEq, Eq, Debug, Hash, Default, serde::Serialize, serde::Deserialize,
)]
enum BranchPredictionState {
/// predicts not-taken; one taken outcome moves it to [`Self::WeaklyNotTaken`]
StronglyNotTaken,
/// predicts not-taken; one taken outcome flips the prediction
#[default]
WeaklyNotTaken,
/// predicts taken; one not-taken outcome flips the prediction
WeaklyTaken,
/// predicts taken; one not-taken outcome moves it to [`Self::WeaklyTaken`]
StronglyTaken,
}
impl BranchPredictionState {
    /// Returns `true` when this state predicts that the branch is taken.
    #[must_use]
    fn is_taken(self) -> bool {
        matches!(self, Self::WeaklyTaken | Self::StronglyTaken)
    }
    /// Returns the state one step closer to [`Self::StronglyTaken`], saturating there.
    #[must_use]
    fn towards_taken(self) -> Self {
        match self {
            Self::StronglyNotTaken => Self::WeaklyNotTaken,
            Self::WeaklyNotTaken => Self::WeaklyTaken,
            Self::WeaklyTaken | Self::StronglyTaken => Self::StronglyTaken,
        }
    }
    /// Returns the state one step closer to [`Self::StronglyNotTaken`], saturating there.
    #[must_use]
    fn towards_not_taken(self) -> Self {
        match self {
            Self::StronglyNotTaken | Self::WeaklyNotTaken => Self::StronglyNotTaken,
            Self::WeaklyTaken => Self::WeaklyNotTaken,
            Self::StronglyTaken => Self::WeaklyTaken,
        }
    }
}
/// Progress of a multi-step reset sequence.
#[derive(Copy, Clone, Debug)]
#[must_use]
enum ResetStatus {
    /// the reset sequence has finished
    Done,
    /// more reset steps are still needed
    Working,
}
impl ResetStatus {
    /// Combines two statuses: the result is [`ResetStatus::Done`] only when both are done.
    fn and(self, other: Self) -> Self {
        if matches!((self, other), (ResetStatus::Done, ResetStatus::Done)) {
            ResetStatus::Done
        } else {
            ResetStatus::Working
        }
    }
}
/// Types that can produce a default simulation value of themselves.
trait SimValueDefault: Type {
/// Returns the default [`SimValue`] for the type described by `self`.
fn sim_value_default(self) -> SimValue<Self>;
}
impl<T: SimOnlyValueTrait> SimValueDefault for SimOnly<T> {
    /// The default is the default [`SimOnlyValue<T>`].
    fn sim_value_default(self) -> SimValue<Self> {
        let default_value = SimOnlyValue::<T>::default();
        default_value.to_sim_value_with_type(self)
    }
}
impl<T: Type> SimValueDefault for HdlOption<T> {
    /// The default is `HdlNone`.
    fn sim_value_default(self) -> SimValue<Self> {
        let none = self.HdlNone();
        none.to_sim_value_with_type(self)
    }
}
impl SimValueDefault for Bool {
    /// The default is `false`.
    fn sim_value_default(self) -> SimValue<Self> {
        let cleared = false;
        cleared.to_sim_value()
    }
}
impl<Width: Size> SimValueDefault for UIntType<Width> {
    /// The default is zero.
    fn sim_value_default(self) -> SimValue<Self> {
        let zero = self.zero();
        zero.to_sim_value()
    }
}
/// State that is reset over one or more steps instead of all at once.
trait ResetSteps: Type {
/// Performs reset step number `step` on `this`, writing through `sim`.
///
/// Returns [`ResetStatus::Done`] once no further steps are needed, otherwise
/// [`ResetStatus::Working`]. [`next_pc()`] calls this once per clock edge with
/// `step` counting up from `0`.
async fn reset_step(
this: Expr<Self>,
sim: &mut ExternModuleSimulationState,
step: usize,
) -> ResetStatus;
}
impl<T: SimValueDefault, Len: Size> ResetSteps for ArrayType<T, Len> {
    /// Resets one element per step: step `n` writes the element type's default value to
    /// element `n`. Reports [`ResetStatus::Done`] once the last element has been covered.
    async fn reset_step(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        step: usize,
    ) -> ResetStatus {
        let array_ty = Expr::ty(this);
        let element_ty = array_ty.element();
        let len = array_ty.len();
        if step < len {
            sim.write(this[step], element_ty.sim_value_default()).await;
        }
        match step.checked_add(1) {
            // there is at least one more element left for a later step
            Some(next_step) if next_step < len => ResetStatus::Working,
            _ => ResetStatus::Done,
        }
    }
}
#[hdl]
/// Fixed-capacity stack of return addresses.
struct CallStack {
/// storage for up to [`CallStack::SIZE`] return addresses
return_addresses: Array<UInt<64>, { CallStack::SIZE }>,
/// NOTE(review): presumably the number of valid entries in `return_addresses` — confirm
len: UIntInRangeInclusive<0, { CallStack::SIZE }>,
}
impl CallStack {
/// number of entries in `return_addresses`
const SIZE: usize = 16;
}
impl SimValueDefault for CallStack {
    /// Initial (pre-reset) value: an empty stack whose slots are all-ones.
    #[hdl]
    fn sim_value_default(self) -> SimValue<Self> {
        #[hdl(sim)]
        CallStack {
            // all-ones instead of the zeros that reset writes, so you can see the values getting reset
            return_addresses: [u64::MAX; Self::SIZE],
            len: 0usize.to_sim_value_with_type(self.len),
        }
    }
}
impl ResetSteps for CallStack {
    /// Resets the whole stack in a single step: zeroes every slot and sets `len` to `0`.
    #[hdl]
    async fn reset_step(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        _step: usize,
    ) -> ResetStatus {
        // return_addresses is implemented as a shift register, so it can be all reset at once
        for slot in this.return_addresses {
            sim.write(slot, 0u64).await;
        }
        sim.write(this.len, 0usize).await;
        ResetStatus::Done
    }
}
#[hdl]
/// Fixed-size table of optional pairs of 64-bit values.
struct BranchTargetBuffer {
/// NOTE(review): going by the field name, each `HdlSome` entry presumably holds
/// `(branch_pc, target_pc)` — confirm
branch_pc_to_target_map: Array<HdlOption<(UInt<64>, UInt<64>)>, { BranchTargetBuffer::SIZE }>,
}
impl BranchTargetBuffer {
/// number of entries in `branch_pc_to_target_map`
const SIZE: usize = 16;
}
impl SimValueDefault for BranchTargetBuffer {
/// Initial (pre-reset) value: every entry is `HdlSome((0, 0))`.
#[hdl]
fn sim_value_default(self) -> SimValue<Self> {
#[hdl(sim)]
BranchTargetBuffer {
// `HdlSome` instead of the `HdlNone` that reset writes, so you can see the values getting reset
branch_pc_to_target_map: [HdlSome((0u64, 0u64)); Self::SIZE],
}
}
}
impl ResetSteps for BranchTargetBuffer {
    /// Resets one table entry per step by delegating to the array's [`ResetSteps`] impl.
    #[hdl]
    async fn reset_step(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        step: usize,
    ) -> ResetStatus {
        let table = this.branch_pc_to_target_map;
        ResetSteps::reset_step(table, sim, step).await
    }
}
#[hdl]
/// Circular buffer of branch outcomes; positions wrap around at [`BranchHistory::SIZE`].
///
/// Entries after `non_speculative_head` up to `speculative_head` are speculative.
struct BranchHistory {
/// recorded outcomes, `true` meaning taken
history: Array<Bool, { BranchHistory::SIZE }>,
/// exclusive
tail: UIntInRange<0, { BranchHistory::SIZE }>,
/// inclusive, always at or after tail, always at or before speculative_head
non_speculative_head: UIntInRange<0, { BranchHistory::SIZE }>,
/// inclusive, always at or after both tail and non_speculative_head
speculative_head: UIntInRange<0, { BranchHistory::SIZE }>,
}
impl ResetSteps for BranchHistory {
    /// Zeroes the three position registers on every step and resets one `history` entry per step.
    #[hdl]
    async fn reset_step(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        step: usize,
    ) -> ResetStatus {
        let positions = [
            this.tail,
            this.non_speculative_head,
            this.speculative_head,
        ];
        for position in positions {
            sim.write(position, 0usize).await;
        }
        ResetSteps::reset_step(this.history, sim, step).await
    }
}
impl SimValueDefault for BranchHistory {
    /// Initial (pre-reset) value: all positions at `0` and every history bit set.
    #[hdl]
    fn sim_value_default(self) -> SimValue<Self> {
        let initial_tail = 0usize.to_sim_value_with_type(self.tail);
        let initial_non_speculative_head =
            0usize.to_sim_value_with_type(self.non_speculative_head);
        let initial_speculative_head = 0usize.to_sim_value_with_type(self.speculative_head);
        #[hdl(sim)]
        BranchHistory {
            // `true` instead of the `false` that reset writes, so you can see the values getting reset
            history: [true; Self::SIZE],
            tail: initial_tail,
            non_speculative_head: initial_non_speculative_head,
            speculative_head: initial_speculative_head,
        }
    }
}
/// Error returned by `BranchHistory::try_push_speculative`.
enum BranchHistoryTryPushSpeculativeError {
/// advancing `speculative_head` would make it reach `tail`
NoSpace,
}
/// Error returned by `BranchHistory::try_push_non_speculative`.
enum BranchHistoryTryPushNonSpeculativeError {
/// `non_speculative_head` has already caught up with `speculative_head`,
/// so there is no speculative entry left to confirm
NoSpace,
/// the pushed value differs from the recorded speculative value, which is reported in `speculated`
Misprediction { speculated: bool },
}
impl BranchHistory {
    /// log2 of [`Self::SIZE`]
    const LOG2_SIZE: usize = 8;
    /// number of entries in `history`
    const SIZE: usize = 1 << Self::LOG2_SIZE;
    /// Returns the position after `pos`, wrapping around at [`Self::SIZE`].
    fn next_pos(pos: usize) -> usize {
        let advanced = pos + 1;
        advanced % Self::SIZE
    }
    /// Returns the position before `pos`, wrapping around at [`Self::SIZE`].
    fn prev_pos(pos: usize) -> usize {
        let shifted = pos + Self::SIZE - 1;
        shifted % Self::SIZE
    }
    /// Reads up to `N` history bits, starting at `head` and walking backwards until `tail`
    /// is reached. Index `0` of the result is the entry at `head`; entries that were not
    /// read stay `false`.
    async fn history_from_head<const N: usize>(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        head: usize,
    ) -> [bool; N] {
        let mut bits = [false; N];
        let mut cursor = head;
        for bit in bits.iter_mut() {
            let tail = *sim.read(this.tail).await;
            if cursor == tail {
                break;
            }
            *bit = sim.read_bool(this.history[cursor]).await;
            cursor = Self::prev_pos(cursor);
        }
        bits
    }
    /// Drops all speculative entries by moving `speculative_head` back to `non_speculative_head`.
    async fn delete_speculative_history(this: Expr<Self>, sim: &mut ExternModuleSimulationState) {
        let confirmed_head = sim.read(this.non_speculative_head).await;
        sim.write(this.speculative_head, confirmed_head).await;
    }
    /// Reads the `N` most recent history bits, speculative entries included.
    async fn recent_history_including_speculative<const N: usize>(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
    ) -> [bool; N] {
        let newest = *sim.read(this.speculative_head).await;
        Self::history_from_head(this, sim, newest).await
    }
    /// Returns `true` when advancing `speculative_head` would make it reach `tail`.
    async fn speculative_full(this: Expr<Self>, sim: &mut ExternModuleSimulationState) -> bool {
        let head = *sim.read(this.speculative_head).await;
        let after_head = Self::next_pos(head);
        let tail = *sim.read(this.tail).await;
        after_head == tail
    }
    /// Appends `value` as a new speculative entry, failing with `NoSpace` when the buffer is full.
    async fn try_push_speculative(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        value: bool,
    ) -> Result<(), BranchHistoryTryPushSpeculativeError> {
        if Self::speculative_full(this, sim).await {
            return Err(BranchHistoryTryPushSpeculativeError::NoSpace);
        }
        let old_head = *sim.read(this.speculative_head).await;
        let new_head = Self::next_pos(old_head);
        sim.write(this.speculative_head, new_head).await;
        sim.write(this.history[new_head], value).await;
        Ok(())
    }
    /// Confirms the oldest speculative entry as non-speculative if it matches `value`.
    ///
    /// Fails with `NoSpace` when there is no speculative entry, and with `Misprediction`
    /// (leaving the state untouched) when the recorded entry differs from `value`.
    async fn try_push_non_speculative(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        value: bool,
    ) -> Result<(), BranchHistoryTryPushNonSpeculativeError> {
        let newest_speculative = *sim.read(this.speculative_head).await;
        let newest_confirmed = *sim.read(this.non_speculative_head).await;
        if newest_speculative == newest_confirmed {
            return Err(BranchHistoryTryPushNonSpeculativeError::NoSpace);
        }
        let candidate = Self::next_pos(newest_confirmed);
        let speculated = sim.read_bool(this.history[candidate]).await;
        if speculated != value {
            return Err(BranchHistoryTryPushNonSpeculativeError::Misprediction { speculated });
        }
        sim.write(this.non_speculative_head, candidate).await;
        Ok(())
    }
}
#[hdl]
/// All state kept by [`next_pc()`]; exposed on its `state_for_debug` output.
pub struct NextPcState {
/// NOTE(review): presumably the call stack including not-yet-retired calls/returns — confirm
speculative_call_stack: CallStack,
/// NOTE(review): presumably the call stack as of retired instructions only — confirm
non_speculative_call_stack: CallStack,
branch_target_buffer: BranchTargetBuffer,
/// history of branch outcomes, used when computing the index into `branch_predictor`
branch_history: BranchHistory,
/// table of per-entry prediction states; these are simulation-only values
branch_predictor: Array<SimOnly<BranchPredictionState>, { NextPcState::BRANCH_PREDICTOR_SIZE }>,
}
impl NextPcState {
    /// log2 of [`Self::BRANCH_PREDICTOR_SIZE`]; also the number of history bits used for indexing
    const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8;
    /// number of entries in `branch_predictor`
    const BRANCH_PREDICTOR_SIZE: usize = 1 << Self::BRANCH_PREDICTOR_LOG2_SIZE;
    /// Computes the `branch_predictor` index to use for the branch at `pc`.
    ///
    /// The most recent [`Self::BRANCH_PREDICTOR_LOG2_SIZE`] branch-history bits (including
    /// speculative ones) are mixed with `pc`, and the 64-bit result is XOR-folded down to
    /// an index in `0..Self::BRANCH_PREDICTOR_SIZE`.
    async fn branch_predictor_index(
        this: Expr<Self>,
        sim: &mut ExternModuleSimulationState,
        pc: u64,
    ) -> usize {
        let history_bits: [bool; Self::BRANCH_PREDICTOR_LOG2_SIZE] =
            BranchHistory::recent_history_including_speculative(this.branch_history, sim).await;
        // pack the bits into an integer, the most recent entry ending up in the highest used bit
        let mut history = 0u64;
        for history_bit in history_bits {
            history <<= 1;
            if history_bit {
                history |= 1;
            }
        }
        let mut t = history;
        t ^= t.rotate_left(5) & !pc.rotate_right(3);
        t ^= pc;
        t ^= !t.rotate_left(2) & t.rotate_left(4);
        // XOR-fold every `BRANCH_PREDICTOR_LOG2_SIZE`-bit chunk of `t` together so that all
        // 64 bits influence the index, not just the lowest chunk
        let mut retval = 0u64;
        for shift in (0..u64::BITS).step_by(Self::BRANCH_PREDICTOR_LOG2_SIZE) {
            retval ^= t >> shift;
        }
        // reduce in `u64` first so the cast to `usize` can never truncate
        (retval % Self::BRANCH_PREDICTOR_SIZE as u64) as usize
    }
}
impl SimValueDefault for NextPcState {
    /// Initial (pre-reset) value of the whole state: each sub-state's own default, plus a
    /// branch predictor table filled with a non-default state.
    #[hdl]
    fn sim_value_default(self) -> SimValue<Self> {
        // destructure exhaustively so adding a field forces this function to be revisited
        let Self {
            speculative_call_stack,
            non_speculative_call_stack,
            branch_target_buffer,
            branch_history,
            // the predictor entries are built directly below, so this field's type isn't needed
            branch_predictor: _,
        } = self;
        #[hdl(sim)]
        Self {
            speculative_call_stack: speculative_call_stack.sim_value_default(),
            non_speculative_call_stack: non_speculative_call_stack.sim_value_default(),
            branch_target_buffer: branch_target_buffer.sim_value_default(),
            branch_history: branch_history.sim_value_default(),
            // use something other than the default so you can see the reset progress
            branch_predictor: std::array::from_fn(|_| {
                SimOnlyValue::new(BranchPredictionState::default().towards_not_taken())
            }),
        }
    }
}
impl ResetSteps for NextPcState {
#[hdl]
async fn reset_step(
this: Expr<Self>,
sim: &mut ExternModuleSimulationState,
step: usize,
) -> ResetStatus {
#[hdl]
let NextPcState {
speculative_call_stack,
non_speculative_call_stack,
branch_target_buffer,
branch_history,
branch_predictor,
} = this;
let speculative_call_stack =
ResetSteps::reset_step(speculative_call_stack, sim, step).await;
let non_speculative_call_stack =
ResetSteps::reset_step(non_speculative_call_stack, sim, step).await;
let branch_target_buffer = ResetSteps::reset_step(branch_target_buffer, sim, step).await;
let branch_history = ResetSteps::reset_step(branch_history, sim, step).await;
let branch_predictor = ResetSteps::reset_step(branch_predictor, sim, step).await;
speculative_call_stack
.and(non_speculative_call_stack)
.and(branch_target_buffer)
.and(branch_history)
.and(branch_predictor)
}
}
#[hdl_module(extern)]
/// Simulation-only (extern) module implementing the `next_pc` stage.
///
/// Ports: the clock domain input `cd`, the `to_fetch` output interface, and the
/// `state_for_debug` output exposing the internal [`NextPcState`].
///
/// So far only the reset sequence is implemented (see the `TODO` below).
pub fn next_pc(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let to_fetch: NextPcToFetchInterface<PhantomConst<CpuConfig>> =
m.output(NextPcToFetchInterface[config]);
#[hdl]
let state_for_debug: NextPcState = m.output();
m.extern_module_simulation_fn(
(cd, to_fetch, state_for_debug),
|(cd, to_fetch, state_for_debug), mut sim| async move {
// start from the deliberately non-reset-looking initial state so reset progress is visible
sim.write(state_for_debug, NextPcState.sim_value_default())
.await;
sim.resettable(
cd,
// NOTE(review): presumably run while reset is asserted — confirm against `resettable`'s docs
|mut sim: ExternModuleSimulationState| async move {
// don't offer any fetch request
sim.write(to_fetch.inner.data, HdlNone()).await;
},
// NOTE(review): presumably run once reset is released — confirm against `resettable`'s docs
|mut sim: ExternModuleSimulationState, ()| async move {
// perform one reset step per clock edge until every part of the state reports done
for step in 0usize.. {
sim.wait_for_clock_edge(cd.clk).await;
match ResetSteps::reset_step(state_for_debug, &mut sim, step).await {
ResetStatus::Done => break,
ResetStatus::Working => {}
}
}
// TODO: finish
},
)
.await;
},
);
}

View file

View file

@ -0,0 +1,45 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use cpu::{
config::{CpuConfig, UnitConfig},
next_pc::next_pc,
unit::UnitKind,
};
use fayalite::{prelude::*, sim::vcd::VcdWriterDecls, util::RcWriter};
use std::num::NonZeroUsize;
/// Simulates [`next_pc`] for 300 clock cycles (reset asserted for the first one) and compares
/// the resulting VCD trace against `expected/next_pc.vcd`.
#[hdl]
#[test]
fn test_next_pc() {
    let _n = SourceLocation::normalize_files_for_tests();
    let units = vec![
        UnitConfig::new(UnitKind::AluBranch),
        UnitConfig::new(UnitKind::AluBranch),
    ];
    let rob_size = NonZeroUsize::new(20).unwrap();
    let mut config = CpuConfig::new(units, rob_size);
    config.fetch_width = NonZeroUsize::new(2).unwrap();
    let mut sim = Simulation::new(next_pc(PhantomConst::new_sized(config)));
    let mut writer = RcWriter::default();
    sim.add_trace_writer(VcdWriterDecls::new(writer.clone()));
    let cd = sim.io().cd;
    let to_fetch = sim.io().to_fetch;
    sim.write_clock(cd.clk, false);
    sim.write_reset(cd.rst, true);
    sim.write_bool(to_fetch.inner.ready, true);
    for _cycle in 0..300 {
        // rising edge after half a period
        sim.advance_time(SimDuration::from_nanos(500));
        sim.write_clock(cd.clk, true);
        // falling edge after the other half; reset is released from the first cycle on
        sim.advance_time(SimDuration::from_nanos(500));
        sim.write_clock(cd.clk, false);
        sim.write_reset(cd.rst, false);
    }
    // FIXME: vcd is just whatever next_pc does now, which isn't known to be correct
    let vcd = String::from_utf8(writer.take()).unwrap();
    println!("####### VCD:\n{vcd}\n#######");
    let expected = include_str!("expected/next_pc.vcd");
    if vcd != expected {
        // the trace was already printed above, so panic without repeating it
        panic!();
    }
}