From e5a439d56e5ed1a16f92f1663fdbfc4f01949c15 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Mon, 27 Oct 2025 22:41:33 -0700 Subject: [PATCH] WIP adding next_pc --- crates/cpu/src/config.rs | 25 +++++ crates/cpu/src/lib.rs | 1 + crates/cpu/src/next_pc.rs | 206 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 232 insertions(+) create mode 100644 crates/cpu/src/next_pc.rs diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index 9a66c68..ed3b814 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -34,6 +34,8 @@ pub struct CpuConfig { pub units: Vec, pub out_reg_num_width: usize, pub fetch_width: NonZeroUsize, + pub max_branches_per_fetch: NonZeroUsize, + pub fetch_width_in_bytes: NonZeroUsize, /// default value for [`UnitConfig::max_in_flight`] pub default_unit_max_in_flight: NonZeroUsize, pub rob_size: NonZeroUsize, @@ -47,6 +49,18 @@ impl CpuConfig { }; v }; + pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = { + let Some(v) = NonZeroUsize::new(1) else { + unreachable!(); + }; + v + }; + pub const DEFAULT_FETCH_WIDTH_IN_BYTES: NonZeroUsize = { + let Some(v) = NonZeroUsize::new(4) else { + unreachable!(); + }; + v + }; pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = { let Some(v) = NonZeroUsize::new(8) else { unreachable!(); @@ -58,6 +72,8 @@ impl CpuConfig { units, out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH, fetch_width: Self::DEFAULT_FETCH_WIDTH, + max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH, + fetch_width_in_bytes: Self::DEFAULT_FETCH_WIDTH_IN_BYTES, default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT, rob_size, } @@ -118,3 +134,12 @@ impl CpuConfig { [self.non_const_unit_nums().len()] } } + +#[hdl(get(|c| c.fetch_width.get()))] +pub type CpuConfigFetchWidth> = DynSize; + +#[hdl(get(|c| c.max_branches_per_fetch.get()))] +pub type CpuConfigMaxBranchesPerFetch> = DynSize; + +#[hdl(get(|c| c.fetch_width_in_bytes.get()))] +pub type CpuConfigFetchWidthInBytes> = DynSize; diff --git a/crates/cpu/src/lib.rs b/crates/cpu/src/lib.rs index bae3720..a00b668 100644 --- a/crates/cpu/src/lib.rs +++ b/crates/cpu/src/lib.rs @@ -2,6 +2,7 @@ // See Notices.txt for copyright information pub mod config; pub mod instruction; +pub mod next_pc; pub mod reg_alloc; pub mod register; pub mod unit; diff --git a/crates/cpu/src/next_pc.rs b/crates/cpu/src/next_pc.rs new file mode 100644 index 0000000..e4195ba --- /dev/null +++ b/crates/cpu/src/next_pc.rs @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10) +//! +//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage, +//! the `fetch` stage's outputs eventually end up in the `decode` stage, +//! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`) +//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds +//! information back to the `fetch` stage to cancel fetches that need to be predicted differently. + +use crate::{config::CpuConfig, util::array_vec::ArrayVec}; +use fayalite::prelude::*; +use fayalite::util::ready_valid::ReadyValid; +use std::collections::{HashMap, VecDeque}; + +#[hdl] +pub enum PredictedCond { + Taken, + Fallthrough, +} + +#[hdl] +pub struct PredictedFallthrough {} + +#[hdl] +pub enum BranchPredictionKind { + Branch(HdlOption), + IndirectBranch(HdlOption), + Call(HdlOption), + IndirectCall(HdlOption), + Ret(HdlOption), +} + +#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))] +pub type NextPcPredictionMaxBranchesBeforeLast> = DynSize; + +#[hdl(no_static)] +pub struct NextPcPrediction> { + pub fetch_pc: UInt<64>, + pub async_interrupt: Bool, + pub branches_before_last: ArrayVec< + BranchPredictionKind, + NextPcPredictionMaxBranchesBeforeLast, + >, + pub last_branch: HdlOption>, + pub last_branch_target_pc: UInt<64>, +} + +#[hdl] +pub struct NextPcToFetchInterfaceInner { + pub next_fetch_pc: UInt<64>, + pub fetch_block_id: UInt<8>, + pub in_progress_fetches_to_cancel: UInt<8>, +} + +#[hdl(no_static)] +pub struct NextPcToFetchInterface> { + pub inner: ReadyValid, + pub config: C, +} + +#[hdl] +/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +pub enum WipDecodedInsnKind { + NonBranch, + Branch(UInt<64>), + BranchCond(UInt<64>), + IndirectBranch, + IndirectBranchCond, + Call(UInt<64>), + CallCond(UInt<64>), + IndirectCall, + IndirectCallCond, + Ret, + RetCond, + /// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on. + Interrupt(UInt<64>), +} + +#[hdl] +/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +pub struct WipDecodedInsn { + pub fetch_block_id: UInt<8>, + pub id: UInt<12>, + pub pc: UInt<64>, + pub kind: WipDecodedInsnKind, +} + +#[hdl(no_static)] +/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire, +/// as well as updating state when a branch instruction is mis-speculated. +pub struct NextPcToRetireInterface> { + // TODO: add needed fields + pub config: C, +} + +#[hdl(no_static)] +pub struct DecodeToPostDecodeInterface> { + // TODO: add needed fields + pub config: C, +} + +#[hdl(no_static)] +pub struct PostDecodeOutputInterface> { + // TODO: add needed fields + pub config: C, +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)] +enum BranchPredictionState { + StronglyNotTaken, + #[default] + WeaklyNotTaken, + WeaklyTaken, + StronglyTaken, +} + +impl BranchPredictionState { + #[must_use] + fn is_taken(self) -> bool { + match self { + Self::StronglyNotTaken => false, + Self::WeaklyNotTaken => false, + Self::WeaklyTaken => true, + Self::StronglyTaken => true, + } + } + #[must_use] + fn towards_taken(self) -> Self { + match self { + Self::StronglyNotTaken => Self::WeaklyNotTaken, + Self::WeaklyNotTaken => Self::WeaklyTaken, + Self::WeaklyTaken => Self::StronglyTaken, + Self::StronglyTaken => Self::StronglyTaken, + } + } + #[must_use] + fn towards_not_taken(self) -> Self { + match self { + Self::StronglyNotTaken => Self::StronglyNotTaken, + Self::WeaklyNotTaken => Self::StronglyNotTaken, + Self::WeaklyTaken => Self::WeaklyNotTaken, + Self::StronglyTaken => Self::WeaklyTaken, + } + } +} + +struct NextPcState { + call_stack: Vec, + branch_target_buffer: HashMap, + history: VecDeque, + speculative_history_len: usize, + branch_predictor: Box<[BranchPredictionState; Self::BRANCH_PREDICTOR_SIZE]>, +} + +impl NextPcState { + const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8; + const BRANCH_PREDICTOR_SIZE: usize = 1 << Self::BRANCH_PREDICTOR_LOG2_SIZE; + fn branch_predictor_index(&self, pc: u64) -> usize { + let mut history = 0u64; + for i in 0..Self::BRANCH_PREDICTOR_LOG2_SIZE { + history <<= 1; + if self.history.get(i).copied().unwrap_or(false) { + history |= 1; + } + } + let mut t = history; + t ^= t.rotate_left(5) & !pc.rotate_right(3); + t ^= pc; + t ^= !t.rotate_left(2) & t.rotate_left(4); + let mut retval = 0; + for i in (0..Self::BRANCH_PREDICTOR_LOG2_SIZE).step_by(Self::BRANCH_PREDICTOR_LOG2_SIZE) { + retval ^= t >> i; + } + retval as usize % Self::BRANCH_PREDICTOR_SIZE + } +} + +impl Default for NextPcState { + fn default() -> Self { + Self { + call_stack: Default::default(), + branch_target_buffer: Default::default(), + history: Default::default(), + speculative_history_len: Default::default(), + branch_predictor: vec![Default::default(); Self::BRANCH_PREDICTOR_SIZE] + .try_into() + .expect("has right size"), + } + } +} + +#[hdl_module(extern)] +pub fn next_pc(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + m.extern_module_simulation_fn((cd,), |(cd,), mut sim| async move { + sim.resettable( + cd, + |mut sim: ExternModuleSimulationState| async move { NextPcState::default() }, + |mut sim: ExternModuleSimulationState, mut state| async move {}, + ) + .await; + }); +}