diff --git a/Cargo.lock b/Cargo.lock index f417853..b16095a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -210,6 +210,7 @@ name = "cpu" version = "0.1.0" dependencies = [ "fayalite", + "serde", ] [[package]] @@ -303,7 +304,7 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "fayalite" version = "0.3.0" -source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57" +source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5" dependencies = [ "base64", "bitvec", @@ -330,7 +331,7 @@ dependencies = [ [[package]] name = "fayalite-proc-macros" version = "0.3.0" -source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57" +source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5" dependencies = [ "fayalite-proc-macros-impl", ] @@ -338,7 +339,7 @@ dependencies = [ [[package]] name = "fayalite-proc-macros-impl" version = "0.3.0" -source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57" +source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5" dependencies = [ "base16ct", "num-bigint", @@ -353,7 +354,7 @@ dependencies = [ [[package]] name = "fayalite-visit-gen" version = "0.3.0" -source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57" +source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5" dependencies = [ "indexmap", "prettyplease", diff --git a/Cargo.toml b/Cargo.toml index 57ca631..a3e74f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ rust-version = "1.89.0" [workspace.dependencies] fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" } +serde = { version = "1.0.202", features = ["derive"] } [profile.dev] opt-level = 1 diff --git a/crates/cpu/Cargo.toml b/crates/cpu/Cargo.toml index 16ec0b9..4dd85d8 100644 --- a/crates/cpu/Cargo.toml +++ b/crates/cpu/Cargo.toml @@ -16,3 +16,4 @@ version.workspace = true [dependencies] fayalite.workspace = true +serde.workspace = true diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index 4e66010..ed3b814 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -8,9 +8,10 @@ use crate::{ }, }; use fayalite::prelude::*; +use serde::{Deserialize, Serialize}; use std::num::NonZeroUsize; -#[derive(Clone, Eq, PartialEq, Hash, Debug)] +#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)] #[non_exhaustive] pub struct UnitConfig { pub kind: UnitKind, @@ -27,12 +28,14 @@ impl UnitConfig { } } -#[derive(Clone, Eq, PartialEq, Hash, Debug)] +#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)] #[non_exhaustive] pub struct CpuConfig { pub units: Vec, pub out_reg_num_width: usize, pub fetch_width: NonZeroUsize, + pub max_branches_per_fetch: NonZeroUsize, + pub fetch_width_in_bytes: NonZeroUsize, /// default value for [`UnitConfig::max_in_flight`] pub default_unit_max_in_flight: NonZeroUsize, pub rob_size: NonZeroUsize, @@ -46,6 +49,18 @@ impl CpuConfig { }; v }; + pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = { + let Some(v) = NonZeroUsize::new(1) else { + unreachable!(); + }; + v + }; + pub const DEFAULT_FETCH_WIDTH_IN_BYTES: NonZeroUsize = { + let Some(v) = NonZeroUsize::new(4) else { + unreachable!(); + }; + v + }; pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = { let Some(v) = NonZeroUsize::new(8) else { unreachable!(); @@ -57,6 +72,8 @@ impl CpuConfig { units, out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH, fetch_width: Self::DEFAULT_FETCH_WIDTH, + max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH, + fetch_width_in_bytes: Self::DEFAULT_FETCH_WIDTH_IN_BYTES, default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT, rob_size, } @@ -117,3 +134,12 @@ impl CpuConfig { [self.non_const_unit_nums().len()] } } + +#[hdl(get(|c| c.fetch_width.get()))] +pub type CpuConfigFetchWidth> = DynSize; + +#[hdl(get(|c| c.max_branches_per_fetch.get()))] +pub type CpuConfigMaxBranchesPerFetch> = DynSize; + +#[hdl(get(|c| c.fetch_width_in_bytes.get()))] +pub type CpuConfigFetchWidthInBytes> = DynSize; diff --git a/crates/cpu/src/lib.rs b/crates/cpu/src/lib.rs index bae3720..a00b668 100644 --- a/crates/cpu/src/lib.rs +++ b/crates/cpu/src/lib.rs @@ -2,6 +2,7 @@ // See Notices.txt for copyright information pub mod config; pub mod instruction; +pub mod next_pc; pub mod reg_alloc; pub mod register; pub mod unit; diff --git a/crates/cpu/src/next_pc.rs b/crates/cpu/src/next_pc.rs new file mode 100644 index 0000000..e4195ba --- /dev/null +++ b/crates/cpu/src/next_pc.rs @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10) +//! +//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage, +//! the `fetch` stage's outputs eventually end up in the `decode` stage, +//! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`) +//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds +//! information back to the `fetch` stage to cancel fetches that need to be predicted differently. + +use crate::{config::CpuConfig, util::array_vec::ArrayVec}; +use fayalite::prelude::*; +use fayalite::util::ready_valid::ReadyValid; +use std::collections::{HashMap, VecDeque}; + +#[hdl] +pub enum PredictedCond { + Taken, + Fallthrough, +} + +#[hdl] +pub struct PredictedFallthrough {} + +#[hdl] +pub enum BranchPredictionKind { + Branch(HdlOption), + IndirectBranch(HdlOption), + Call(HdlOption), + IndirectCall(HdlOption), + Ret(HdlOption), +} + +#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))] +pub type NextPcPredictionMaxBranchesBeforeLast> = DynSize; + +#[hdl(no_static)] +pub struct NextPcPrediction> { + pub fetch_pc: UInt<64>, + pub async_interrupt: Bool, + pub branches_before_last: ArrayVec< + BranchPredictionKind, + NextPcPredictionMaxBranchesBeforeLast, + >, + pub last_branch: HdlOption>, + pub last_branch_target_pc: UInt<64>, +} + +#[hdl] +pub struct NextPcToFetchInterfaceInner { + pub next_fetch_pc: UInt<64>, + pub fetch_block_id: UInt<8>, + pub in_progress_fetches_to_cancel: UInt<8>, +} + +#[hdl(no_static)] +pub struct NextPcToFetchInterface> { + pub inner: ReadyValid, + pub config: C, +} + +#[hdl] +/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +pub enum WipDecodedInsnKind { + NonBranch, + Branch(UInt<64>), + BranchCond(UInt<64>), + IndirectBranch, + IndirectBranchCond, + Call(UInt<64>), + CallCond(UInt<64>), + IndirectCall, + IndirectCallCond, + Ret, + RetCond, + /// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on. + Interrupt(UInt<64>), +} + +#[hdl] +/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +pub struct WipDecodedInsn { + pub fetch_block_id: UInt<8>, + pub id: UInt<12>, + pub pc: UInt<64>, + pub kind: WipDecodedInsnKind, +} + +#[hdl(no_static)] +/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire, +/// as well as updating state when a branch instruction is mis-speculated. +pub struct NextPcToRetireInterface> { + // TODO: add needed fields + pub config: C, +} + +#[hdl(no_static)] +pub struct DecodeToPostDecodeInterface> { + // TODO: add needed fields + pub config: C, +} + +#[hdl(no_static)] +pub struct PostDecodeOutputInterface> { + // TODO: add needed fields + pub config: C, +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)] +enum BranchPredictionState { + StronglyNotTaken, + #[default] + WeaklyNotTaken, + WeaklyTaken, + StronglyTaken, +} + +impl BranchPredictionState { + #[must_use] + fn is_taken(self) -> bool { + match self { + Self::StronglyNotTaken => false, + Self::WeaklyNotTaken => false, + Self::WeaklyTaken => true, + Self::StronglyTaken => true, + } + } + #[must_use] + fn towards_taken(self) -> Self { + match self { + Self::StronglyNotTaken => Self::WeaklyNotTaken, + Self::WeaklyNotTaken => Self::WeaklyTaken, + Self::WeaklyTaken => Self::StronglyTaken, + Self::StronglyTaken => Self::StronglyTaken, + } + } + #[must_use] + fn towards_not_taken(self) -> Self { + match self { + Self::StronglyNotTaken => Self::StronglyNotTaken, + Self::WeaklyNotTaken => Self::StronglyNotTaken, + Self::WeaklyTaken => Self::WeaklyNotTaken, + Self::StronglyTaken => Self::WeaklyTaken, + } + } +} + +struct NextPcState { + call_stack: Vec, + branch_target_buffer: HashMap, + history: VecDeque, + speculative_history_len: usize, + branch_predictor: Box<[BranchPredictionState; Self::BRANCH_PREDICTOR_SIZE]>, +} + +impl NextPcState { + const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8; + const BRANCH_PREDICTOR_SIZE: usize = 1 << Self::BRANCH_PREDICTOR_LOG2_SIZE; + fn branch_predictor_index(&self, pc: u64) -> usize { + let mut history = 0u64; + for i in 0..Self::BRANCH_PREDICTOR_LOG2_SIZE { + history <<= 1; + if self.history.get(i).copied().unwrap_or(false) { + history |= 1; + } + } + let mut t = history; + t ^= t.rotate_left(5) & !pc.rotate_right(3); + t ^= pc; + t ^= !t.rotate_left(2) & t.rotate_left(4); + let mut retval = 0; + for i in (0..Self::BRANCH_PREDICTOR_LOG2_SIZE).step_by(Self::BRANCH_PREDICTOR_LOG2_SIZE) { + retval ^= t >> i; + } + retval as usize % Self::BRANCH_PREDICTOR_SIZE + } +} + +impl Default for NextPcState { + fn default() -> Self { + Self { + call_stack: Default::default(), + branch_target_buffer: Default::default(), + history: Default::default(), + speculative_history_len: Default::default(), + branch_predictor: vec![Default::default(); Self::BRANCH_PREDICTOR_SIZE] + .try_into() + .expect("has right size"), + } + } +} + +#[hdl_module(extern)] +pub fn next_pc(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + m.extern_module_simulation_fn((cd,), |(cd,), mut sim| async move { + sim.resettable( + cd, + |mut sim: ExternModuleSimulationState| async move { NextPcState::default() }, + |mut sim: ExternModuleSimulationState, mut state| async move {}, + ) + .await; + }); +} diff --git a/crates/cpu/src/unit.rs b/crates/cpu/src/unit.rs index d6cd1d6..b71f79a 100644 --- a/crates/cpu/src/unit.rs +++ b/crates/cpu/src/unit.rs @@ -15,6 +15,7 @@ use fayalite::{ intern::{Intern, Interned}, prelude::*, }; +use serde::{Deserialize, Serialize}; pub mod alu_branch; pub mod unit_base; @@ -36,7 +37,7 @@ macro_rules! all_units { } ) => { $(#[$enum_meta])* - #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] + #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] $vis enum $UnitKind { $( $(#[$variant_meta])*