Compare commits

...
Sign in to create a new pull request.

3 commits

7 changed files with 244 additions and 7 deletions

9
Cargo.lock generated
View file

@ -210,6 +210,7 @@ name = "cpu"
version = "0.1.0"
dependencies = [
"fayalite",
"serde",
]
[[package]]
@ -303,7 +304,7 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
[[package]]
name = "fayalite"
version = "0.3.0"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#840c5e1895b7cdad3eaa2c009558de9196fe477b"
dependencies = [
"base64",
"bitvec",
@ -330,7 +331,7 @@ dependencies = [
[[package]]
name = "fayalite-proc-macros"
version = "0.3.0"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#840c5e1895b7cdad3eaa2c009558de9196fe477b"
dependencies = [
"fayalite-proc-macros-impl",
]
@ -338,7 +339,7 @@ dependencies = [
[[package]]
name = "fayalite-proc-macros-impl"
version = "0.3.0"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#840c5e1895b7cdad3eaa2c009558de9196fe477b"
dependencies = [
"base16ct",
"num-bigint",
@ -353,7 +354,7 @@ dependencies = [
[[package]]
name = "fayalite-visit-gen"
version = "0.3.0"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#840c5e1895b7cdad3eaa2c009558de9196fe477b"
dependencies = [
"indexmap",
"prettyplease",

View file

@ -15,6 +15,7 @@ rust-version = "1.89.0"
[workspace.dependencies]
fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" }
serde = { version = "1.0.202", features = ["derive"] }
[profile.dev]
opt-level = 1

View file

@ -16,3 +16,4 @@ version.workspace = true
[dependencies]
fayalite.workspace = true
serde.workspace = true

View file

@ -8,9 +8,10 @@ use crate::{
},
};
use fayalite::prelude::*;
use serde::{Deserialize, Serialize};
use std::num::NonZeroUsize;
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub struct UnitConfig {
pub kind: UnitKind,
@ -27,12 +28,14 @@ impl UnitConfig {
}
}
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub struct CpuConfig {
pub units: Vec<UnitConfig>,
pub out_reg_num_width: usize,
pub fetch_width: NonZeroUsize,
pub max_branches_per_fetch: NonZeroUsize,
pub fetch_width_in_bytes: NonZeroUsize,
/// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize,
@ -46,6 +49,18 @@ impl CpuConfig {
};
v
};
/// default value for [`CpuConfig::max_branches_per_fetch`]
pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = match NonZeroUsize::new(1) {
    Some(max_branches) => max_branches,
    // `1` is non-zero, so `NonZeroUsize::new` cannot return `None` here
    None => unreachable!(),
};
/// default value for [`CpuConfig::fetch_width_in_bytes`]
pub const DEFAULT_FETCH_WIDTH_IN_BYTES: NonZeroUsize = match NonZeroUsize::new(4) {
    Some(width_in_bytes) => width_in_bytes,
    // `4` is non-zero, so `NonZeroUsize::new` cannot return `None` here
    None => unreachable!(),
};
pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = {
let Some(v) = NonZeroUsize::new(8) else {
unreachable!();
@ -57,6 +72,8 @@ impl CpuConfig {
units,
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
fetch_width: Self::DEFAULT_FETCH_WIDTH,
max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH,
fetch_width_in_bytes: Self::DEFAULT_FETCH_WIDTH_IN_BYTES,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size,
}
@ -117,3 +134,12 @@ impl CpuConfig {
[self.non_const_unit_nums().len()]
}
}
// Size type whose value is read from the `CpuConfig` carried by `C`:
// it evaluates to `CpuConfig::fetch_width` (as a plain `usize`).
#[hdl(get(|c| c.fetch_width.get()))]
pub type CpuConfigFetchWidth<C: PhantomConstGet<CpuConfig>> = DynSize;
// Size type whose value is read from the `CpuConfig` carried by `C`:
// it evaluates to `CpuConfig::max_branches_per_fetch` (as a plain `usize`).
#[hdl(get(|c| c.max_branches_per_fetch.get()))]
pub type CpuConfigMaxBranchesPerFetch<C: PhantomConstGet<CpuConfig>> = DynSize;
// Size type whose value is read from the `CpuConfig` carried by `C`:
// it evaluates to `CpuConfig::fetch_width_in_bytes` (as a plain `usize`).
#[hdl(get(|c| c.fetch_width_in_bytes.get()))]
pub type CpuConfigFetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;

View file

@ -2,6 +2,7 @@
// See Notices.txt for copyright information
pub mod config;
pub mod instruction;
pub mod next_pc;
pub mod reg_alloc;
pub mod register;
pub mod unit;

206
crates/cpu/src/next_pc.rs Normal file
View file

@ -0,0 +1,206 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10)
//!
//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage,
//! the `fetch` stage's outputs eventually end up in the `decode` stage,
//! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`)
//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds
//! information back to the `fetch` stage to cancel fetches that need to be predicted differently.
use crate::{config::CpuConfig, util::array_vec::ArrayVec};
use fayalite::prelude::*;
use fayalite::util::ready_valid::ReadyValid;
use std::collections::{HashMap, VecDeque};
#[hdl]
/// Predicted condition outcome used for [`NextPcPrediction::last_branch`].
///
/// NOTE(review): presumably `Taken` means control transfers to the branch target and
/// `Fallthrough` means execution continues sequentially -- confirm once the predictor
/// logic that produces these values exists.
pub enum PredictedCond {
Taken,
Fallthrough,
}
#[hdl]
/// Field-less condition kind used for the entries of
/// [`NextPcPrediction::branches_before_last`].
///
/// Unlike [`PredictedCond`] it has no `Taken` alternative.
/// NOTE(review): presumably because every branch before the last one in a fetch block
/// must have been predicted to fall through -- confirm.
pub struct PredictedFallthrough {}
#[hdl]
/// The kind of a predicted branch, generic over the condition-prediction type `CondKind`
/// (this file instantiates it with [`PredictedFallthrough`] and [`PredictedCond`]).
///
/// Every variant carries an `HdlOption<CondKind>`.
/// NOTE(review): presumably `HdlNone` marks the unconditional form and `HdlSome` carries the
/// predicted condition for the conditional form (compare the `*Cond` variants of
/// [`WipDecodedInsnKind`]) -- confirm.
pub enum BranchPredictionKind<CondKind> {
Branch(HdlOption<CondKind>),
IndirectBranch(HdlOption<CondKind>),
Call(HdlOption<CondKind>),
IndirectCall(HdlOption<CondKind>),
Ret(HdlOption<CondKind>),
}
// Size type evaluating to `CpuConfig::max_branches_per_fetch - 1`, used as the size
// parameter of the `ArrayVec` in `NextPcPrediction::branches_before_last`.
// `max_branches_per_fetch` is a `NonZeroUsize`, so the subtraction cannot underflow.
#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))]
pub type NextPcPredictionMaxBranchesBeforeLast<C: PhantomConstGet<CpuConfig>> = DynSize;
#[hdl(no_static)]
/// A branch prediction record, parameterized by the [`CpuConfig`] carried in `C`.
///
/// NOTE(review): nothing in this file produces or consumes this type yet; the field
/// descriptions below are inferred from names and types only -- confirm when it is wired up.
pub struct NextPcPrediction<C: PhantomConstGet<CpuConfig>> {
/// presumably the PC of the fetch block this prediction is for -- TODO confirm
pub fetch_pc: UInt<64>,
/// presumably set when an asynchronous interrupt is predicted/taken here -- TODO confirm
pub async_interrupt: Bool,
/// branches preceding the last one; holds up to `max_branches_per_fetch - 1` entries
/// (see [`NextPcPredictionMaxBranchesBeforeLast`]), each using [`PredictedFallthrough`]
/// as its condition kind
pub branches_before_last: ArrayVec<
BranchPredictionKind<PredictedFallthrough>,
NextPcPredictionMaxBranchesBeforeLast<C>,
>,
/// the last branch, if any, with a [`PredictedCond`] condition kind
pub last_branch: HdlOption<BranchPredictionKind<PredictedCond>>,
/// presumably the predicted target of `last_branch` -- TODO confirm
pub last_branch_target_pc: UInt<64>,
}
#[hdl]
/// Payload carried by the ready/valid channel in [`NextPcToFetchInterface::inner`].
///
/// NOTE(review): field meanings below are inferred from names; no logic in this file
/// reads or writes them yet -- confirm.
pub struct NextPcToFetchInterfaceInner {
/// presumably the next PC the `fetch` stage should fetch from
pub next_fetch_pc: UInt<64>,
/// 8-bit identifier; the same width as [`WipDecodedInsn::fetch_block_id`]
pub fetch_block_id: UInt<8>,
/// presumably how many in-progress fetches the `fetch` stage should cancel -- TODO confirm
pub in_progress_fetches_to_cancel: UInt<8>,
}
#[hdl(no_static)]
/// Interface between the `next_pc` stage and the `fetch` stage (see the module docs):
/// a [`ReadyValid`] channel of [`NextPcToFetchInterfaceInner`] plus the configuration.
pub struct NextPcToFetchInterface<C: PhantomConstGet<CpuConfig>> {
/// ready/valid channel carrying the fetch requests
pub inner: ReadyValid<NextPcToFetchInterfaceInner>,
/// the [`CpuConfig`] this interface is parameterized by
pub config: C,
}
#[hdl]
/// WIP version of a decoded instruction's kind, just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
///
/// Each branch-like kind comes in an unconditional form and a `*Cond` form.
/// NOTE(review): the `UInt<64>` payloads are presumably target addresses, with the
/// `Indirect*` and `Ret*` kinds carrying none because their target isn't known from
/// decoding alone -- confirm.
pub enum WipDecodedInsnKind {
NonBranch,
Branch(UInt<64>),
BranchCond(UInt<64>),
IndirectBranch,
IndirectBranchCond,
Call(UInt<64>),
CallCond(UInt<64>),
IndirectCall,
IndirectCallCond,
Ret,
RetCond,
/// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on.
Interrupt(UInt<64>),
}
#[hdl]
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
pub struct WipDecodedInsn {
/// 8-bit identifier; the same width as [`NextPcToFetchInterfaceInner::fetch_block_id`],
/// presumably naming the fetch block this instruction came from -- TODO confirm
pub fetch_block_id: UInt<8>,
/// 12-bit instruction identifier (allocation scheme not defined in this file)
pub id: UInt<12>,
/// presumably the address of this instruction -- TODO confirm
pub pc: UInt<64>,
/// what kind of instruction this is, see [`WipDecodedInsnKind`]
pub kind: WipDecodedInsnKind,
}
#[hdl(no_static)]
/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire,
/// as well as updating state when a branch instruction is mis-speculated.
///
/// Currently a placeholder: it only carries the configuration.
pub struct NextPcToRetireInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
/// the [`CpuConfig`] this interface is parameterized by
pub config: C,
}
#[hdl(no_static)]
/// Interface from the `decode` stage to the `post_decode` stage (see the module docs).
///
/// Currently a placeholder: it only carries the configuration.
pub struct DecodeToPostDecodeInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
/// the [`CpuConfig`] this interface is parameterized by
pub config: C,
}
#[hdl(no_static)]
/// Output interface of the `post_decode` stage (see the module docs).
///
/// Currently a placeholder: it only carries the configuration.
pub struct PostDecodeOutputInterface<C: PhantomConstGet<CpuConfig>> {
// TODO: add needed fields
/// the [`CpuConfig`] this interface is parameterized by
pub config: C,
}
/// Four-state saturating counter used as one branch-predictor entry.
///
/// Variants are declared from "least taken" to "most taken", so the derived
/// ordering follows prediction strength. The default is [`Self::WeaklyNotTaken`].
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
enum BranchPredictionState {
    StronglyNotTaken,
    #[default]
    WeaklyNotTaken,
    WeaklyTaken,
    StronglyTaken,
}

impl BranchPredictionState {
    /// Returns `true` when this state predicts the branch as taken.
    #[must_use]
    fn is_taken(self) -> bool {
        matches!(self, Self::WeaklyTaken | Self::StronglyTaken)
    }

    /// Returns the state one step closer to [`Self::StronglyTaken`], saturating there.
    #[must_use]
    fn towards_taken(self) -> Self {
        match self {
            Self::StronglyNotTaken => Self::WeaklyNotTaken,
            Self::WeaklyNotTaken => Self::WeaklyTaken,
            Self::WeaklyTaken | Self::StronglyTaken => Self::StronglyTaken,
        }
    }

    /// Returns the state one step closer to [`Self::StronglyNotTaken`], saturating there.
    #[must_use]
    fn towards_not_taken(self) -> Self {
        match self {
            Self::StronglyTaken => Self::WeaklyTaken,
            Self::WeaklyTaken => Self::WeaklyNotTaken,
            Self::WeaklyNotTaken | Self::StronglyNotTaken => Self::StronglyNotTaken,
        }
    }
}
/// Software-side state kept by the [`next_pc()`] extern-module simulation.
///
/// NOTE(review): apart from `history` (read by `branch_predictor_index`), nothing in this
/// file uses these fields yet; their descriptions are inferred from names -- confirm.
struct NextPcState {
// presumably predicted return addresses pushed by calls and popped by returns
call_stack: Vec<u64>,
// presumably maps a branch's PC to its last known target PC
branch_target_buffer: HashMap<u64, u64>,
// branch outcome history; `branch_predictor_index` reads the entries at indices
// `0..BRANCH_PREDICTOR_LOG2_SIZE`, treating missing entries as `false`
history: VecDeque<bool>,
// presumably how many entries of `history` are still speculative -- TODO confirm
speculative_history_len: usize,
// table of `BRANCH_PREDICTOR_SIZE` predictor entries, boxed so it lives on the heap
branch_predictor: Box<[BranchPredictionState; Self::BRANCH_PREDICTOR_SIZE]>,
}
impl NextPcState {
    /// log2 of the number of entries in the branch predictor table.
    const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8;
    /// Number of entries in the branch predictor table.
    const BRANCH_PREDICTOR_SIZE: usize = 1 << Self::BRANCH_PREDICTOR_LOG2_SIZE;

    /// Computes the branch predictor table index for the branch at `pc`.
    ///
    /// The first [`Self::BRANCH_PREDICTOR_LOG2_SIZE`] entries of `self.history` (missing
    /// entries count as `false`) are packed into an integer, mixed with `pc`, and the
    /// 64-bit result is XOR-folded down to [`Self::BRANCH_PREDICTOR_LOG2_SIZE`] bits.
    ///
    /// The returned index is always less than [`Self::BRANCH_PREDICTOR_SIZE`].
    fn branch_predictor_index(&self, pc: u64) -> usize {
        // pack the history bits, entry 0 ending up in the most significant position
        let mut history = 0u64;
        for i in 0..Self::BRANCH_PREDICTOR_LOG2_SIZE {
            history <<= 1;
            if self.history.get(i).copied().unwrap_or(false) {
                history |= 1;
            }
        }
        // mix the history with the PC
        let mut t = history;
        t ^= t.rotate_left(5) & !pc.rotate_right(3);
        t ^= pc;
        t ^= !t.rotate_left(2) & t.rotate_left(4);
        // XOR-fold every `BRANCH_PREDICTOR_LOG2_SIZE`-bit chunk of the 64-bit hash together.
        // The chunk offsets must span the whole word (`0..u64::BITS`); iterating only over
        // `0..BRANCH_PREDICTOR_LOG2_SIZE` visits a single chunk and silently drops all the
        // upper bits of the hash.
        let folded = (0..u64::BITS as usize)
            .step_by(Self::BRANCH_PREDICTOR_LOG2_SIZE)
            .fold(0u64, |acc, shift| acc ^ (t >> shift));
        // reduce in `u64` first so narrowing to `usize` can never discard relevant bits
        (folded % Self::BRANCH_PREDICTOR_SIZE as u64) as usize
    }
}
impl Default for NextPcState {
fn default() -> Self {
Self {
call_stack: Default::default(),
branch_target_buffer: Default::default(),
history: Default::default(),
speculative_history_len: Default::default(),
branch_predictor: vec![Default::default(); Self::BRANCH_PREDICTOR_SIZE]
.try_into()
.expect("has right size"),
}
}
}
#[hdl_module(extern)]
/// Extern module for the `next_pc` stage described in the module docs.
///
/// Work in progress: the only port declared so far is the clock domain input `cd`, and the
/// simulation does nothing beyond creating a fresh [`NextPcState`].
/// `config` is not used by the body yet.
pub fn next_pc(config: PhantomConst<CpuConfig>) {
#[hdl]
let cd: ClockDomain = m.input();
// register the simulation behavior of this extern module; `cd` is handed to the closure
m.extern_module_simulation_fn((cd,), |(cd,), mut sim| async move {
sim.resettable(
cd,
// first closure: produces the initial state (presumably run on reset -- TODO confirm
// against fayalite's `resettable` documentation)
|mut sim: ExternModuleSimulationState| async move { NextPcState::default() },
// second closure: receives that state; its body is still empty, so no simulation
// behavior is implemented yet
|mut sim: ExternModuleSimulationState, mut state| async move {},
)
.await;
});
}

View file

@ -15,6 +15,7 @@ use fayalite::{
intern::{Intern, Interned},
prelude::*,
};
use serde::{Deserialize, Serialize};
pub mod alu_branch;
pub mod unit_base;
@ -36,7 +37,7 @@ macro_rules! all_units {
}
) => {
$(#[$enum_meta])*
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
$vis enum $UnitKind {
$(
$(#[$variant_meta])*