forked from libre-chip/cpu
Compare commits
No commits in common. "next-pc" and "master" have entirely different histories.
7 changed files with 7 additions and 244 deletions
9
Cargo.lock
generated
9
Cargo.lock
generated
|
|
@ -210,7 +210,6 @@ name = "cpu"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"fayalite",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -304,7 +303,7 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
|
|||
[[package]]
|
||||
name = "fayalite"
|
||||
version = "0.3.0"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"bitvec",
|
||||
|
|
@ -331,7 +330,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "fayalite-proc-macros"
|
||||
version = "0.3.0"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
|
||||
dependencies = [
|
||||
"fayalite-proc-macros-impl",
|
||||
]
|
||||
|
|
@ -339,7 +338,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "fayalite-proc-macros-impl"
|
||||
version = "0.3.0"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
|
||||
dependencies = [
|
||||
"base16ct",
|
||||
"num-bigint",
|
||||
|
|
@ -354,7 +353,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "fayalite-visit-gen"
|
||||
version = "0.3.0"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#0b77d1bea0af932a40fd221daf65d5a9b7d62bc5"
|
||||
source = "git+https://git.libre-chip.org/libre-chip/fayalite.git?branch=master#edcc5927a5f9ebca6df5720bb1f5931e50095a57"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"prettyplease",
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ rust-version = "1.89.0"
|
|||
|
||||
[workspace.dependencies]
|
||||
fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" }
|
||||
serde = { version = "1.0.202", features = ["derive"] }
|
||||
|
||||
[profile.dev]
|
||||
opt-level = 1
|
||||
|
|
|
|||
|
|
@ -16,4 +16,3 @@ version.workspace = true
|
|||
|
||||
[dependencies]
|
||||
fayalite.workspace = true
|
||||
serde.workspace = true
|
||||
|
|
|
|||
|
|
@ -8,10 +8,9 @@ use crate::{
|
|||
},
|
||||
};
|
||||
use fayalite::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
|
||||
#[non_exhaustive]
|
||||
pub struct UnitConfig {
|
||||
pub kind: UnitKind,
|
||||
|
|
@ -28,14 +27,12 @@ impl UnitConfig {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
|
||||
#[non_exhaustive]
|
||||
pub struct CpuConfig {
|
||||
pub units: Vec<UnitConfig>,
|
||||
pub out_reg_num_width: usize,
|
||||
pub fetch_width: NonZeroUsize,
|
||||
pub max_branches_per_fetch: NonZeroUsize,
|
||||
pub fetch_width_in_bytes: NonZeroUsize,
|
||||
/// default value for [`UnitConfig::max_in_flight`]
|
||||
pub default_unit_max_in_flight: NonZeroUsize,
|
||||
pub rob_size: NonZeroUsize,
|
||||
|
|
@ -49,18 +46,6 @@ impl CpuConfig {
|
|||
};
|
||||
v
|
||||
};
|
||||
/// Default used for [`CpuConfig::max_branches_per_fetch`]; the value is `1`.
pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = match NonZeroUsize::new(1) {
    Some(one) => one,
    // `1` is non-zero, so `NonZeroUsize::new` cannot return `None` here.
    None => unreachable!(),
};
|
||||
/// Default used for [`CpuConfig::fetch_width_in_bytes`]; the value is `4`.
pub const DEFAULT_FETCH_WIDTH_IN_BYTES: NonZeroUsize = match NonZeroUsize::new(4) {
    Some(four) => four,
    // `4` is non-zero, so `NonZeroUsize::new` cannot return `None` here.
    None => unreachable!(),
};
|
||||
pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = {
|
||||
let Some(v) = NonZeroUsize::new(8) else {
|
||||
unreachable!();
|
||||
|
|
@ -72,8 +57,6 @@ impl CpuConfig {
|
|||
units,
|
||||
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
|
||||
fetch_width: Self::DEFAULT_FETCH_WIDTH,
|
||||
max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH,
|
||||
fetch_width_in_bytes: Self::DEFAULT_FETCH_WIDTH_IN_BYTES,
|
||||
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
|
||||
rob_size,
|
||||
}
|
||||
|
|
@ -134,12 +117,3 @@ impl CpuConfig {
|
|||
[self.non_const_unit_nums().len()]
|
||||
}
|
||||
}
|
||||
|
||||
#[hdl(get(|c| c.fetch_width.get()))]
|
||||
pub type CpuConfigFetchWidth<C: PhantomConstGet<CpuConfig>> = DynSize;
|
||||
|
||||
#[hdl(get(|c| c.max_branches_per_fetch.get()))]
|
||||
pub type CpuConfigMaxBranchesPerFetch<C: PhantomConstGet<CpuConfig>> = DynSize;
|
||||
|
||||
#[hdl(get(|c| c.fetch_width_in_bytes.get()))]
|
||||
pub type CpuConfigFetchWidthInBytes<C: PhantomConstGet<CpuConfig>> = DynSize;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
// See Notices.txt for copyright information
|
||||
pub mod config;
|
||||
pub mod instruction;
|
||||
pub mod next_pc;
|
||||
pub mod reg_alloc;
|
||||
pub mod register;
|
||||
pub mod unit;
|
||||
|
|
|
|||
|
|
@ -1,206 +0,0 @@
|
|||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
// See Notices.txt for copyright information
|
||||
|
||||
//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10)
|
||||
//!
|
||||
//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage,
|
||||
//! the `fetch` stage's outputs eventually end up in the `decode` stage,
|
||||
//! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`)
|
||||
//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds
|
||||
//! information back to the `fetch` stage to cancel fetches that need to be predicted differently.
|
||||
|
||||
use crate::{config::CpuConfig, util::array_vec::ArrayVec};
|
||||
use fayalite::prelude::*;
|
||||
use fayalite::util::ready_valid::ReadyValid;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
|
||||
#[hdl]
|
||||
pub enum PredictedCond {
|
||||
Taken,
|
||||
Fallthrough,
|
||||
}
|
||||
|
||||
#[hdl]
|
||||
pub struct PredictedFallthrough {}
|
||||
|
||||
#[hdl]
|
||||
pub enum BranchPredictionKind<CondKind> {
|
||||
Branch(HdlOption<CondKind>),
|
||||
IndirectBranch(HdlOption<CondKind>),
|
||||
Call(HdlOption<CondKind>),
|
||||
IndirectCall(HdlOption<CondKind>),
|
||||
Ret(HdlOption<CondKind>),
|
||||
}
|
||||
|
||||
#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))]
|
||||
pub type NextPcPredictionMaxBranchesBeforeLast<C: PhantomConstGet<CpuConfig>> = DynSize;
|
||||
|
||||
#[hdl(no_static)]
|
||||
pub struct NextPcPrediction<C: PhantomConstGet<CpuConfig>> {
|
||||
pub fetch_pc: UInt<64>,
|
||||
pub async_interrupt: Bool,
|
||||
pub branches_before_last: ArrayVec<
|
||||
BranchPredictionKind<PredictedFallthrough>,
|
||||
NextPcPredictionMaxBranchesBeforeLast<C>,
|
||||
>,
|
||||
pub last_branch: HdlOption<BranchPredictionKind<PredictedCond>>,
|
||||
pub last_branch_target_pc: UInt<64>,
|
||||
}
|
||||
|
||||
#[hdl]
|
||||
pub struct NextPcToFetchInterfaceInner {
|
||||
pub next_fetch_pc: UInt<64>,
|
||||
pub fetch_block_id: UInt<8>,
|
||||
pub in_progress_fetches_to_cancel: UInt<8>,
|
||||
}
|
||||
|
||||
#[hdl(no_static)]
|
||||
pub struct NextPcToFetchInterface<C: PhantomConstGet<CpuConfig>> {
|
||||
pub inner: ReadyValid<NextPcToFetchInterfaceInner>,
|
||||
pub config: C,
|
||||
}
|
||||
|
||||
#[hdl]
|
||||
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
|
||||
pub enum WipDecodedInsnKind {
|
||||
NonBranch,
|
||||
Branch(UInt<64>),
|
||||
BranchCond(UInt<64>),
|
||||
IndirectBranch,
|
||||
IndirectBranchCond,
|
||||
Call(UInt<64>),
|
||||
CallCond(UInt<64>),
|
||||
IndirectCall,
|
||||
IndirectCallCond,
|
||||
Ret,
|
||||
RetCond,
|
||||
/// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on.
|
||||
Interrupt(UInt<64>),
|
||||
}
|
||||
|
||||
#[hdl]
|
||||
/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point.
|
||||
pub struct WipDecodedInsn {
|
||||
pub fetch_block_id: UInt<8>,
|
||||
pub id: UInt<12>,
|
||||
pub pc: UInt<64>,
|
||||
pub kind: WipDecodedInsnKind,
|
||||
}
|
||||
|
||||
#[hdl(no_static)]
|
||||
/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire,
|
||||
/// as well as updating state when a branch instruction is mis-speculated.
|
||||
pub struct NextPcToRetireInterface<C: PhantomConstGet<CpuConfig>> {
|
||||
// TODO: add needed fields
|
||||
pub config: C,
|
||||
}
|
||||
|
||||
#[hdl(no_static)]
|
||||
pub struct DecodeToPostDecodeInterface<C: PhantomConstGet<CpuConfig>> {
|
||||
// TODO: add needed fields
|
||||
pub config: C,
|
||||
}
|
||||
|
||||
#[hdl(no_static)]
|
||||
pub struct PostDecodeOutputInterface<C: PhantomConstGet<CpuConfig>> {
|
||||
// TODO: add needed fields
|
||||
pub config: C,
|
||||
}
|
||||
|
||||
/// Four-level confidence state of a single branch predictor entry.
///
/// Variants are declared from "most strongly not taken" to "most strongly taken", which is
/// also their derived ordering. The default is [`Self::WeaklyNotTaken`].
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
enum BranchPredictionState {
    StronglyNotTaken,
    #[default]
    WeaklyNotTaken,
    WeaklyTaken,
    StronglyTaken,
}
|
||||
|
||||
impl BranchPredictionState {
|
||||
#[must_use]
|
||||
fn is_taken(self) -> bool {
|
||||
match self {
|
||||
Self::StronglyNotTaken => false,
|
||||
Self::WeaklyNotTaken => false,
|
||||
Self::WeaklyTaken => true,
|
||||
Self::StronglyTaken => true,
|
||||
}
|
||||
}
|
||||
#[must_use]
|
||||
fn towards_taken(self) -> Self {
|
||||
match self {
|
||||
Self::StronglyNotTaken => Self::WeaklyNotTaken,
|
||||
Self::WeaklyNotTaken => Self::WeaklyTaken,
|
||||
Self::WeaklyTaken => Self::StronglyTaken,
|
||||
Self::StronglyTaken => Self::StronglyTaken,
|
||||
}
|
||||
}
|
||||
#[must_use]
|
||||
fn towards_not_taken(self) -> Self {
|
||||
match self {
|
||||
Self::StronglyNotTaken => Self::StronglyNotTaken,
|
||||
Self::WeaklyNotTaken => Self::StronglyNotTaken,
|
||||
Self::WeaklyTaken => Self::WeaklyNotTaken,
|
||||
Self::StronglyTaken => Self::WeaklyTaken,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct NextPcState {
|
||||
call_stack: Vec<u64>,
|
||||
branch_target_buffer: HashMap<u64, u64>,
|
||||
history: VecDeque<bool>,
|
||||
speculative_history_len: usize,
|
||||
branch_predictor: Box<[BranchPredictionState; Self::BRANCH_PREDICTOR_SIZE]>,
|
||||
}
|
||||
|
||||
impl NextPcState {
|
||||
const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8;
|
||||
const BRANCH_PREDICTOR_SIZE: usize = 1 << Self::BRANCH_PREDICTOR_LOG2_SIZE;
|
||||
fn branch_predictor_index(&self, pc: u64) -> usize {
|
||||
let mut history = 0u64;
|
||||
for i in 0..Self::BRANCH_PREDICTOR_LOG2_SIZE {
|
||||
history <<= 1;
|
||||
if self.history.get(i).copied().unwrap_or(false) {
|
||||
history |= 1;
|
||||
}
|
||||
}
|
||||
let mut t = history;
|
||||
t ^= t.rotate_left(5) & !pc.rotate_right(3);
|
||||
t ^= pc;
|
||||
t ^= !t.rotate_left(2) & t.rotate_left(4);
|
||||
let mut retval = 0;
|
||||
for i in (0..Self::BRANCH_PREDICTOR_LOG2_SIZE).step_by(Self::BRANCH_PREDICTOR_LOG2_SIZE) {
|
||||
retval ^= t >> i;
|
||||
}
|
||||
retval as usize % Self::BRANCH_PREDICTOR_SIZE
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for NextPcState {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
call_stack: Default::default(),
|
||||
branch_target_buffer: Default::default(),
|
||||
history: Default::default(),
|
||||
speculative_history_len: Default::default(),
|
||||
branch_predictor: vec![Default::default(); Self::BRANCH_PREDICTOR_SIZE]
|
||||
.try_into()
|
||||
.expect("has right size"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[hdl_module(extern)]
|
||||
pub fn next_pc(config: PhantomConst<CpuConfig>) {
|
||||
#[hdl]
|
||||
let cd: ClockDomain = m.input();
|
||||
m.extern_module_simulation_fn((cd,), |(cd,), mut sim| async move {
|
||||
sim.resettable(
|
||||
cd,
|
||||
|mut sim: ExternModuleSimulationState| async move { NextPcState::default() },
|
||||
|mut sim: ExternModuleSimulationState, mut state| async move {},
|
||||
)
|
||||
.await;
|
||||
});
|
||||
}
|
||||
|
|
@ -15,7 +15,6 @@ use fayalite::{
|
|||
intern::{Intern, Interned},
|
||||
prelude::*,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub mod alu_branch;
|
||||
pub mod unit_base;
|
||||
|
|
@ -37,7 +36,7 @@ macro_rules! all_units {
|
|||
}
|
||||
) => {
|
||||
$(#[$enum_meta])*
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
|
||||
$vis enum $UnitKind {
|
||||
$(
|
||||
$(#[$variant_meta])*
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue