diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index 9a66c68..ed3b814 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -34,6 +34,8 @@ pub struct CpuConfig { pub units: Vec, pub out_reg_num_width: usize, pub fetch_width: NonZeroUsize, + pub max_branches_per_fetch: NonZeroUsize, + pub fetch_width_in_bytes: NonZeroUsize, /// default value for [`UnitConfig::max_in_flight`] pub default_unit_max_in_flight: NonZeroUsize, pub rob_size: NonZeroUsize, @@ -47,6 +49,18 @@ impl CpuConfig { }; v }; + pub const DEFAULT_MAX_BRANCHES_PER_FETCH: NonZeroUsize = { + let Some(v) = NonZeroUsize::new(1) else { + unreachable!(); + }; + v + }; + pub const DEFAULT_FETCH_WIDTH_IN_BYTES: NonZeroUsize = { + let Some(v) = NonZeroUsize::new(4) else { + unreachable!(); + }; + v + }; pub const DEFAULT_UNIT_MAX_IN_FLIGHT: NonZeroUsize = { let Some(v) = NonZeroUsize::new(8) else { unreachable!(); @@ -58,6 +72,8 @@ impl CpuConfig { units, out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH, fetch_width: Self::DEFAULT_FETCH_WIDTH, + max_branches_per_fetch: Self::DEFAULT_MAX_BRANCHES_PER_FETCH, + fetch_width_in_bytes: Self::DEFAULT_FETCH_WIDTH_IN_BYTES, default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT, rob_size, } @@ -118,3 +134,12 @@ impl CpuConfig { [self.non_const_unit_nums().len()] } } + +#[hdl(get(|c| c.fetch_width.get()))] +pub type CpuConfigFetchWidth> = DynSize; + +#[hdl(get(|c| c.max_branches_per_fetch.get()))] +pub type CpuConfigMaxBranchesPerFetch> = DynSize; + +#[hdl(get(|c| c.fetch_width_in_bytes.get()))] +pub type CpuConfigFetchWidthInBytes> = DynSize; diff --git a/crates/cpu/src/lib.rs b/crates/cpu/src/lib.rs index bae3720..a00b668 100644 --- a/crates/cpu/src/lib.rs +++ b/crates/cpu/src/lib.rs @@ -2,6 +2,7 @@ // See Notices.txt for copyright information pub mod config; pub mod instruction; +pub mod next_pc; pub mod reg_alloc; pub mod register; pub mod unit; diff --git a/crates/cpu/src/next_pc.rs b/crates/cpu/src/next_pc.rs new file mode 
100644 index 0000000..4ce87b3 --- /dev/null +++ b/crates/cpu/src/next_pc.rs @@ -0,0 +1,561 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +//! [Next-Instruction Logic](https://git.libre-chip.org/libre-chip/grant-tracking/issues/10) +//! +//! The basic idea here is that there's a `next_pc` stage that sends predicted fetch PCs to the `fetch` stage, +//! the `fetch` stage's outputs eventually end up in the `decode` stage, +//! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`) +//! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds +//! information back to the `fetch` stage to cancel fetches that need to be predicted differently. + +use crate::{config::CpuConfig, util::array_vec::ArrayVec}; +use fayalite::{ + int::{UIntInRange, UIntInRangeInclusive}, + prelude::*, + sim::value::SimOnlyValueTrait, + util::ready_valid::ReadyValid, +}; + +#[hdl] +pub enum PredictedCond { + Taken, + Fallthrough, +} + +#[hdl] +pub struct PredictedFallthrough {} + +#[hdl] +pub enum BranchPredictionKind { + Branch(HdlOption), + IndirectBranch(HdlOption), + Call(HdlOption), + IndirectCall(HdlOption), + Ret(HdlOption), +} + +#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))] +pub type NextPcPredictionMaxBranchesBeforeLast> = DynSize; + +#[hdl(no_static)] +pub struct NextPcPrediction> { + pub fetch_pc: UInt<64>, + pub async_interrupt: Bool, + pub branches_before_last: ArrayVec< + BranchPredictionKind, + NextPcPredictionMaxBranchesBeforeLast, + >, + pub last_branch: HdlOption>, + pub last_branch_target_pc: UInt<64>, +} + +#[hdl] +pub struct NextPcToFetchInterfaceInner { + pub next_fetch_pc: UInt<64>, + pub fetch_block_id: UInt<8>, + pub in_progress_fetches_to_cancel: UInt<8>, +} + +#[hdl(no_static)] +pub struct NextPcToFetchInterface> { + pub inner: ReadyValid, + pub config: C, +} + +#[hdl] +/// WIP version of decoded instruction just good 
enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +pub enum WipDecodedInsnKind { + NonBranch, + Branch(UInt<64>), + BranchCond(UInt<64>), + IndirectBranch, + IndirectBranchCond, + Call(UInt<64>), + CallCond(UInt<64>), + IndirectCall, + IndirectCallCond, + Ret, + RetCond, + /// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on. + Interrupt(UInt<64>), +} + +#[hdl] +/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +pub struct WipDecodedInsn { + pub fetch_block_id: UInt<8>, + pub id: UInt<12>, + pub pc: UInt<64>, + pub kind: WipDecodedInsnKind, +} + +#[hdl(no_static)] +/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire, +/// as well as updating state when a branch instruction is mis-speculated. 
+pub struct NextPcToRetireInterface> { + // TODO: add needed fields + pub config: C, +} + +#[hdl(no_static)] +pub struct DecodeToPostDecodeInterface> { + // TODO: add needed fields + pub config: C, +} + +#[hdl(no_static)] +pub struct PostDecodeOutputInterface> { + // TODO: add needed fields + pub config: C, +} + +#[derive( + Copy, Clone, PartialEq, Eq, Debug, Hash, Default, serde::Serialize, serde::Deserialize, +)] +enum BranchPredictionState { + StronglyNotTaken, + #[default] + WeaklyNotTaken, + WeaklyTaken, + StronglyTaken, +} + +impl BranchPredictionState { + #[must_use] + fn is_taken(self) -> bool { + match self { + Self::StronglyNotTaken => false, + Self::WeaklyNotTaken => false, + Self::WeaklyTaken => true, + Self::StronglyTaken => true, + } + } + #[must_use] + fn towards_taken(self) -> Self { + match self { + Self::StronglyNotTaken => Self::WeaklyNotTaken, + Self::WeaklyNotTaken => Self::WeaklyTaken, + Self::WeaklyTaken => Self::StronglyTaken, + Self::StronglyTaken => Self::StronglyTaken, + } + } + #[must_use] + fn towards_not_taken(self) -> Self { + match self { + Self::StronglyNotTaken => Self::StronglyNotTaken, + Self::WeaklyNotTaken => Self::StronglyNotTaken, + Self::WeaklyTaken => Self::WeaklyNotTaken, + Self::StronglyTaken => Self::WeaklyTaken, + } + } +} + +#[derive(Copy, Clone, Debug)] +#[must_use] +enum ResetStatus { + Done, + Working, +} + +impl ResetStatus { + fn and(self, other: Self) -> Self { + match (self, other) { + (ResetStatus::Done, ResetStatus::Done) => ResetStatus::Done, + (ResetStatus::Done | ResetStatus::Working, ResetStatus::Working) + | (ResetStatus::Working, ResetStatus::Done) => ResetStatus::Working, + } + } +} + +trait SimValueDefault: Type { + fn sim_value_default(self) -> SimValue; +} + +impl SimValueDefault for SimOnly { + fn sim_value_default(self) -> SimValue { + SimOnlyValue::::default().to_sim_value_with_type(self) + } +} + +impl SimValueDefault for HdlOption { + fn sim_value_default(self) -> SimValue { + 
self.HdlNone().to_sim_value_with_type(self) + } +} + +impl SimValueDefault for Bool { + fn sim_value_default(self) -> SimValue { + false.to_sim_value() + } +} + +impl SimValueDefault for UIntType { + fn sim_value_default(self) -> SimValue { + self.zero().to_sim_value() + } +} + +trait ResetSteps: Type { + async fn reset_step( + this: Expr, + sim: &mut ExternModuleSimulationState, + step: usize, + ) -> ResetStatus; +} + +impl ResetSteps for ArrayType { + async fn reset_step( + this: Expr, + sim: &mut ExternModuleSimulationState, + step: usize, + ) -> ResetStatus { + let element = Expr::ty(this).element(); + let len = Expr::ty(this).len(); + if step < len { + sim.write(this[step], element.sim_value_default()).await; + } + if step.saturating_add(1) >= len { + ResetStatus::Done + } else { + ResetStatus::Working + } + } +} + +#[hdl] +struct CallStack { + return_addresses: Array, { CallStack::SIZE }>, + len: UIntInRangeInclusive<0, { CallStack::SIZE }>, +} + +impl CallStack { + const SIZE: usize = 16; +} + +impl SimValueDefault for CallStack { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + CallStack { + // something other than zero so you can see the values getting reset + return_addresses: [!0u64; Self::SIZE], + len: 0usize.to_sim_value_with_type(self.len), + } + } +} + +impl ResetSteps for CallStack { + #[hdl] + async fn reset_step( + this: Expr, + sim: &mut ExternModuleSimulationState, + _step: usize, + ) -> ResetStatus { + #[hdl] + let CallStack { + return_addresses, + len, + } = this; + // return_addresses is implemented as a shift register, so it can be all reset at once + for i in return_addresses { + sim.write(i, 0u64).await; + } + sim.write(len, 0usize).await; + ResetStatus::Done + } +} + +#[hdl] +struct BranchTargetBuffer { + branch_pc_to_target_map: Array, UInt<64>)>, { BranchTargetBuffer::SIZE }>, +} + +impl BranchTargetBuffer { + const SIZE: usize = 16; +} + +impl SimValueDefault for BranchTargetBuffer { + #[hdl] + fn 
sim_value_default(self) -> SimValue { + #[hdl(sim)] + BranchTargetBuffer { + // something other than zero so you can see the values getting reset + branch_pc_to_target_map: [HdlSome((0u64, 0u64)); Self::SIZE], + } + } +} + +impl ResetSteps for BranchTargetBuffer { + #[hdl] + async fn reset_step( + this: Expr, + sim: &mut ExternModuleSimulationState, + step: usize, + ) -> ResetStatus { + #[hdl] + let BranchTargetBuffer { + branch_pc_to_target_map, + } = this; + ResetSteps::reset_step(branch_pc_to_target_map, sim, step).await + } +} + +#[hdl] +struct BranchHistory { + history: Array, + /// exclusive + tail: UIntInRange<0, { BranchHistory::SIZE }>, + /// inclusive, always at or after tail, always at or before speculative_head + non_speculative_head: UIntInRange<0, { BranchHistory::SIZE }>, + /// inclusive, always at or after both tail and non_speculative_head + speculative_head: UIntInRange<0, { BranchHistory::SIZE }>, +} + +impl ResetSteps for BranchHistory { + #[hdl] + async fn reset_step( + this: Expr, + sim: &mut ExternModuleSimulationState, + step: usize, + ) -> ResetStatus { + #[hdl] + let Self { + history, + tail, + non_speculative_head, + speculative_head, + } = this; + sim.write(tail, 0usize).await; + sim.write(non_speculative_head, 0usize).await; + sim.write(speculative_head, 0usize).await; + ResetSteps::reset_step(history, sim, step).await + } +} + +impl SimValueDefault for BranchHistory { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + BranchHistory { + // something other than zero so you can see the values getting reset + history: [true; Self::SIZE], + tail: 0usize.to_sim_value_with_type(self.tail), + non_speculative_head: 0usize.to_sim_value_with_type(self.non_speculative_head), + speculative_head: 0usize.to_sim_value_with_type(self.speculative_head), + } + } +} + +enum BranchHistoryTryPushSpeculativeError { + NoSpace, +} + +enum BranchHistoryTryPushNonSpeculativeError { + NoSpace, + Misprediction { speculated: bool }, +} + +impl 
BranchHistory { + const LOG2_SIZE: usize = 8; + const SIZE: usize = 1 << Self::LOG2_SIZE; + fn next_pos(pos: usize) -> usize { + (pos + 1) % Self::SIZE + } + fn prev_pos(pos: usize) -> usize { + (pos + Self::SIZE - 1) % Self::SIZE + } + async fn history_from_head( + this: Expr, + sim: &mut ExternModuleSimulationState, + head: usize, + ) -> [bool; N] { + let mut retval = [false; N]; + let mut pos = head; + for entry in &mut retval { + if pos == *sim.read(this.tail).await { + break; + } + *entry = sim.read_bool(this.history[pos]).await; + pos = Self::prev_pos(pos); + } + retval + } + async fn delete_speculative_history(this: Expr, sim: &mut ExternModuleSimulationState) { + let non_speculative_head = sim.read(this.non_speculative_head).await; + sim.write(this.speculative_head, non_speculative_head).await; + } + async fn recent_history_including_speculative( + this: Expr, + sim: &mut ExternModuleSimulationState, + ) -> [bool; N] { + let head = *sim.read(this.speculative_head).await; + Self::history_from_head(this, sim, head).await + } + async fn speculative_full(this: Expr, sim: &mut ExternModuleSimulationState) -> bool { + let speculative_head = *sim.read(this.speculative_head).await; + Self::next_pos(speculative_head) == *sim.read(this.tail).await + } + async fn try_push_speculative( + this: Expr, + sim: &mut ExternModuleSimulationState, + value: bool, + ) -> Result<(), BranchHistoryTryPushSpeculativeError> { + if Self::speculative_full(this, sim).await { + Err(BranchHistoryTryPushSpeculativeError::NoSpace) + } else { + let speculative_head = *sim.read(this.speculative_head).await; + let speculative_head = Self::next_pos(speculative_head); + sim.write(this.speculative_head, speculative_head).await; + sim.write(this.history[speculative_head], value).await; + Ok(()) + } + } + async fn try_push_non_speculative( + this: Expr, + sim: &mut ExternModuleSimulationState, + value: bool, + ) -> Result<(), BranchHistoryTryPushNonSpeculativeError> { + let speculative_head = 
*sim.read(this.speculative_head).await; + let non_speculative_head = *sim.read(this.non_speculative_head).await; + if speculative_head == non_speculative_head { + Err(BranchHistoryTryPushNonSpeculativeError::NoSpace) + } else { + let pos = Self::next_pos(non_speculative_head); + let speculated = sim.read_bool(this.history[pos]).await; + if speculated != value { + Err(BranchHistoryTryPushNonSpeculativeError::Misprediction { speculated }) + } else { + sim.write(this.non_speculative_head, pos).await; + Ok(()) + } + } + } +} + +#[hdl] +pub struct NextPcState { + speculative_call_stack: CallStack, + non_speculative_call_stack: CallStack, + branch_target_buffer: BranchTargetBuffer, + branch_history: BranchHistory, + branch_predictor: Array, { NextPcState::BRANCH_PREDICTOR_SIZE }>, +} + +impl NextPcState { + const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8; + const BRANCH_PREDICTOR_SIZE: usize = 1 << Self::BRANCH_PREDICTOR_LOG2_SIZE; + async fn branch_predictor_index( + this: Expr, + sim: &mut ExternModuleSimulationState, + pc: u64, + ) -> usize { + let mut history = 0u64; + let history_bits: [bool; Self::BRANCH_PREDICTOR_LOG2_SIZE] = + BranchHistory::recent_history_including_speculative(this.branch_history, sim).await; + for history_bit in history_bits { + history <<= 1; + if history_bit { + history |= 1; + } + } + let mut t = history; + t ^= t.rotate_left(5) & !pc.rotate_right(3); + t ^= pc; + t ^= !t.rotate_left(2) & t.rotate_left(4); + let mut retval = 0; // XOR-fold all 64 bits of `t` into the index + for i in (0..u64::BITS as usize).step_by(Self::BRANCH_PREDICTOR_LOG2_SIZE) { + retval ^= t >> i; + } + retval as usize % Self::BRANCH_PREDICTOR_SIZE + } +} + +impl SimValueDefault for NextPcState { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + speculative_call_stack, + non_speculative_call_stack, + branch_target_buffer, + branch_history, + branch_predictor, + } = self; + #[hdl(sim)] + Self { + speculative_call_stack: speculative_call_stack.sim_value_default(), + 
non_speculative_call_stack: non_speculative_call_stack.sim_value_default(), + branch_target_buffer: branch_target_buffer.sim_value_default(), + branch_history: branch_history.sim_value_default(), + // use something other than the default so you can see the reset progress + branch_predictor: std::array::from_fn(|_| { + SimOnlyValue::new(BranchPredictionState::default().towards_not_taken()) + }), + } + } +} + +impl ResetSteps for NextPcState { + #[hdl] + async fn reset_step( + this: Expr, + sim: &mut ExternModuleSimulationState, + step: usize, + ) -> ResetStatus { + #[hdl] + let NextPcState { + speculative_call_stack, + non_speculative_call_stack, + branch_target_buffer, + branch_history, + branch_predictor, + } = this; + let speculative_call_stack = + ResetSteps::reset_step(speculative_call_stack, sim, step).await; + let non_speculative_call_stack = + ResetSteps::reset_step(non_speculative_call_stack, sim, step).await; + let branch_target_buffer = ResetSteps::reset_step(branch_target_buffer, sim, step).await; + let branch_history = ResetSteps::reset_step(branch_history, sim, step).await; + let branch_predictor = ResetSteps::reset_step(branch_predictor, sim, step).await; + speculative_call_stack + .and(non_speculative_call_stack) + .and(branch_target_buffer) + .and(branch_history) + .and(branch_predictor) + } +} + +#[hdl_module(extern)] +pub fn next_pc(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let to_fetch: NextPcToFetchInterface> = + m.output(NextPcToFetchInterface[config]); + #[hdl] + let state_for_debug: NextPcState = m.output(); + m.extern_module_simulation_fn( + (cd, to_fetch, state_for_debug), + |(cd, to_fetch, state_for_debug), mut sim| async move { + sim.write(state_for_debug, NextPcState.sim_value_default()) + .await; + sim.resettable( + cd, + |mut sim: ExternModuleSimulationState| async move { + sim.write(to_fetch.inner.data, HdlNone()).await; + }, + |mut sim: ExternModuleSimulationState, ()| async move { + for step in 
0usize.. { + sim.wait_for_clock_edge(cd.clk).await; + match ResetSteps::reset_step(state_for_debug, &mut sim, step).await { + ResetStatus::Done => break, + ResetStatus::Working => {} + } + } + // TODO: finish + }, + ) + .await; + }, + ); +} diff --git a/crates/cpu/tests/expected/next_pc.vcd b/crates/cpu/tests/expected/next_pc.vcd new file mode 100644 index 0000000..e69de29 diff --git a/crates/cpu/tests/next_pc.rs b/crates/cpu/tests/next_pc.rs new file mode 100644 index 0000000..28c78e0 --- /dev/null +++ b/crates/cpu/tests/next_pc.rs @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use cpu::{ + config::{CpuConfig, UnitConfig}, + next_pc::next_pc, + unit::UnitKind, +}; +use fayalite::{prelude::*, sim::vcd::VcdWriterDecls, util::RcWriter}; +use std::num::NonZeroUsize; + +#[hdl] +#[test] +fn test_next_pc() { + let _n = SourceLocation::normalize_files_for_tests(); + let mut config = CpuConfig::new( + vec![ + UnitConfig::new(UnitKind::AluBranch), + UnitConfig::new(UnitKind::AluBranch), + ], + NonZeroUsize::new(20).unwrap(), + ); + config.fetch_width = NonZeroUsize::new(2).unwrap(); + let m = next_pc(PhantomConst::new_sized(config)); + let mut sim = Simulation::new(m); + let mut writer = RcWriter::default(); + sim.add_trace_writer(VcdWriterDecls::new(writer.clone())); + let to_fetch = sim.io().to_fetch; + sim.write_clock(sim.io().cd.clk, false); + sim.write_reset(sim.io().cd.rst, true); + sim.write_bool(to_fetch.inner.ready, true); + for _cycle in 0..300 { + sim.advance_time(SimDuration::from_nanos(500)); + sim.write_clock(sim.io().cd.clk, true); + sim.advance_time(SimDuration::from_nanos(500)); + sim.write_clock(sim.io().cd.clk, false); + sim.write_reset(sim.io().cd.rst, false); + } + // FIXME: vcd is just whatever next_pc does now, which isn't known to be correct + let vcd = String::from_utf8(writer.take()).unwrap(); + println!("####### VCD:\n{vcd}\n#######"); + if vcd != 
include_str!("expected/next_pc.vcd") { + panic!(); + } +}